policyengine 3.1.7__tar.gz → 3.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {policyengine-3.1.7 → policyengine-3.1.8}/CHANGELOG.md +7 -0
  2. {policyengine-3.1.7 → policyengine-3.1.8}/PKG-INFO +1 -1
  3. {policyengine-3.1.7 → policyengine-3.1.8}/changelog.yaml +5 -0
  4. {policyengine-3.1.7 → policyengine-3.1.8}/pyproject.toml +1 -1
  5. policyengine-3.1.8/src/policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
  6. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/datasets.py +27 -4
  7. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine.egg-info/PKG-INFO +1 -1
  8. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine.egg-info/SOURCES.txt +1 -6
  9. {policyengine-3.1.7 → policyengine-3.1.8}/uv.lock +1 -1
  10. policyengine-3.1.7/src/policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
  11. policyengine-3.1.7/tests/test_get_parameter_variable.py +0 -141
  12. policyengine-3.1.7/tests/test_uk_dataset.py +0 -112
  13. policyengine-3.1.7/tests/test_us_datasets.py +0 -109
  14. policyengine-3.1.7/tests/test_us_entity_mapping.py +0 -334
  15. policyengine-3.1.7/tests/test_us_simulation.py +0 -249
  16. {policyengine-3.1.7 → policyengine-3.1.8}/.claude/policyengine-guide.md +0 -0
  17. {policyengine-3.1.7 → policyengine-3.1.8}/.claude/quick-reference.md +0 -0
  18. {policyengine-3.1.7 → policyengine-3.1.8}/.github/CONTRIBUTING.md +0 -0
  19. {policyengine-3.1.7 → policyengine-3.1.8}/.github/changelog_template.md +0 -0
  20. {policyengine-3.1.7 → policyengine-3.1.8}/.github/fetch_version.py +0 -0
  21. {policyengine-3.1.7 → policyengine-3.1.8}/.github/get-changelog-diff.sh +0 -0
  22. {policyengine-3.1.7 → policyengine-3.1.8}/.github/has-functional-changes.sh +0 -0
  23. {policyengine-3.1.7 → policyengine-3.1.8}/.github/is-version-number-acceptable.sh +0 -0
  24. {policyengine-3.1.7 → policyengine-3.1.8}/.github/publish-git-tag.sh +0 -0
  25. {policyengine-3.1.7 → policyengine-3.1.8}/.github/workflows/code_changes.yaml +0 -0
  26. {policyengine-3.1.7 → policyengine-3.1.8}/.github/workflows/docs.yml +0 -0
  27. {policyengine-3.1.7 → policyengine-3.1.8}/.github/workflows/pr_code_changes.yaml +0 -0
  28. {policyengine-3.1.7 → policyengine-3.1.8}/.github/workflows/pr_docs_changes.yaml +0 -0
  29. {policyengine-3.1.7 → policyengine-3.1.8}/.github/workflows/versioning.yaml +0 -0
  30. {policyengine-3.1.7 → policyengine-3.1.8}/.gitignore +0 -0
  31. {policyengine-3.1.7 → policyengine-3.1.8}/CLAUDE.md +0 -0
  32. {policyengine-3.1.7 → policyengine-3.1.8}/LICENSE +0 -0
  33. {policyengine-3.1.7 → policyengine-3.1.8}/Makefile +0 -0
  34. {policyengine-3.1.7 → policyengine-3.1.8}/README.md +0 -0
  35. {policyengine-3.1.7 → policyengine-3.1.8}/changelog_entry.yaml +0 -0
  36. {policyengine-3.1.7 → policyengine-3.1.8}/docs/.gitignore +0 -0
  37. {policyengine-3.1.7 → policyengine-3.1.8}/docs/core-concepts.md +0 -0
  38. {policyengine-3.1.7 → policyengine-3.1.8}/docs/country-models-uk.md +0 -0
  39. {policyengine-3.1.7 → policyengine-3.1.8}/docs/country-models-us.md +0 -0
  40. {policyengine-3.1.7 → policyengine-3.1.8}/docs/dev.md +0 -0
  41. {policyengine-3.1.7 → policyengine-3.1.8}/docs/index.md +0 -0
  42. {policyengine-3.1.7 → policyengine-3.1.8}/docs/myst.yml +0 -0
  43. {policyengine-3.1.7 → policyengine-3.1.8}/docs/visualisation.md +0 -0
  44. {policyengine-3.1.7 → policyengine-3.1.8}/examples/employment_income_variation_uk.py +0 -0
  45. {policyengine-3.1.7 → policyengine-3.1.8}/examples/employment_income_variation_us.py +0 -0
  46. {policyengine-3.1.7 → policyengine-3.1.8}/examples/income_bands_uk.py +0 -0
  47. {policyengine-3.1.7 → policyengine-3.1.8}/examples/income_distribution_us.py +0 -0
  48. {policyengine-3.1.7 → policyengine-3.1.8}/examples/policy_change_uk.py +0 -0
  49. {policyengine-3.1.7 → policyengine-3.1.8}/examples/speedtest_us_simulation.py +0 -0
  50. {policyengine-3.1.7 → policyengine-3.1.8}/setup.cfg +0 -0
  51. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/__init__.py +0 -0
  52. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/__init__.py +0 -0
  53. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/dataset.py +0 -0
  54. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/dataset_version.py +0 -0
  55. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/dynamic.py +0 -0
  56. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/output.py +0 -0
  57. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/parameter.py +0 -0
  58. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/parameter_value.py +0 -0
  59. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/policy.py +0 -0
  60. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/simulation.py +0 -0
  61. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/tax_benefit_model.py +0 -0
  62. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/tax_benefit_model_version.py +0 -0
  63. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/core/variable.py +0 -0
  64. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/outputs/__init__.py +0 -0
  65. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/outputs/aggregate.py +0 -0
  66. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/outputs/change_aggregate.py +0 -0
  67. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/outputs/decile_impact.py +0 -0
  68. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/__init__.py +0 -0
  69. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/analysis.py +0 -0
  70. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/model.py +0 -0
  71. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/outputs.py +0 -0
  72. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk.py +0 -0
  73. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/__init__.py +0 -0
  74. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/analysis.py +0 -0
  75. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/datasets.py +0 -0
  76. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/model.py +0 -0
  77. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/outputs.py +0 -0
  78. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us.py +0 -0
  79. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/utils/__init__.py +0 -0
  80. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/utils/dates.py +0 -0
  81. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/utils/parametric_reforms.py +0 -0
  82. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine/utils/plotting.py +0 -0
  83. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine.egg-info/dependency_links.txt +0 -0
  84. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine.egg-info/requires.txt +0 -0
  85. {policyengine-3.1.7 → policyengine-3.1.8}/src/policyengine.egg-info/top_level.txt +0 -0
  86. {policyengine-3.1.7 → policyengine-3.1.8}/tests/test_aggregate.py +0 -0
  87. {policyengine-3.1.7 → policyengine-3.1.8}/tests/test_change_aggregate.py +0 -0
  88. {policyengine-3.1.7 → policyengine-3.1.8}/tests/test_entity_mapping.py +0 -0
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [3.1.8] - 2025-12-02 00:20:11
9
+
10
+ ### Fixed
11
+
12
+ - Dataset speedup with better handling of string cols.
13
+
8
14
  ## [3.1.7] - 2025-11-24 16:34:53
9
15
 
10
16
  ### Fixed
@@ -239,6 +245,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
239
245
 
240
246
 
241
247
 
248
+ [3.1.8]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.7...3.1.8
242
249
  [3.1.7]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.6...3.1.7
243
250
  [3.1.6]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.5...3.1.6
244
251
  [3.1.5]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.4...3.1.5
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: policyengine
3
- Version: 3.1.7
3
+ Version: 3.1.8
4
4
  Summary: A package to conduct policy analysis using PolicyEngine tax-benefit models.
5
5
  Author-email: PolicyEngine <hello@policyengine.org>
6
6
  License: GNU AFFERO GENERAL PUBLIC LICENSE
@@ -195,3 +195,8 @@
195
195
  fixed:
196
196
  - Build error
197
197
  date: 2025-11-24 16:34:53
198
+ - bump: patch
199
+ changes:
200
+ fixed:
201
+ - Dataset speedup with better handling of string cols.
202
+ date: 2025-12-02 00:20:11
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "policyengine"
7
- version = "3.1.7"
7
+ version = "3.1.8"
8
8
  description = "A package to conduct policy analysis using PolicyEngine tax-benefit models."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -40,14 +40,37 @@ class PolicyEngineUKDataset(Dataset):
40
40
  self.load()
41
41
 
42
42
  def save(self) -> None:
43
- """Save dataset to HDF5 file."""
43
+ """Save dataset to HDF5 file.
44
+
45
+ Converts object columns to categorical dtype to avoid slow pickle serialization.
46
+ """
44
47
  filepath = Path(self.filepath)
45
48
  if not filepath.parent.exists():
46
49
  filepath.parent.mkdir(parents=True, exist_ok=True)
50
+
51
+ # Convert DataFrames and optimize object columns to categorical
52
+ person_df = pd.DataFrame(self.data.person)
53
+ benunit_df = pd.DataFrame(self.data.benunit)
54
+ household_df = pd.DataFrame(self.data.household)
55
+
56
+ # Convert object columns to categorical to avoid pickle serialization
57
+ for col in person_df.columns:
58
+ if person_df[col].dtype == "object":
59
+ person_df[col] = person_df[col].astype("category")
60
+
61
+ for col in benunit_df.columns:
62
+ if benunit_df[col].dtype == "object":
63
+ benunit_df[col] = benunit_df[col].astype("category")
64
+
65
+ for col in household_df.columns:
66
+ if household_df[col].dtype == "object":
67
+ household_df[col] = household_df[col].astype("category")
68
+
47
69
  with pd.HDFStore(filepath, mode="w") as store:
48
- store["person"] = pd.DataFrame(self.data.person)
49
- store["benunit"] = pd.DataFrame(self.data.benunit)
50
- store["household"] = pd.DataFrame(self.data.household)
70
+ # Use format='table' to support categorical dtypes
71
+ store.put("person", person_df, format="table")
72
+ store.put("benunit", benunit_df, format="table")
73
+ store.put("household", household_df, format="table")
51
74
 
52
75
  def load(self) -> None:
53
76
  """Load dataset from HDF5 file into this instance."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: policyengine
3
- Version: 3.1.7
3
+ Version: 3.1.8
4
4
  Summary: A package to conduct policy analysis using PolicyEngine tax-benefit models.
5
5
  Author-email: PolicyEngine <hello@policyengine.org>
6
6
  License: GNU AFFERO GENERAL PUBLIC LICENSE
@@ -77,9 +77,4 @@ src/policyengine/utils/parametric_reforms.py
77
77
  src/policyengine/utils/plotting.py
78
78
  tests/test_aggregate.py
79
79
  tests/test_change_aggregate.py
80
- tests/test_entity_mapping.py
81
- tests/test_get_parameter_variable.py
82
- tests/test_uk_dataset.py
83
- tests/test_us_datasets.py
84
- tests/test_us_entity_mapping.py
85
- tests/test_us_simulation.py
80
+ tests/test_entity_mapping.py
@@ -1080,7 +1080,7 @@ wheels = [
1080
1080
 
1081
1081
  [[package]]
1082
1082
  name = "policyengine"
1083
- version = "3.0.0"
1083
+ version = "3.1.7"
1084
1084
  source = { editable = "." }
1085
1085
  dependencies = [
1086
1086
  { name = "microdf-python" },
@@ -1,141 +0,0 @@
1
- """Tests for get_parameter and get_variable methods on TaxBenefitModelVersion."""
2
-
3
- import pytest
4
-
5
- from policyengine.tax_benefit_models.uk import uk_latest
6
- from policyengine.tax_benefit_models.us import us_latest
7
-
8
-
9
- def test_uk_get_variable():
10
- """Test getting a variable by name from UK model."""
11
- # Get a known variable
12
- var = uk_latest.get_variable("income_tax")
13
-
14
- assert var is not None
15
- assert var.name == "income_tax"
16
- assert var.entity == "person"
17
- assert var.tax_benefit_model_version == uk_latest
18
-
19
-
20
- def test_uk_get_variable_not_found():
21
- """Test error handling when variable doesn't exist."""
22
- with pytest.raises(
23
- ValueError, match="Variable 'nonexistent_variable' not found"
24
- ):
25
- uk_latest.get_variable("nonexistent_variable")
26
-
27
-
28
- def test_uk_get_parameter():
29
- """Test getting a parameter by name from UK model."""
30
- # Get a known parameter
31
- param = uk_latest.get_parameter(
32
- "gov.hmrc.income_tax.allowances.personal_allowance.amount"
33
- )
34
-
35
- assert param is not None
36
- assert (
37
- param.name
38
- == "gov.hmrc.income_tax.allowances.personal_allowance.amount"
39
- )
40
- assert param.tax_benefit_model_version == uk_latest
41
-
42
-
43
- def test_uk_get_parameter_not_found():
44
- """Test error handling when parameter doesn't exist."""
45
- with pytest.raises(
46
- ValueError, match="Parameter 'nonexistent.parameter' not found"
47
- ):
48
- uk_latest.get_parameter("nonexistent.parameter")
49
-
50
-
51
- def test_us_get_variable():
52
- """Test getting a variable by name from US model."""
53
- # Get a known variable
54
- var = us_latest.get_variable("income_tax")
55
-
56
- assert var is not None
57
- assert var.name == "income_tax"
58
- assert var.entity == "tax_unit"
59
- assert var.tax_benefit_model_version == us_latest
60
-
61
-
62
- def test_us_get_variable_not_found():
63
- """Test error handling when variable doesn't exist."""
64
- with pytest.raises(
65
- ValueError, match="Variable 'nonexistent_variable' not found"
66
- ):
67
- us_latest.get_variable("nonexistent_variable")
68
-
69
-
70
- def test_us_get_parameter():
71
- """Test getting a parameter by name from US model."""
72
- # Get a known parameter
73
- param = us_latest.get_parameter(
74
- "gov.irs.investment.net_investment_income_tax.rate"
75
- )
76
-
77
- assert param is not None
78
- assert param.name == "gov.irs.investment.net_investment_income_tax.rate"
79
- assert param.tax_benefit_model_version == us_latest
80
-
81
-
82
- def test_us_get_parameter_not_found():
83
- """Test error handling when parameter doesn't exist."""
84
- with pytest.raises(
85
- ValueError, match="Parameter 'nonexistent.parameter' not found"
86
- ):
87
- us_latest.get_parameter("nonexistent.parameter")
88
-
89
-
90
- def test_uk_multiple_variables():
91
- """Test getting multiple different variables."""
92
- vars_to_test = [
93
- "income_tax",
94
- "national_insurance",
95
- "universal_credit",
96
- "household_net_income",
97
- ]
98
-
99
- for var_name in vars_to_test:
100
- var = uk_latest.get_variable(var_name)
101
- assert var.name == var_name
102
-
103
-
104
- def test_us_multiple_variables():
105
- """Test getting multiple different variables."""
106
- vars_to_test = [
107
- "income_tax",
108
- "employee_payroll_tax",
109
- "eitc",
110
- "household_net_income",
111
- ]
112
-
113
- for var_name in vars_to_test:
114
- var = us_latest.get_variable(var_name)
115
- assert var.name == var_name
116
-
117
-
118
- def test_uk_multiple_parameters():
119
- """Test getting multiple different parameters."""
120
- params_to_test = [
121
- "gov.hmrc.income_tax.allowances.personal_allowance.amount",
122
- "gov.hmrc.income_tax.rates.uk[0].rate",
123
- "gov.dwp.universal_credit.means_test.reduction_rate",
124
- ]
125
-
126
- for param_name in params_to_test:
127
- param = uk_latest.get_parameter(param_name)
128
- assert param.name == param_name
129
-
130
-
131
- def test_us_multiple_parameters():
132
- """Test getting multiple different parameters."""
133
- params_to_test = [
134
- "gov.irs.investment.net_investment_income_tax.rate",
135
- "gov.irs.self_employment.rate.social_security",
136
- "gov.irs.vita.eligibility.income_limit",
137
- ]
138
-
139
- for param_name in params_to_test:
140
- param = us_latest.get_parameter(param_name)
141
- assert param.name == param_name
@@ -1,112 +0,0 @@
1
- import os
2
- import tempfile
3
-
4
- import pandas as pd
5
- from microdf import MicroDataFrame
6
-
7
- from policyengine.core import Dataset, TaxBenefitModel
8
- from policyengine.tax_benefit_models.uk import (
9
- PolicyEngineUKDataset,
10
- UKYearData,
11
- )
12
-
13
-
14
- def test_imports():
15
- """Test that basic imports work."""
16
- # Verify classes are importable
17
- assert PolicyEngineUKDataset is not None
18
- assert UKYearData is not None
19
- assert Dataset is not None
20
- assert TaxBenefitModel is not None
21
-
22
-
23
- def test_uk_latest_instantiation():
24
- """Test that uk_latest can be instantiated without errors."""
25
- from policyengine.tax_benefit_models.uk import uk_latest
26
-
27
- assert uk_latest is not None
28
- assert uk_latest.version is not None
29
- assert uk_latest.model is not None
30
- assert uk_latest.created_at is not None
31
- assert (
32
- len(uk_latest.variables) > 0
33
- ) # Should have variables from policyengine-uk
34
-
35
-
36
- def test_save_and_load_single_year():
37
- """Test saving and loading a dataset with a single year."""
38
- # Create sample data
39
- person_df = MicroDataFrame(
40
- pd.DataFrame(
41
- {
42
- "person_id": [1, 2, 3],
43
- "age": [25, 30, 35],
44
- "income": [30000, 45000, 60000],
45
- "person_weight": [1.0, 1.0, 1.0],
46
- }
47
- ),
48
- weights="person_weight",
49
- )
50
-
51
- benunit_df = MicroDataFrame(
52
- pd.DataFrame(
53
- {
54
- "benunit_id": [1, 2],
55
- "size": [2, 1],
56
- "total_income": [75000, 60000],
57
- "benunit_weight": [1.0, 1.0],
58
- }
59
- ),
60
- weights="benunit_weight",
61
- )
62
-
63
- household_df = MicroDataFrame(
64
- pd.DataFrame(
65
- {
66
- "household_id": [1],
67
- "num_people": [3],
68
- "rent": [1200],
69
- "household_weight": [1.0],
70
- }
71
- ),
72
- weights="household_weight",
73
- )
74
-
75
- # Create dataset
76
- with tempfile.TemporaryDirectory() as tmpdir:
77
- filepath = os.path.join(tmpdir, "test_dataset.h5")
78
-
79
- dataset = PolicyEngineUKDataset(
80
- name="Test Dataset",
81
- description="A test dataset",
82
- filepath=filepath,
83
- year=2025,
84
- data=UKYearData(
85
- person=person_df, benunit=benunit_df, household=household_df
86
- ),
87
- )
88
-
89
- # Save to file
90
- dataset.save()
91
-
92
- # Load it back
93
- loaded = PolicyEngineUKDataset(
94
- name="Loaded Dataset",
95
- description="Loaded from file",
96
- filepath=filepath,
97
- year=2025,
98
- )
99
- loaded.load()
100
-
101
- # Verify data
102
- assert loaded.year == 2025
103
- # Convert to DataFrame for comparison (MicroDataFrame inherits from DataFrame)
104
- pd.testing.assert_frame_equal(
105
- pd.DataFrame(loaded.data.person), pd.DataFrame(person_df)
106
- )
107
- pd.testing.assert_frame_equal(
108
- pd.DataFrame(loaded.data.benunit), pd.DataFrame(benunit_df)
109
- )
110
- pd.testing.assert_frame_equal(
111
- pd.DataFrame(loaded.data.household), pd.DataFrame(household_df)
112
- )
@@ -1,109 +0,0 @@
1
- """Tests for US dataset creation from HuggingFace paths."""
2
-
3
- import shutil
4
- from pathlib import Path
5
-
6
- import pandas as pd
7
-
8
- from policyengine.tax_benefit_models.us import (
9
- PolicyEngineUSDataset,
10
- create_datasets,
11
- )
12
-
13
-
14
- def test_create_datasets_from_enhanced_cps():
15
- """Test creating datasets from enhanced CPS HuggingFace path."""
16
- # Clean up data directory if it exists
17
- data_dir = Path("./data")
18
- if data_dir.exists():
19
- shutil.rmtree(data_dir)
20
-
21
- # Create datasets for a single year to test
22
- datasets = ["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"]
23
- years = [2024]
24
-
25
- create_datasets(datasets=datasets, years=years)
26
-
27
- # Verify the dataset was created
28
- dataset_file = data_dir / "enhanced_cps_2024_year_2024.h5"
29
- assert dataset_file.exists(), f"Dataset file {dataset_file} should exist"
30
-
31
- # Load and verify dataset structure
32
- dataset = PolicyEngineUSDataset(
33
- name="test",
34
- description="test",
35
- filepath=str(dataset_file),
36
- year=2024,
37
- )
38
- dataset.load()
39
-
40
- # Check all entity types exist
41
- assert dataset.data is not None
42
- assert dataset.data.person is not None
43
- assert dataset.data.household is not None
44
- assert dataset.data.marital_unit is not None
45
- assert dataset.data.family is not None
46
- assert dataset.data.spm_unit is not None
47
- assert dataset.data.tax_unit is not None
48
-
49
- # Check person data has required columns
50
- person_df = pd.DataFrame(dataset.data.person)
51
- assert "person_id" in person_df.columns
52
- assert "person_household_id" in person_df.columns
53
- assert "person_weight" in person_df.columns
54
- assert len(person_df) > 0
55
-
56
- # Check household data
57
- household_df = pd.DataFrame(dataset.data.household)
58
- assert "household_id" in household_df.columns
59
- assert "household_weight" in household_df.columns
60
- assert len(household_df) > 0
61
-
62
- # Check all group entities have weight columns
63
- for entity_name in [
64
- "marital_unit",
65
- "family",
66
- "spm_unit",
67
- "tax_unit",
68
- ]:
69
- entity_df = pd.DataFrame(getattr(dataset.data, entity_name))
70
- assert f"{entity_name}_id" in entity_df.columns
71
- assert f"{entity_name}_weight" in entity_df.columns
72
- assert len(entity_df) > 0
73
-
74
- # Clean up
75
- shutil.rmtree(data_dir)
76
-
77
-
78
- def test_create_datasets_multiple_years():
79
- """Test creating datasets for multiple years."""
80
- # Clean up data directory if it exists
81
- data_dir = Path("./data")
82
- if data_dir.exists():
83
- shutil.rmtree(data_dir)
84
-
85
- datasets = ["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"]
86
- years = [2024, 2025]
87
-
88
- create_datasets(datasets=datasets, years=years)
89
-
90
- # Verify both year datasets were created
91
- for year in years:
92
- dataset_file = data_dir / f"enhanced_cps_2024_year_{year}.h5"
93
- assert dataset_file.exists(), (
94
- f"Dataset file for year {year} should exist"
95
- )
96
-
97
- # Load and verify
98
- dataset = PolicyEngineUSDataset(
99
- name=f"test-{year}",
100
- description=f"test {year}",
101
- filepath=str(dataset_file),
102
- year=year,
103
- )
104
- dataset.load()
105
- assert dataset.data is not None
106
- assert len(pd.DataFrame(dataset.data.person)) > 0
107
-
108
- # Clean up
109
- shutil.rmtree(data_dir)