policyengine 3.1.2__py3-none-any.whl → 3.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
- policyengine/core/simulation.py +7 -0
- policyengine/tax_benefit_models/uk/__init__.py +39 -21
- policyengine/tax_benefit_models/uk/datasets.py +78 -7
- policyengine/tax_benefit_models/uk/model.py +84 -72
- policyengine/tax_benefit_models/uk.py +33 -28
- policyengine/tax_benefit_models/us/__init__.py +9 -1
- policyengine/tax_benefit_models/us/datasets.py +90 -7
- policyengine/tax_benefit_models/us/model.py +70 -58
- policyengine/tax_benefit_models/us.py +6 -0
- {policyengine-3.1.2.dist-info → policyengine-3.1.4.dist-info}/METADATA +1 -1
- {policyengine-3.1.2.dist-info → policyengine-3.1.4.dist-info}/RECORD +15 -15
- {policyengine-3.1.2.dist-info → policyengine-3.1.4.dist-info}/WHEEL +0 -0
- {policyengine-3.1.2.dist-info → policyengine-3.1.4.dist-info}/licenses/LICENSE +0 -0
- {policyengine-3.1.2.dist-info → policyengine-3.1.4.dist-info}/top_level.txt +0 -0
|
Binary file
|
policyengine/core/simulation.py
CHANGED
|
@@ -24,6 +24,13 @@ class Simulation(BaseModel):
|
|
|
24
24
|
def run(self):
|
|
25
25
|
self.tax_benefit_model_version.run(self)
|
|
26
26
|
|
|
27
|
+
def ensure(self):
|
|
28
|
+
try:
|
|
29
|
+
self.tax_benefit_model_version.load(self)
|
|
30
|
+
except Exception:
|
|
31
|
+
self.run()
|
|
32
|
+
self.save()
|
|
33
|
+
|
|
27
34
|
def save(self):
|
|
28
35
|
"""Save the simulation's output dataset."""
|
|
29
36
|
self.tax_benefit_model_version.save(self)
|
|
@@ -1,26 +1,44 @@
|
|
|
1
1
|
"""PolicyEngine UK tax-benefit model."""
|
|
2
2
|
|
|
3
|
-
from .
|
|
4
|
-
from .datasets import PolicyEngineUKDataset, UKYearData, create_datasets
|
|
5
|
-
from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model
|
|
6
|
-
from .outputs import ProgrammeStatistics
|
|
3
|
+
from importlib.util import find_spec
|
|
7
4
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
"PolicyEngineUKDataset",
|
|
11
|
-
"create_datasets",
|
|
12
|
-
"PolicyEngineUK",
|
|
13
|
-
"PolicyEngineUKLatest",
|
|
14
|
-
"uk_model",
|
|
15
|
-
"uk_latest",
|
|
16
|
-
"general_policy_reform_analysis",
|
|
17
|
-
"ProgrammeStatistics",
|
|
18
|
-
]
|
|
5
|
+
if find_spec("policyengine_uk") is not None:
|
|
6
|
+
from policyengine.core import Dataset
|
|
19
7
|
|
|
20
|
-
|
|
21
|
-
from
|
|
8
|
+
from .analysis import general_policy_reform_analysis
|
|
9
|
+
from .datasets import (
|
|
10
|
+
PolicyEngineUKDataset,
|
|
11
|
+
UKYearData,
|
|
12
|
+
create_datasets,
|
|
13
|
+
ensure_datasets,
|
|
14
|
+
load_datasets,
|
|
15
|
+
)
|
|
16
|
+
from .model import (
|
|
17
|
+
PolicyEngineUK,
|
|
18
|
+
PolicyEngineUKLatest,
|
|
19
|
+
uk_latest,
|
|
20
|
+
uk_model,
|
|
21
|
+
)
|
|
22
|
+
from .outputs import ProgrammeStatistics
|
|
22
23
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
# Rebuild Pydantic models to resolve forward references
|
|
25
|
+
Dataset.model_rebuild()
|
|
26
|
+
UKYearData.model_rebuild()
|
|
27
|
+
PolicyEngineUKDataset.model_rebuild()
|
|
28
|
+
PolicyEngineUKLatest.model_rebuild()
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"UKYearData",
|
|
32
|
+
"PolicyEngineUKDataset",
|
|
33
|
+
"create_datasets",
|
|
34
|
+
"load_datasets",
|
|
35
|
+
"ensure_datasets",
|
|
36
|
+
"PolicyEngineUK",
|
|
37
|
+
"PolicyEngineUKLatest",
|
|
38
|
+
"uk_model",
|
|
39
|
+
"uk_latest",
|
|
40
|
+
"general_policy_reform_analysis",
|
|
41
|
+
"ProgrammeStatistics",
|
|
42
|
+
]
|
|
43
|
+
else:
|
|
44
|
+
__all__ = []
|
|
@@ -37,11 +37,7 @@ class PolicyEngineUKDataset(Dataset):
|
|
|
37
37
|
if self.data is not None:
|
|
38
38
|
self.save()
|
|
39
39
|
elif self.filepath and not self.data:
|
|
40
|
-
|
|
41
|
-
self.load()
|
|
42
|
-
except FileNotFoundError:
|
|
43
|
-
# File doesn't exist yet, that's OK
|
|
44
|
-
pass
|
|
40
|
+
self.load()
|
|
45
41
|
|
|
46
42
|
def save(self) -> None:
|
|
47
43
|
"""Save dataset to HDF5 file."""
|
|
@@ -85,7 +81,9 @@ def create_datasets(
|
|
|
85
81
|
"hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
|
|
86
82
|
],
|
|
87
83
|
years: list[int] = [2026, 2027, 2028, 2029, 2030],
|
|
88
|
-
|
|
84
|
+
data_folder: str = "./data",
|
|
85
|
+
) -> dict[str, PolicyEngineUKDataset]:
|
|
86
|
+
result = {}
|
|
89
87
|
for dataset in datasets:
|
|
90
88
|
from policyengine_uk import Microsimulation
|
|
91
89
|
|
|
@@ -139,9 +137,10 @@ def create_datasets(
|
|
|
139
137
|
)
|
|
140
138
|
|
|
141
139
|
uk_dataset = PolicyEngineUKDataset(
|
|
140
|
+
id=f"{Path(dataset).stem}_year_{year}",
|
|
142
141
|
name=f"{dataset}-year-{year}",
|
|
143
142
|
description=f"UK Dataset for year {year} based on {dataset}",
|
|
144
|
-
filepath=f"
|
|
143
|
+
filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5",
|
|
145
144
|
year=year,
|
|
146
145
|
data=UKYearData(
|
|
147
146
|
person=MicroDataFrame(person_df, weights="person_weight"),
|
|
@@ -154,3 +153,75 @@ def create_datasets(
|
|
|
154
153
|
),
|
|
155
154
|
)
|
|
156
155
|
uk_dataset.save()
|
|
156
|
+
|
|
157
|
+
dataset_key = f"{Path(dataset).stem}_{year}"
|
|
158
|
+
result[dataset_key] = uk_dataset
|
|
159
|
+
|
|
160
|
+
return result
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def load_datasets(
|
|
164
|
+
datasets: list[str] = [
|
|
165
|
+
"hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
|
|
166
|
+
"hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
|
|
167
|
+
],
|
|
168
|
+
years: list[int] = [2026, 2027, 2028, 2029, 2030],
|
|
169
|
+
data_folder: str = "./data",
|
|
170
|
+
) -> dict[str, PolicyEngineUKDataset]:
|
|
171
|
+
result = {}
|
|
172
|
+
for dataset in datasets:
|
|
173
|
+
for year in years:
|
|
174
|
+
filepath = f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
|
|
175
|
+
uk_dataset = PolicyEngineUKDataset(
|
|
176
|
+
name=f"{dataset}-year-{year}",
|
|
177
|
+
description=f"UK Dataset for year {year} based on {dataset}",
|
|
178
|
+
filepath=filepath,
|
|
179
|
+
year=year,
|
|
180
|
+
)
|
|
181
|
+
uk_dataset.load()
|
|
182
|
+
|
|
183
|
+
dataset_key = f"{Path(dataset).stem}_{year}"
|
|
184
|
+
result[dataset_key] = uk_dataset
|
|
185
|
+
|
|
186
|
+
return result
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def ensure_datasets(
|
|
190
|
+
datasets: list[str] = [
|
|
191
|
+
"hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
|
|
192
|
+
"hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
|
|
193
|
+
],
|
|
194
|
+
years: list[int] = [2026, 2027, 2028, 2029, 2030],
|
|
195
|
+
data_folder: str = "./data",
|
|
196
|
+
) -> dict[str, PolicyEngineUKDataset]:
|
|
197
|
+
"""Ensure datasets exist, loading if available or creating if not.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
datasets: List of HuggingFace dataset paths
|
|
201
|
+
years: List of years to load/create data for
|
|
202
|
+
data_folder: Directory containing or to save the dataset files
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
Dictionary mapping dataset keys to PolicyEngineUKDataset objects
|
|
206
|
+
"""
|
|
207
|
+
# Check if all dataset files exist
|
|
208
|
+
all_exist = True
|
|
209
|
+
for dataset in datasets:
|
|
210
|
+
for year in years:
|
|
211
|
+
filepath = Path(
|
|
212
|
+
f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
|
|
213
|
+
)
|
|
214
|
+
if not filepath.exists():
|
|
215
|
+
all_exist = False
|
|
216
|
+
break
|
|
217
|
+
if not all_exist:
|
|
218
|
+
break
|
|
219
|
+
|
|
220
|
+
if all_exist:
|
|
221
|
+
return load_datasets(
|
|
222
|
+
datasets=datasets, years=years, data_folder=data_folder
|
|
223
|
+
)
|
|
224
|
+
else:
|
|
225
|
+
return create_datasets(
|
|
226
|
+
datasets=datasets, years=years, data_folder=data_folder
|
|
227
|
+
)
|
|
@@ -44,6 +44,73 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion):
|
|
|
44
44
|
upload_time
|
|
45
45
|
)
|
|
46
46
|
|
|
47
|
+
entity_variables: dict[str, list[str]] = {
|
|
48
|
+
"person": [
|
|
49
|
+
# IDs and weights
|
|
50
|
+
"person_id",
|
|
51
|
+
"benunit_id",
|
|
52
|
+
"household_id",
|
|
53
|
+
"person_weight",
|
|
54
|
+
# Demographics
|
|
55
|
+
"age",
|
|
56
|
+
"gender",
|
|
57
|
+
"is_adult",
|
|
58
|
+
"is_SP_age",
|
|
59
|
+
"is_child",
|
|
60
|
+
# Income
|
|
61
|
+
"employment_income",
|
|
62
|
+
"self_employment_income",
|
|
63
|
+
"pension_income",
|
|
64
|
+
"private_pension_income",
|
|
65
|
+
"savings_interest_income",
|
|
66
|
+
"dividend_income",
|
|
67
|
+
"property_income",
|
|
68
|
+
"total_income",
|
|
69
|
+
"earned_income",
|
|
70
|
+
# Benefits
|
|
71
|
+
"universal_credit",
|
|
72
|
+
"child_benefit",
|
|
73
|
+
"pension_credit",
|
|
74
|
+
"income_support",
|
|
75
|
+
"working_tax_credit",
|
|
76
|
+
"child_tax_credit",
|
|
77
|
+
# Tax
|
|
78
|
+
"income_tax",
|
|
79
|
+
"national_insurance",
|
|
80
|
+
],
|
|
81
|
+
"benunit": [
|
|
82
|
+
# IDs and weights
|
|
83
|
+
"benunit_id",
|
|
84
|
+
"benunit_weight",
|
|
85
|
+
# Structure
|
|
86
|
+
"family_type",
|
|
87
|
+
# Income and benefits
|
|
88
|
+
"universal_credit",
|
|
89
|
+
"child_benefit",
|
|
90
|
+
"working_tax_credit",
|
|
91
|
+
"child_tax_credit",
|
|
92
|
+
],
|
|
93
|
+
"household": [
|
|
94
|
+
# IDs and weights
|
|
95
|
+
"household_id",
|
|
96
|
+
"household_weight",
|
|
97
|
+
# Income measures
|
|
98
|
+
"household_net_income",
|
|
99
|
+
"hbai_household_net_income",
|
|
100
|
+
"equiv_hbai_household_net_income",
|
|
101
|
+
"household_market_income",
|
|
102
|
+
"household_gross_income",
|
|
103
|
+
# Benefits and tax
|
|
104
|
+
"household_benefits",
|
|
105
|
+
"household_tax",
|
|
106
|
+
"vat",
|
|
107
|
+
# Housing
|
|
108
|
+
"rent",
|
|
109
|
+
"council_tax",
|
|
110
|
+
"tenure_type",
|
|
111
|
+
],
|
|
112
|
+
}
|
|
113
|
+
|
|
47
114
|
def __init__(self, **kwargs: dict):
|
|
48
115
|
super().__init__(**kwargs)
|
|
49
116
|
from policyengine_core.enums import Enum
|
|
@@ -153,80 +220,13 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion):
|
|
|
153
220
|
)
|
|
154
221
|
modifier(microsim)
|
|
155
222
|
|
|
156
|
-
entity_variables = {
|
|
157
|
-
"person": [
|
|
158
|
-
# IDs and weights
|
|
159
|
-
"person_id",
|
|
160
|
-
"benunit_id",
|
|
161
|
-
"household_id",
|
|
162
|
-
"person_weight",
|
|
163
|
-
# Demographics
|
|
164
|
-
"age",
|
|
165
|
-
"gender",
|
|
166
|
-
"is_adult",
|
|
167
|
-
"is_SP_age",
|
|
168
|
-
"is_child",
|
|
169
|
-
# Income
|
|
170
|
-
"employment_income",
|
|
171
|
-
"self_employment_income",
|
|
172
|
-
"pension_income",
|
|
173
|
-
"private_pension_income",
|
|
174
|
-
"savings_interest_income",
|
|
175
|
-
"dividend_income",
|
|
176
|
-
"property_income",
|
|
177
|
-
"total_income",
|
|
178
|
-
"earned_income",
|
|
179
|
-
# Benefits
|
|
180
|
-
"universal_credit",
|
|
181
|
-
"child_benefit",
|
|
182
|
-
"pension_credit",
|
|
183
|
-
"income_support",
|
|
184
|
-
"working_tax_credit",
|
|
185
|
-
"child_tax_credit",
|
|
186
|
-
# Tax
|
|
187
|
-
"income_tax",
|
|
188
|
-
"national_insurance",
|
|
189
|
-
],
|
|
190
|
-
"benunit": [
|
|
191
|
-
# IDs and weights
|
|
192
|
-
"benunit_id",
|
|
193
|
-
"benunit_weight",
|
|
194
|
-
# Structure
|
|
195
|
-
"family_type",
|
|
196
|
-
# Income and benefits
|
|
197
|
-
"universal_credit",
|
|
198
|
-
"child_benefit",
|
|
199
|
-
"working_tax_credit",
|
|
200
|
-
"child_tax_credit",
|
|
201
|
-
],
|
|
202
|
-
"household": [
|
|
203
|
-
# IDs and weights
|
|
204
|
-
"household_id",
|
|
205
|
-
"household_weight",
|
|
206
|
-
# Income measures
|
|
207
|
-
"household_net_income",
|
|
208
|
-
"hbai_household_net_income",
|
|
209
|
-
"equiv_hbai_household_net_income",
|
|
210
|
-
"household_market_income",
|
|
211
|
-
"household_gross_income",
|
|
212
|
-
# Benefits and tax
|
|
213
|
-
"household_benefits",
|
|
214
|
-
"household_tax",
|
|
215
|
-
"vat",
|
|
216
|
-
# Housing
|
|
217
|
-
"rent",
|
|
218
|
-
"council_tax",
|
|
219
|
-
"tenure_type",
|
|
220
|
-
],
|
|
221
|
-
}
|
|
222
|
-
|
|
223
223
|
data = {
|
|
224
224
|
"person": pd.DataFrame(),
|
|
225
225
|
"benunit": pd.DataFrame(),
|
|
226
226
|
"household": pd.DataFrame(),
|
|
227
227
|
}
|
|
228
228
|
|
|
229
|
-
for entity, variables in entity_variables.items():
|
|
229
|
+
for entity, variables in self.entity_variables.items():
|
|
230
230
|
for var in variables:
|
|
231
231
|
data[entity][var] = microsim.calculate(
|
|
232
232
|
var, period=simulation.dataset.year, map_to=entity
|
|
@@ -265,17 +265,29 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion):
|
|
|
265
265
|
|
|
266
266
|
def load(self, simulation: "Simulation"):
|
|
267
267
|
"""Load the simulation's output dataset."""
|
|
268
|
+
import os
|
|
269
|
+
|
|
270
|
+
filepath = str(
|
|
271
|
+
Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
|
|
272
|
+
)
|
|
273
|
+
|
|
268
274
|
simulation.output_dataset = PolicyEngineUKDataset(
|
|
269
275
|
id=simulation.id,
|
|
270
276
|
name=simulation.dataset.name,
|
|
271
277
|
description=simulation.dataset.description,
|
|
272
|
-
filepath=
|
|
273
|
-
Path(simulation.dataset.filepath).parent
|
|
274
|
-
/ (simulation.id + ".h5")
|
|
275
|
-
),
|
|
278
|
+
filepath=filepath,
|
|
276
279
|
year=simulation.dataset.year,
|
|
277
280
|
is_output_dataset=True,
|
|
278
281
|
)
|
|
279
282
|
|
|
283
|
+
# Load timestamps from file system metadata
|
|
284
|
+
if os.path.exists(filepath):
|
|
285
|
+
simulation.created_at = datetime.datetime.fromtimestamp(
|
|
286
|
+
os.path.getctime(filepath)
|
|
287
|
+
)
|
|
288
|
+
simulation.updated_at = datetime.datetime.fromtimestamp(
|
|
289
|
+
os.path.getmtime(filepath)
|
|
290
|
+
)
|
|
291
|
+
|
|
280
292
|
|
|
281
293
|
uk_latest = PolicyEngineUKLatest()
|
|
@@ -1,33 +1,38 @@
|
|
|
1
1
|
"""PolicyEngine UK tax-benefit model - imports from uk/ module."""
|
|
2
2
|
|
|
3
|
-
from .
|
|
4
|
-
PolicyEngineUK,
|
|
5
|
-
PolicyEngineUKDataset,
|
|
6
|
-
PolicyEngineUKLatest,
|
|
7
|
-
ProgrammeStatistics,
|
|
8
|
-
UKYearData,
|
|
9
|
-
create_datasets,
|
|
10
|
-
general_policy_reform_analysis,
|
|
11
|
-
uk_latest,
|
|
12
|
-
uk_model,
|
|
13
|
-
)
|
|
3
|
+
from importlib.util import find_spec
|
|
14
4
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
5
|
+
if find_spec("policyengine_uk") is not None:
|
|
6
|
+
from .uk import (
|
|
7
|
+
PolicyEngineUK,
|
|
8
|
+
PolicyEngineUKDataset,
|
|
9
|
+
PolicyEngineUKLatest,
|
|
10
|
+
ProgrammeStatistics,
|
|
11
|
+
UKYearData,
|
|
12
|
+
create_datasets,
|
|
13
|
+
ensure_datasets,
|
|
14
|
+
general_policy_reform_analysis,
|
|
15
|
+
load_datasets,
|
|
16
|
+
uk_latest,
|
|
17
|
+
uk_model,
|
|
18
|
+
)
|
|
26
19
|
|
|
27
|
-
|
|
28
|
-
|
|
20
|
+
__all__ = [
|
|
21
|
+
"UKYearData",
|
|
22
|
+
"PolicyEngineUKDataset",
|
|
23
|
+
"create_datasets",
|
|
24
|
+
"load_datasets",
|
|
25
|
+
"ensure_datasets",
|
|
26
|
+
"PolicyEngineUK",
|
|
27
|
+
"PolicyEngineUKLatest",
|
|
28
|
+
"uk_model",
|
|
29
|
+
"uk_latest",
|
|
30
|
+
"general_policy_reform_analysis",
|
|
31
|
+
"ProgrammeStatistics",
|
|
32
|
+
]
|
|
29
33
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
# Rebuild models to resolve forward references
|
|
35
|
+
PolicyEngineUKDataset.model_rebuild()
|
|
36
|
+
PolicyEngineUKLatest.model_rebuild()
|
|
37
|
+
else:
|
|
38
|
+
__all__ = []
|
|
@@ -6,7 +6,13 @@ if find_spec("policyengine_us") is not None:
|
|
|
6
6
|
from policyengine.core import Dataset
|
|
7
7
|
|
|
8
8
|
from .analysis import general_policy_reform_analysis
|
|
9
|
-
from .datasets import
|
|
9
|
+
from .datasets import (
|
|
10
|
+
PolicyEngineUSDataset,
|
|
11
|
+
USYearData,
|
|
12
|
+
create_datasets,
|
|
13
|
+
ensure_datasets,
|
|
14
|
+
load_datasets,
|
|
15
|
+
)
|
|
10
16
|
from .model import (
|
|
11
17
|
PolicyEngineUS,
|
|
12
18
|
PolicyEngineUSLatest,
|
|
@@ -25,6 +31,8 @@ if find_spec("policyengine_us") is not None:
|
|
|
25
31
|
"USYearData",
|
|
26
32
|
"PolicyEngineUSDataset",
|
|
27
33
|
"create_datasets",
|
|
34
|
+
"load_datasets",
|
|
35
|
+
"ensure_datasets",
|
|
28
36
|
"PolicyEngineUS",
|
|
29
37
|
"PolicyEngineUSLatest",
|
|
30
38
|
"us_model",
|
|
@@ -44,11 +44,7 @@ class PolicyEngineUSDataset(Dataset):
|
|
|
44
44
|
if self.data is not None:
|
|
45
45
|
self.save()
|
|
46
46
|
elif self.filepath and not self.data:
|
|
47
|
-
|
|
48
|
-
self.load()
|
|
49
|
-
except FileNotFoundError:
|
|
50
|
-
# File doesn't exist yet, that's OK
|
|
51
|
-
pass
|
|
47
|
+
self.load()
|
|
52
48
|
|
|
53
49
|
def save(self) -> None:
|
|
54
50
|
"""Save dataset to HDF5 file."""
|
|
@@ -112,15 +108,21 @@ def create_datasets(
|
|
|
112
108
|
"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
|
|
113
109
|
],
|
|
114
110
|
years: list[int] = [2024, 2025, 2026, 2027, 2028],
|
|
115
|
-
|
|
111
|
+
data_folder: str = "./data",
|
|
112
|
+
) -> dict[str, PolicyEngineUSDataset]:
|
|
116
113
|
"""Create PolicyEngineUSDataset instances from HuggingFace dataset paths.
|
|
117
114
|
|
|
118
115
|
Args:
|
|
119
116
|
datasets: List of HuggingFace dataset paths (e.g., "hf://policyengine/policyengine-us-data/cps_2024.h5")
|
|
120
117
|
years: List of years to extract data for
|
|
118
|
+
data_folder: Directory to save the dataset files
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects
|
|
121
122
|
"""
|
|
122
123
|
from policyengine_us import Microsimulation
|
|
123
124
|
|
|
125
|
+
result = {}
|
|
124
126
|
for dataset in datasets:
|
|
125
127
|
sim = Microsimulation(dataset=dataset)
|
|
126
128
|
|
|
@@ -263,9 +265,10 @@ def create_datasets(
|
|
|
263
265
|
tax_unit_df = entity_df
|
|
264
266
|
|
|
265
267
|
us_dataset = PolicyEngineUSDataset(
|
|
268
|
+
id=f"{Path(dataset).stem}_year_{year}",
|
|
266
269
|
name=f"{dataset}-year-{year}",
|
|
267
270
|
description=f"US Dataset for year {year} based on {dataset}",
|
|
268
|
-
filepath=f"
|
|
271
|
+
filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5",
|
|
269
272
|
year=year,
|
|
270
273
|
data=USYearData(
|
|
271
274
|
person=MicroDataFrame(person_df, weights="person_weight"),
|
|
@@ -285,3 +288,83 @@ def create_datasets(
|
|
|
285
288
|
),
|
|
286
289
|
)
|
|
287
290
|
us_dataset.save()
|
|
291
|
+
|
|
292
|
+
dataset_key = f"{Path(dataset).stem}_{year}"
|
|
293
|
+
result[dataset_key] = us_dataset
|
|
294
|
+
|
|
295
|
+
return result
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def load_datasets(
|
|
299
|
+
datasets: list[str] = [
|
|
300
|
+
"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
|
|
301
|
+
],
|
|
302
|
+
years: list[int] = [2024, 2025, 2026, 2027, 2028],
|
|
303
|
+
data_folder: str = "./data",
|
|
304
|
+
) -> dict[str, PolicyEngineUSDataset]:
|
|
305
|
+
"""Load PolicyEngineUSDataset instances from saved HDF5 files.
|
|
306
|
+
|
|
307
|
+
Args:
|
|
308
|
+
datasets: List of HuggingFace dataset paths (used to derive file names)
|
|
309
|
+
years: List of years to load data for
|
|
310
|
+
data_folder: Directory containing the dataset files
|
|
311
|
+
|
|
312
|
+
Returns:
|
|
313
|
+
Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects
|
|
314
|
+
"""
|
|
315
|
+
result = {}
|
|
316
|
+
for dataset in datasets:
|
|
317
|
+
for year in years:
|
|
318
|
+
filepath = f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
|
|
319
|
+
us_dataset = PolicyEngineUSDataset(
|
|
320
|
+
name=f"{dataset}-year-{year}",
|
|
321
|
+
description=f"US Dataset for year {year} based on {dataset}",
|
|
322
|
+
filepath=filepath,
|
|
323
|
+
year=year,
|
|
324
|
+
)
|
|
325
|
+
us_dataset.load()
|
|
326
|
+
|
|
327
|
+
dataset_key = f"{Path(dataset).stem}_{year}"
|
|
328
|
+
result[dataset_key] = us_dataset
|
|
329
|
+
|
|
330
|
+
return result
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def ensure_datasets(
|
|
334
|
+
datasets: list[str] = [
|
|
335
|
+
"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
|
|
336
|
+
],
|
|
337
|
+
years: list[int] = [2024, 2025, 2026, 2027, 2028],
|
|
338
|
+
data_folder: str = "./data",
|
|
339
|
+
) -> dict[str, PolicyEngineUSDataset]:
|
|
340
|
+
"""Ensure datasets exist, loading if available or creating if not.
|
|
341
|
+
|
|
342
|
+
Args:
|
|
343
|
+
datasets: List of HuggingFace dataset paths
|
|
344
|
+
years: List of years to load/create data for
|
|
345
|
+
data_folder: Directory containing or to save the dataset files
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
Dictionary mapping dataset keys to PolicyEngineUSDataset objects
|
|
349
|
+
"""
|
|
350
|
+
# Check if all dataset files exist
|
|
351
|
+
all_exist = True
|
|
352
|
+
for dataset in datasets:
|
|
353
|
+
for year in years:
|
|
354
|
+
filepath = Path(
|
|
355
|
+
f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
|
|
356
|
+
)
|
|
357
|
+
if not filepath.exists():
|
|
358
|
+
all_exist = False
|
|
359
|
+
break
|
|
360
|
+
if not all_exist:
|
|
361
|
+
break
|
|
362
|
+
|
|
363
|
+
if all_exist:
|
|
364
|
+
return load_datasets(
|
|
365
|
+
datasets=datasets, years=years, data_folder=data_folder
|
|
366
|
+
)
|
|
367
|
+
else:
|
|
368
|
+
return create_datasets(
|
|
369
|
+
datasets=datasets, years=years, data_folder=data_folder
|
|
370
|
+
)
|
|
@@ -45,6 +45,59 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion):
|
|
|
45
45
|
version: str = None
|
|
46
46
|
created_at: datetime.datetime = None
|
|
47
47
|
|
|
48
|
+
entity_variables: dict[str, list[str]] = {
|
|
49
|
+
"person": [
|
|
50
|
+
# IDs and weights
|
|
51
|
+
"person_id",
|
|
52
|
+
"marital_unit_id",
|
|
53
|
+
"family_id",
|
|
54
|
+
"spm_unit_id",
|
|
55
|
+
"tax_unit_id",
|
|
56
|
+
"household_id",
|
|
57
|
+
"person_weight",
|
|
58
|
+
# Demographics
|
|
59
|
+
"age",
|
|
60
|
+
# Income
|
|
61
|
+
"employment_income",
|
|
62
|
+
# Benefits
|
|
63
|
+
"ssi",
|
|
64
|
+
"social_security",
|
|
65
|
+
"medicaid",
|
|
66
|
+
"unemployment_compensation",
|
|
67
|
+
],
|
|
68
|
+
"marital_unit": [
|
|
69
|
+
"marital_unit_id",
|
|
70
|
+
"marital_unit_weight",
|
|
71
|
+
],
|
|
72
|
+
"family": [
|
|
73
|
+
"family_id",
|
|
74
|
+
"family_weight",
|
|
75
|
+
],
|
|
76
|
+
"spm_unit": [
|
|
77
|
+
"spm_unit_id",
|
|
78
|
+
"spm_unit_weight",
|
|
79
|
+
"snap",
|
|
80
|
+
"tanf",
|
|
81
|
+
"spm_unit_net_income",
|
|
82
|
+
],
|
|
83
|
+
"tax_unit": [
|
|
84
|
+
"tax_unit_id",
|
|
85
|
+
"tax_unit_weight",
|
|
86
|
+
"income_tax",
|
|
87
|
+
"employee_payroll_tax",
|
|
88
|
+
"eitc",
|
|
89
|
+
"ctc",
|
|
90
|
+
],
|
|
91
|
+
"household": [
|
|
92
|
+
"household_id",
|
|
93
|
+
"household_weight",
|
|
94
|
+
"household_net_income",
|
|
95
|
+
"household_benefits",
|
|
96
|
+
"household_tax",
|
|
97
|
+
"household_market_income",
|
|
98
|
+
],
|
|
99
|
+
}
|
|
100
|
+
|
|
48
101
|
def __init__(self, **kwargs: dict):
|
|
49
102
|
# Lazy-load package metadata if not provided
|
|
50
103
|
if "version" not in kwargs or kwargs.get("version") is None:
|
|
@@ -156,59 +209,6 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion):
|
|
|
156
209
|
)
|
|
157
210
|
modifier(microsim)
|
|
158
211
|
|
|
159
|
-
entity_variables = {
|
|
160
|
-
"person": [
|
|
161
|
-
# IDs and weights
|
|
162
|
-
"person_id",
|
|
163
|
-
"marital_unit_id",
|
|
164
|
-
"family_id",
|
|
165
|
-
"spm_unit_id",
|
|
166
|
-
"tax_unit_id",
|
|
167
|
-
"household_id",
|
|
168
|
-
"person_weight",
|
|
169
|
-
# Demographics
|
|
170
|
-
"age",
|
|
171
|
-
# Income
|
|
172
|
-
"employment_income",
|
|
173
|
-
# Benefits
|
|
174
|
-
"ssi",
|
|
175
|
-
"social_security",
|
|
176
|
-
"medicaid",
|
|
177
|
-
"unemployment_compensation",
|
|
178
|
-
],
|
|
179
|
-
"marital_unit": [
|
|
180
|
-
"marital_unit_id",
|
|
181
|
-
"marital_unit_weight",
|
|
182
|
-
],
|
|
183
|
-
"family": [
|
|
184
|
-
"family_id",
|
|
185
|
-
"family_weight",
|
|
186
|
-
],
|
|
187
|
-
"spm_unit": [
|
|
188
|
-
"spm_unit_id",
|
|
189
|
-
"spm_unit_weight",
|
|
190
|
-
"snap",
|
|
191
|
-
"tanf",
|
|
192
|
-
"spm_unit_net_income",
|
|
193
|
-
],
|
|
194
|
-
"tax_unit": [
|
|
195
|
-
"tax_unit_id",
|
|
196
|
-
"tax_unit_weight",
|
|
197
|
-
"income_tax",
|
|
198
|
-
"employee_payroll_tax",
|
|
199
|
-
"eitc",
|
|
200
|
-
"ctc",
|
|
201
|
-
],
|
|
202
|
-
"household": [
|
|
203
|
-
"household_id",
|
|
204
|
-
"household_weight",
|
|
205
|
-
"household_net_income",
|
|
206
|
-
"household_benefits",
|
|
207
|
-
"household_tax",
|
|
208
|
-
"household_market_income",
|
|
209
|
-
],
|
|
210
|
-
}
|
|
211
|
-
|
|
212
212
|
data = {
|
|
213
213
|
"person": pd.DataFrame(),
|
|
214
214
|
"marital_unit": pd.DataFrame(),
|
|
@@ -259,7 +259,7 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion):
|
|
|
259
259
|
data["person"][target_col] = person_input_df[col].values
|
|
260
260
|
|
|
261
261
|
# Then calculate non-ID, non-weight variables from simulation
|
|
262
|
-
for entity, variables in entity_variables.items():
|
|
262
|
+
for entity, variables in self.entity_variables.items():
|
|
263
263
|
for var in variables:
|
|
264
264
|
if var not in id_columns and var not in weight_columns:
|
|
265
265
|
data[entity][var] = microsim.calculate(
|
|
@@ -311,18 +311,30 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion):
|
|
|
311
311
|
|
|
312
312
|
def load(self, simulation: "Simulation"):
|
|
313
313
|
"""Load the simulation's output dataset."""
|
|
314
|
+
import os
|
|
315
|
+
|
|
316
|
+
filepath = str(
|
|
317
|
+
Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
|
|
318
|
+
)
|
|
319
|
+
|
|
314
320
|
simulation.output_dataset = PolicyEngineUSDataset(
|
|
315
321
|
id=simulation.id,
|
|
316
322
|
name=simulation.dataset.name,
|
|
317
323
|
description=simulation.dataset.description,
|
|
318
|
-
filepath=
|
|
319
|
-
Path(simulation.dataset.filepath).parent
|
|
320
|
-
/ (simulation.id + ".h5")
|
|
321
|
-
),
|
|
324
|
+
filepath=filepath,
|
|
322
325
|
year=simulation.dataset.year,
|
|
323
326
|
is_output_dataset=True,
|
|
324
327
|
)
|
|
325
328
|
|
|
329
|
+
# Load timestamps from file system metadata
|
|
330
|
+
if os.path.exists(filepath):
|
|
331
|
+
simulation.created_at = datetime.datetime.fromtimestamp(
|
|
332
|
+
os.path.getctime(filepath)
|
|
333
|
+
)
|
|
334
|
+
simulation.updated_at = datetime.datetime.fromtimestamp(
|
|
335
|
+
os.path.getmtime(filepath)
|
|
336
|
+
)
|
|
337
|
+
|
|
326
338
|
def _build_simulation_from_dataset(self, microsim, dataset, system):
|
|
327
339
|
"""Build a PolicyEngine Core simulation from dataset entity IDs.
|
|
328
340
|
|
|
@@ -9,7 +9,10 @@ if find_spec("policyengine_us") is not None:
|
|
|
9
9
|
PolicyEngineUSLatest,
|
|
10
10
|
ProgramStatistics,
|
|
11
11
|
USYearData,
|
|
12
|
+
create_datasets,
|
|
13
|
+
ensure_datasets,
|
|
12
14
|
general_policy_reform_analysis,
|
|
15
|
+
load_datasets,
|
|
13
16
|
us_latest,
|
|
14
17
|
us_model,
|
|
15
18
|
)
|
|
@@ -17,6 +20,9 @@ if find_spec("policyengine_us") is not None:
|
|
|
17
20
|
__all__ = [
|
|
18
21
|
"USYearData",
|
|
19
22
|
"PolicyEngineUSDataset",
|
|
23
|
+
"create_datasets",
|
|
24
|
+
"load_datasets",
|
|
25
|
+
"ensure_datasets",
|
|
20
26
|
"PolicyEngineUS",
|
|
21
27
|
"PolicyEngineUSLatest",
|
|
22
28
|
"us_model",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
policyengine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
policyengine/__pycache__/__init__.cpython-313.pyc,sha256=
|
|
2
|
+
policyengine/__pycache__/__init__.cpython-313.pyc,sha256=37QsU34QzZ56-bY7bRgKUcPPwi-onLZ481cHrbg3PDU,175
|
|
3
3
|
policyengine/core/__init__.py,sha256=KBVhkqzkvjWLDDwk96vquQKL63ZFuLen5AzBOBnO9pg,912
|
|
4
4
|
policyengine/core/dataset.py,sha256=SFicjsZgMeKgJjlt8z98fOUWG2dfod0Q9NBy_m9PSc8,15506
|
|
5
5
|
policyengine/core/dataset_version.py,sha256=6KeFCRGQto_Yyl4QY4Vo2JFythjaXrNAOHQiwRGESyM,378
|
|
@@ -8,7 +8,7 @@ policyengine/core/output.py,sha256=cCW4vbzkLdQaT_nJTyDJBl7Hubm7nZeRuR7aVG1dKvg,6
|
|
|
8
8
|
policyengine/core/parameter.py,sha256=5nBCw-6-BCfW-_uFqvAN5zcP_Vfz1mAmQsi3peWAbZA,407
|
|
9
9
|
policyengine/core/parameter_value.py,sha256=b0ts1kbWcwjPSYnZm2rlCylmTLPJRLxDL8z3RmxM5OI,377
|
|
10
10
|
policyengine/core/policy.py,sha256=ExMrUDMvNk_uuOL0cSm0UCzDyGka0t_yk6x4U0Kp6Ww,1635
|
|
11
|
-
policyengine/core/simulation.py,sha256=
|
|
11
|
+
policyengine/core/simulation.py,sha256=yvvved75XMcGP3Bj9E2tmKRxvI-DQVZv7k4uTETwBm0,1134
|
|
12
12
|
policyengine/core/tax_benefit_model.py,sha256=2Yc1RlQrUG7djDMZbJOQH4Ns86_lOnLeISCGR4-9zMo,176
|
|
13
13
|
policyengine/core/tax_benefit_model_version.py,sha256=V1CGft5Y6YflMASx0wR3V73jr-WqQu2R8N5QVMRm9yw,2752
|
|
14
14
|
policyengine/core/variable.py,sha256=AjSImORlRkh05xhYxyeT6GFMOfViRzYg0qRQAIj-mxo,350
|
|
@@ -16,24 +16,24 @@ policyengine/outputs/__init__.py,sha256=IJUmLP0Og41VrwiqhJF-a9-3fIb4nlXpS7uFuVCI
|
|
|
16
16
|
policyengine/outputs/aggregate.py,sha256=exI-U04OF5kVf2BBYV6sf8VldIWnT_IzxgkBs5wtnCw,4846
|
|
17
17
|
policyengine/outputs/change_aggregate.py,sha256=tK4K87YlByKikqFaB7OHyh1SqAuGtUnLL7cSF_EhrOs,7373
|
|
18
18
|
policyengine/outputs/decile_impact.py,sha256=jclhbj5U-xX8D-myy0SuWeJFVfQTqJDCh7qBXugak5U,4811
|
|
19
|
-
policyengine/tax_benefit_models/uk.py,sha256=
|
|
20
|
-
policyengine/tax_benefit_models/us.py,sha256=
|
|
21
|
-
policyengine/tax_benefit_models/uk/__init__.py,sha256=
|
|
19
|
+
policyengine/tax_benefit_models/uk.py,sha256=HzAG_dORmsj1NJ9pd9WrqwgZPe9DUDrZ1wV5LuVCKAg,950
|
|
20
|
+
policyengine/tax_benefit_models/us.py,sha256=G51dAmHo8NJLb2mnbne6iO5eNaatCGUd_2unvawwF84,946
|
|
21
|
+
policyengine/tax_benefit_models/uk/__init__.py,sha256=AiA74iED5FEryvUCMfVZi6pYDYuTfQcj9B01h8J5xFA,1105
|
|
22
22
|
policyengine/tax_benefit_models/uk/analysis.py,sha256=O4eYJYF7tsgiuLuiWMU0OXq7ss6U8-vzlg6nC2U8sgU,3175
|
|
23
|
-
policyengine/tax_benefit_models/uk/datasets.py,sha256
|
|
24
|
-
policyengine/tax_benefit_models/uk/model.py,sha256
|
|
23
|
+
policyengine/tax_benefit_models/uk/datasets.py,sha256=-lmj4eG2my2GGmMMkxI1iXobGQW5irBgylEwyV0xU6c,8039
|
|
24
|
+
policyengine/tax_benefit_models/uk/model.py,sha256=HNdqsAKErDw9nruOdj4SiGF2KMopccaGDJ4RTXkdJ1U,9612
|
|
25
25
|
policyengine/tax_benefit_models/uk/outputs.py,sha256=2mYLwQW4QNvrOHtHfm_ACqE9gbmuLxvcCyldRU46s0o,3543
|
|
26
|
-
policyengine/tax_benefit_models/us/__init__.py,sha256=
|
|
26
|
+
policyengine/tax_benefit_models/us/__init__.py,sha256=zP-UUQqOc9g0ymyHkweJdi4RVXQDKSR6SUxavUKvV0s,1101
|
|
27
27
|
policyengine/tax_benefit_models/us/analysis.py,sha256=Xf-DT0QjVySs0QG_koCwgvOeWI_scLtv3S3SP8u8ZWc,3253
|
|
28
|
-
policyengine/tax_benefit_models/us/datasets.py,sha256=
|
|
29
|
-
policyengine/tax_benefit_models/us/model.py,sha256=
|
|
28
|
+
policyengine/tax_benefit_models/us/datasets.py,sha256=UwY5GcrVRl7zdmtqKE5TykYRNtOsGzyDm8kRkc98hyw,14708
|
|
29
|
+
policyengine/tax_benefit_models/us/model.py,sha256=lhffJLnE4tr-ch_tHiyH2zP4IMX9_swWomUllb9WdbM,16421
|
|
30
30
|
policyengine/tax_benefit_models/us/outputs.py,sha256=GT8Eur8DfB9cPQRbSljEl9RpKSNHW80Fq_CBXCybvIU,3519
|
|
31
31
|
policyengine/utils/__init__.py,sha256=1X-VYAWLyB9A0YRHwsGWrqQHns1WfeZ7ISC6DMU5myM,140
|
|
32
32
|
policyengine/utils/dates.py,sha256=HnAqyl8S8EOYp8ibsnMTmECYoDWCSqwL-7A2_qKgxSc,1510
|
|
33
33
|
policyengine/utils/parametric_reforms.py,sha256=4P3U39-4pYTU4BN6JjgmVLUkCkBhRfZJ6UIWTlsjyQE,1155
|
|
34
34
|
policyengine/utils/plotting.py,sha256=ZAzTWz38vIaW0c3Nt4Un1kfrNoXLyHCDd1pEJIlsRg4,5335
|
|
35
|
-
policyengine-3.1.
|
|
36
|
-
policyengine-3.1.
|
|
37
|
-
policyengine-3.1.
|
|
38
|
-
policyengine-3.1.
|
|
39
|
-
policyengine-3.1.
|
|
35
|
+
policyengine-3.1.4.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
36
|
+
policyengine-3.1.4.dist-info/METADATA,sha256=ED5nMhV3Tg6uymV-_I6DYvuqbsqiFnccRHNSSUZcOXc,45889
|
|
37
|
+
policyengine-3.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
38
|
+
policyengine-3.1.4.dist-info/top_level.txt,sha256=_23UPobfkneHQkpJ0e0OmDJfhCUfoXj_F2sTckCGOH4,13
|
|
39
|
+
policyengine-3.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|