policyengine 3.1.3__py3-none-any.whl → 3.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
- policyengine/core/dataset.py +21 -5
- policyengine/core/simulation.py +7 -0
- policyengine/tax_benefit_models/uk/__init__.py +39 -21
- policyengine/tax_benefit_models/uk/datasets.py +78 -7
- policyengine/tax_benefit_models/uk/model.py +16 -4
- policyengine/tax_benefit_models/uk.py +33 -28
- policyengine/tax_benefit_models/us/__init__.py +9 -1
- policyengine/tax_benefit_models/us/datasets.py +90 -7
- policyengine/tax_benefit_models/us/model.py +16 -4
- policyengine/tax_benefit_models/us.py +6 -0
- {policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/METADATA +1 -1
- {policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/RECORD +16 -16
- {policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/WHEEL +0 -0
- {policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/licenses/LICENSE +0 -0
- {policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/top_level.txt +0 -0
policyengine/__pycache__/__init__.cpython-313.pyc
Binary file
policyengine/core/dataset.py
CHANGED

@@ -1,5 +1,6 @@
 from uuid import uuid4
 
+import numpy as np
 import pandas as pd
 from microdf import MicroDataFrame
 from pydantic import BaseModel, ConfigDict, Field
@@ -100,7 +101,7 @@ def map_to_entity(
     target_entity: str,
     person_entity: str = "person",
     columns: list[str] | None = None,
-    values:
+    values: np.ndarray | None = None,
     how: str = "sum",
 ) -> MicroDataFrame:
     """Map data from source entity to target entity using join keys.
@@ -143,6 +144,9 @@ def map_to_entity(
     # Get source data (convert to plain DataFrame to avoid weighted operations during mapping)
     source_df = pd.DataFrame(entity_data[source_entity])
 
+    # Track if we should return a MicroSeries (values is a numpy array, not a list)
+    return_series = values is not None
+
     # Handle values parameter - create a temporary column with the provided values
     if values is not None:
         if len(values) != len(source_df):
@@ -166,7 +170,10 @@ def map_to_entity(
 
     # Same entity - return as is
    if source_entity == target_entity:
-
+        result = MicroDataFrame(source_df, weights=target_weight)
+        if return_series:
+            return result["__mapped_value"]
+        return result
 
     # Get target data and key
     target_df = entity_data[target_entity]
@@ -225,7 +232,10 @@ def map_to_entity(
         # Fill NaN with 0 for groups with no members in source entity
         result[agg_cols] = result[agg_cols].fillna(0)
 
-
+        result_df = MicroDataFrame(result, weights=target_weight)
+        if return_series:
+            return result_df["__mapped_value"]
+        return result_df
 
     # Group entity to person: expand group-level data to person level
     if source_entity != person_entity and target_entity == person_entity:
@@ -284,7 +294,10 @@ def map_to_entity(
                 f"Unsupported aggregation method for group->person: {how}. Use 'project' or 'divide'."
             )
 
-
+        result_df = MicroDataFrame(result, weights=target_weight)
+        if return_series:
+            return result_df["__mapped_value"]
+        return result_df
 
     # Group to group: go through person table
     if source_entity != person_entity and target_entity != person_entity:
@@ -408,7 +421,10 @@ def map_to_entity(
         # Fill NaN with 0
         result[agg_cols] = result[agg_cols].fillna(0)
 
-
+        result_df = MicroDataFrame(result, weights=target_weight)
+        if return_series:
+            return result_df["__mapped_value"]
+        return result_df
 
     raise ValueError(
         f"Unsupported mapping from {source_entity} to {target_entity}"
policyengine/core/simulation.py
CHANGED

@@ -24,6 +24,13 @@ class Simulation(BaseModel):
     def run(self):
         self.tax_benefit_model_version.run(self)
 
+    def ensure(self):
+        try:
+            self.tax_benefit_model_version.load(self)
+        except Exception:
+            self.run()
+            self.save()
+
     def save(self):
         """Save the simulation's output dataset."""
         self.tax_benefit_model_version.save(self)
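
The new `ensure()` method turns a simulation into a cache-or-compute call: load the previously saved output dataset if the model version can find it, otherwise run the model and save the result. A hedged usage sketch follows; `uk_dataset` and `uk_latest` stand in for a dataset and model version constructed elsewhere (for example via the UK helpers later in this diff), and any other required Simulation fields are omitted.

from policyengine.core.simulation import Simulation

# Assumption: uk_dataset / uk_latest are built elsewhere; the field names
# mirror the attributes used in this hunk and in the model.load() hunks below.
simulation = Simulation(
    dataset=uk_dataset,
    tax_benefit_model_version=uk_latest,
)

# First call: load() fails (no saved output yet), so run() then save().
# Later calls: load() succeeds and the model is not re-run.
simulation.ensure()
output = simulation.output_dataset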
policyengine/tax_benefit_models/uk/__init__.py
CHANGED

@@ -1,26 +1,44 @@
 """PolicyEngine UK tax-benefit model."""
 
-from .
-from .datasets import PolicyEngineUKDataset, UKYearData, create_datasets
-from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model
-from .outputs import ProgrammeStatistics
+from importlib.util import find_spec
 
-
-
-    "PolicyEngineUKDataset",
-    "create_datasets",
-    "PolicyEngineUK",
-    "PolicyEngineUKLatest",
-    "uk_model",
-    "uk_latest",
-    "general_policy_reform_analysis",
-    "ProgrammeStatistics",
-]
+if find_spec("policyengine_uk") is not None:
+    from policyengine.core import Dataset
 
-
-from
+    from .analysis import general_policy_reform_analysis
+    from .datasets import (
+        PolicyEngineUKDataset,
+        UKYearData,
+        create_datasets,
+        ensure_datasets,
+        load_datasets,
+    )
+    from .model import (
+        PolicyEngineUK,
+        PolicyEngineUKLatest,
+        uk_latest,
+        uk_model,
+    )
+    from .outputs import ProgrammeStatistics
 
-
-
-
-
+    # Rebuild Pydantic models to resolve forward references
+    Dataset.model_rebuild()
+    UKYearData.model_rebuild()
+    PolicyEngineUKDataset.model_rebuild()
+    PolicyEngineUKLatest.model_rebuild()
+
+    __all__ = [
+        "UKYearData",
+        "PolicyEngineUKDataset",
+        "create_datasets",
+        "load_datasets",
+        "ensure_datasets",
+        "PolicyEngineUK",
+        "PolicyEngineUKLatest",
+        "uk_model",
+        "uk_latest",
+        "general_policy_reform_analysis",
+        "ProgrammeStatistics",
+    ]
+else:
+    __all__ = []
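
The package-level effect of the `find_spec` guard is that the UK symbols only exist when the optional `policyengine_uk` dependency is installed. A downstream module can mirror the same check, as in the short sketch below.

from importlib.util import find_spec

if find_spec("policyengine_uk") is not None:
    # These names are exported by the guarded __all__ shown above.
    from policyengine.tax_benefit_models.uk import ensure_datasets, uk_latest
else:
    print("policyengine_uk is not installed; the UK model is unavailable")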
policyengine/tax_benefit_models/uk/datasets.py
CHANGED

@@ -37,11 +37,7 @@ class PolicyEngineUKDataset(Dataset):
         if self.data is not None:
             self.save()
         elif self.filepath and not self.data:
-
-                self.load()
-            except FileNotFoundError:
-                # File doesn't exist yet, that's OK
-                pass
+            self.load()
 
     def save(self) -> None:
         """Save dataset to HDF5 file."""
@@ -85,7 +81,9 @@ def create_datasets(
         "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
     ],
     years: list[int] = [2026, 2027, 2028, 2029, 2030],
-
+    data_folder: str = "./data",
+) -> dict[str, PolicyEngineUKDataset]:
+    result = {}
    for dataset in datasets:
         from policyengine_uk import Microsimulation
 
@@ -139,9 +137,10 @@ def create_datasets(
            )
 
            uk_dataset = PolicyEngineUKDataset(
+                id=f"{Path(dataset).stem}_year_{year}",
                name=f"{dataset}-year-{year}",
                description=f"UK Dataset for year {year} based on {dataset}",
-                filepath=f"
+                filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5",
                year=year,
                data=UKYearData(
                    person=MicroDataFrame(person_df, weights="person_weight"),
@@ -154,3 +153,75 @@ def create_datasets(
                ),
            )
            uk_dataset.save()
+
+            dataset_key = f"{Path(dataset).stem}_{year}"
+            result[dataset_key] = uk_dataset
+
+    return result
+
+
+def load_datasets(
+    datasets: list[str] = [
+        "hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
+        "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
+    ],
+    years: list[int] = [2026, 2027, 2028, 2029, 2030],
+    data_folder: str = "./data",
+) -> dict[str, PolicyEngineUKDataset]:
+    result = {}
+    for dataset in datasets:
+        for year in years:
+            filepath = f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
+            uk_dataset = PolicyEngineUKDataset(
+                name=f"{dataset}-year-{year}",
+                description=f"UK Dataset for year {year} based on {dataset}",
+                filepath=filepath,
+                year=year,
+            )
+            uk_dataset.load()
+
+            dataset_key = f"{Path(dataset).stem}_{year}"
+            result[dataset_key] = uk_dataset
+
+    return result
+
+
+def ensure_datasets(
+    datasets: list[str] = [
+        "hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
+        "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
+    ],
+    years: list[int] = [2026, 2027, 2028, 2029, 2030],
+    data_folder: str = "./data",
+) -> dict[str, PolicyEngineUKDataset]:
+    """Ensure datasets exist, loading if available or creating if not.
+
+    Args:
+        datasets: List of HuggingFace dataset paths
+        years: List of years to load/create data for
+        data_folder: Directory containing or to save the dataset files
+
+    Returns:
+        Dictionary mapping dataset keys to PolicyEngineUKDataset objects
+    """
+    # Check if all dataset files exist
+    all_exist = True
+    for dataset in datasets:
+        for year in years:
+            filepath = Path(
+                f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
+            )
+            if not filepath.exists():
+                all_exist = False
+                break
+        if not all_exist:
+            break
+
+    if all_exist:
+        return load_datasets(
+            datasets=datasets, years=years, data_folder=data_folder
+        )
+    else:
+        return create_datasets(
+            datasets=datasets, years=years, data_folder=data_folder
+        )
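
A hedged usage sketch for the new `ensure_datasets` helper: it reuses the per-year HDF5 files under `data_folder` when they all exist and rebuilds them from the HuggingFace sources otherwise, keyed by f"{Path(dataset).stem}_{year}".

from policyengine.tax_benefit_models.uk.datasets import ensure_datasets

# Build (or reload) one source dataset for two years; keys follow
# f"{Path(dataset).stem}_{year}", e.g. "frs_2023_24_2026".
datasets = ensure_datasets(
    datasets=["hf://policyengine/policyengine-uk-data/frs_2023_24.h5"],
    years=[2026, 2027],
    data_folder="./data",
)
frs_2026 = datasets["frs_2023_24_2026"]   # a PolicyEngineUKDataset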
policyengine/tax_benefit_models/uk/model.py
CHANGED

@@ -265,17 +265,29 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion):
 
     def load(self, simulation: "Simulation"):
         """Load the simulation's output dataset."""
+        import os
+
+        filepath = str(
+            Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
+        )
+
         simulation.output_dataset = PolicyEngineUKDataset(
             id=simulation.id,
             name=simulation.dataset.name,
             description=simulation.dataset.description,
-            filepath=
-                Path(simulation.dataset.filepath).parent
-                / (simulation.id + ".h5")
-            ),
+            filepath=filepath,
             year=simulation.dataset.year,
             is_output_dataset=True,
         )
 
+        # Load timestamps from file system metadata
+        if os.path.exists(filepath):
+            simulation.created_at = datetime.datetime.fromtimestamp(
+                os.path.getctime(filepath)
+            )
+            simulation.updated_at = datetime.datetime.fromtimestamp(
+                os.path.getmtime(filepath)
+            )
+
 
 uk_latest = PolicyEngineUKLatest()
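
`load()` now back-fills `created_at` and `updated_at` from the saved file's metadata rather than persisting them separately. A self-contained illustration of that pattern follows; the temporary file stands in for the simulation's saved output `.h5`.

import datetime
import os
import tempfile

# Stand-in for the simulation's saved output file.
with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as f:
    filepath = f.name

if os.path.exists(filepath):
    # Note: on Unix, getctime() reports the inode change time, not true creation time.
    created_at = datetime.datetime.fromtimestamp(os.path.getctime(filepath))
    updated_at = datetime.datetime.fromtimestamp(os.path.getmtime(filepath))
    print(created_at, updated_at)

os.remove(filepath)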
policyengine/tax_benefit_models/uk.py
CHANGED

@@ -1,33 +1,38 @@
 """PolicyEngine UK tax-benefit model - imports from uk/ module."""
 
-from .
-    PolicyEngineUK,
-    PolicyEngineUKDataset,
-    PolicyEngineUKLatest,
-    ProgrammeStatistics,
-    UKYearData,
-    create_datasets,
-    general_policy_reform_analysis,
-    uk_latest,
-    uk_model,
-)
+from importlib.util import find_spec
 
-
-
-
-
-
-
-
-
-
-
-
+if find_spec("policyengine_uk") is not None:
+    from .uk import (
+        PolicyEngineUK,
+        PolicyEngineUKDataset,
+        PolicyEngineUKLatest,
+        ProgrammeStatistics,
+        UKYearData,
+        create_datasets,
+        ensure_datasets,
+        general_policy_reform_analysis,
+        load_datasets,
+        uk_latest,
+        uk_model,
+    )
 
-
-
+    __all__ = [
+        "UKYearData",
+        "PolicyEngineUKDataset",
+        "create_datasets",
+        "load_datasets",
+        "ensure_datasets",
+        "PolicyEngineUK",
+        "PolicyEngineUKLatest",
+        "uk_model",
+        "uk_latest",
+        "general_policy_reform_analysis",
+        "ProgrammeStatistics",
+    ]
 
-
-
-
-
+    # Rebuild models to resolve forward references
+    PolicyEngineUKDataset.model_rebuild()
+    PolicyEngineUKLatest.model_rebuild()
+else:
+    __all__ = []
policyengine/tax_benefit_models/us/__init__.py
CHANGED

@@ -6,7 +6,13 @@ if find_spec("policyengine_us") is not None:
     from policyengine.core import Dataset
 
     from .analysis import general_policy_reform_analysis
-    from .datasets import
+    from .datasets import (
+        PolicyEngineUSDataset,
+        USYearData,
+        create_datasets,
+        ensure_datasets,
+        load_datasets,
+    )
     from .model import (
         PolicyEngineUS,
         PolicyEngineUSLatest,
@@ -25,6 +31,8 @@ if find_spec("policyengine_us") is not None:
         "USYearData",
         "PolicyEngineUSDataset",
         "create_datasets",
+        "load_datasets",
+        "ensure_datasets",
         "PolicyEngineUS",
         "PolicyEngineUSLatest",
         "us_model",
policyengine/tax_benefit_models/us/datasets.py
CHANGED

@@ -44,11 +44,7 @@ class PolicyEngineUSDataset(Dataset):
         if self.data is not None:
             self.save()
         elif self.filepath and not self.data:
-
-                self.load()
-            except FileNotFoundError:
-                # File doesn't exist yet, that's OK
-                pass
+            self.load()
 
     def save(self) -> None:
         """Save dataset to HDF5 file."""
@@ -112,15 +108,21 @@ def create_datasets(
        "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
    ],
    years: list[int] = [2024, 2025, 2026, 2027, 2028],
-
+    data_folder: str = "./data",
+) -> dict[str, PolicyEngineUSDataset]:
    """Create PolicyEngineUSDataset instances from HuggingFace dataset paths.
 
    Args:
        datasets: List of HuggingFace dataset paths (e.g., "hf://policyengine/policyengine-us-data/cps_2024.h5")
        years: List of years to extract data for
+        data_folder: Directory to save the dataset files
+
+    Returns:
+        Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects
    """
    from policyengine_us import Microsimulation
 
+    result = {}
    for dataset in datasets:
        sim = Microsimulation(dataset=dataset)
 
@@ -263,9 +265,10 @@ def create_datasets(
                tax_unit_df = entity_df
 
            us_dataset = PolicyEngineUSDataset(
+                id=f"{Path(dataset).stem}_year_{year}",
                name=f"{dataset}-year-{year}",
                description=f"US Dataset for year {year} based on {dataset}",
-                filepath=f"
+                filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5",
                year=year,
                data=USYearData(
                    person=MicroDataFrame(person_df, weights="person_weight"),
@@ -285,3 +288,83 @@ def create_datasets(
                ),
            )
            us_dataset.save()
+
+            dataset_key = f"{Path(dataset).stem}_{year}"
+            result[dataset_key] = us_dataset
+
+    return result
+
+
+def load_datasets(
+    datasets: list[str] = [
+        "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
+    ],
+    years: list[int] = [2024, 2025, 2026, 2027, 2028],
+    data_folder: str = "./data",
+) -> dict[str, PolicyEngineUSDataset]:
+    """Load PolicyEngineUSDataset instances from saved HDF5 files.
+
+    Args:
+        datasets: List of HuggingFace dataset paths (used to derive file names)
+        years: List of years to load data for
+        data_folder: Directory containing the dataset files
+
+    Returns:
+        Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects
+    """
+    result = {}
+    for dataset in datasets:
+        for year in years:
+            filepath = f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
+            us_dataset = PolicyEngineUSDataset(
+                name=f"{dataset}-year-{year}",
+                description=f"US Dataset for year {year} based on {dataset}",
+                filepath=filepath,
+                year=year,
+            )
+            us_dataset.load()
+
+            dataset_key = f"{Path(dataset).stem}_{year}"
+            result[dataset_key] = us_dataset
+
+    return result
+
+
+def ensure_datasets(
+    datasets: list[str] = [
+        "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
+    ],
+    years: list[int] = [2024, 2025, 2026, 2027, 2028],
+    data_folder: str = "./data",
+) -> dict[str, PolicyEngineUSDataset]:
+    """Ensure datasets exist, loading if available or creating if not.
+
+    Args:
+        datasets: List of HuggingFace dataset paths
+        years: List of years to load/create data for
+        data_folder: Directory containing or to save the dataset files
+
+    Returns:
+        Dictionary mapping dataset keys to PolicyEngineUSDataset objects
+    """
+    # Check if all dataset files exist
+    all_exist = True
+    for dataset in datasets:
+        for year in years:
+            filepath = Path(
+                f"{data_folder}/{Path(dataset).stem}_year_{year}.h5"
+            )
+            if not filepath.exists():
+                all_exist = False
+                break
+        if not all_exist:
+            break
+
+    if all_exist:
+        return load_datasets(
+            datasets=datasets, years=years, data_folder=data_folder
+        )
+    else:
+        return create_datasets(
+            datasets=datasets, years=years, data_folder=data_folder
+        )
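
As on the UK side, the US helpers now return a dictionary of datasets rather than only writing files. A short hedged sketch of loading previously created files:

from policyengine.tax_benefit_models.us.datasets import load_datasets

# Assumes the per-year files were already produced by create_datasets()
# or ensure_datasets(); keys follow f"{Path(dataset).stem}_{year}".
us_datasets = load_datasets(years=[2024], data_folder="./data")
enhanced_cps_2024 = us_datasets["enhanced_cps_2024_2024"]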
policyengine/tax_benefit_models/us/model.py
CHANGED

@@ -311,18 +311,30 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion):
 
     def load(self, simulation: "Simulation"):
         """Load the simulation's output dataset."""
+        import os
+
+        filepath = str(
+            Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
+        )
+
         simulation.output_dataset = PolicyEngineUSDataset(
             id=simulation.id,
             name=simulation.dataset.name,
             description=simulation.dataset.description,
-            filepath=
-                Path(simulation.dataset.filepath).parent
-                / (simulation.id + ".h5")
-            ),
+            filepath=filepath,
             year=simulation.dataset.year,
             is_output_dataset=True,
         )
 
+        # Load timestamps from file system metadata
+        if os.path.exists(filepath):
+            simulation.created_at = datetime.datetime.fromtimestamp(
+                os.path.getctime(filepath)
+            )
+            simulation.updated_at = datetime.datetime.fromtimestamp(
+                os.path.getmtime(filepath)
+            )
+
     def _build_simulation_from_dataset(self, microsim, dataset, system):
         """Build a PolicyEngine Core simulation from dataset entity IDs.
 
policyengine/tax_benefit_models/us.py
CHANGED

@@ -9,7 +9,10 @@ if find_spec("policyengine_us") is not None:
         PolicyEngineUSLatest,
         ProgramStatistics,
         USYearData,
+        create_datasets,
+        ensure_datasets,
         general_policy_reform_analysis,
+        load_datasets,
         us_latest,
         us_model,
     )
@@ -17,6 +20,9 @@ if find_spec("policyengine_us") is not None:
     __all__ = [
         "USYearData",
         "PolicyEngineUSDataset",
+        "create_datasets",
+        "load_datasets",
+        "ensure_datasets",
         "PolicyEngineUS",
         "PolicyEngineUSLatest",
         "us_model",
{policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/RECORD
CHANGED

@@ -1,14 +1,14 @@
 policyengine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-policyengine/__pycache__/__init__.cpython-313.pyc,sha256=
+policyengine/__pycache__/__init__.cpython-313.pyc,sha256=i0if-AwQ3fDLDZnEs_L7YxJXPz-_BGDRJstrY56T1cU,175
 policyengine/core/__init__.py,sha256=KBVhkqzkvjWLDDwk96vquQKL63ZFuLen5AzBOBnO9pg,912
-policyengine/core/dataset.py,sha256=
+policyengine/core/dataset.py,sha256=iJr9-J6w11uMRYy3EEJO9Gveku1m71AA1yzeo-0SiCs,16094
 policyengine/core/dataset_version.py,sha256=6KeFCRGQto_Yyl4QY4Vo2JFythjaXrNAOHQiwRGESyM,378
 policyengine/core/dynamic.py,sha256=ng9BjDzxdwjJ0e7zoqXFmq33E1SRbaaPYfW7pjRSSzI,1641
 policyengine/core/output.py,sha256=cCW4vbzkLdQaT_nJTyDJBl7Hubm7nZeRuR7aVG1dKvg,643
 policyengine/core/parameter.py,sha256=5nBCw-6-BCfW-_uFqvAN5zcP_Vfz1mAmQsi3peWAbZA,407
 policyengine/core/parameter_value.py,sha256=b0ts1kbWcwjPSYnZm2rlCylmTLPJRLxDL8z3RmxM5OI,377
 policyengine/core/policy.py,sha256=ExMrUDMvNk_uuOL0cSm0UCzDyGka0t_yk6x4U0Kp6Ww,1635
-policyengine/core/simulation.py,sha256=
+policyengine/core/simulation.py,sha256=yvvved75XMcGP3Bj9E2tmKRxvI-DQVZv7k4uTETwBm0,1134
 policyengine/core/tax_benefit_model.py,sha256=2Yc1RlQrUG7djDMZbJOQH4Ns86_lOnLeISCGR4-9zMo,176
 policyengine/core/tax_benefit_model_version.py,sha256=V1CGft5Y6YflMASx0wR3V73jr-WqQu2R8N5QVMRm9yw,2752
 policyengine/core/variable.py,sha256=AjSImORlRkh05xhYxyeT6GFMOfViRzYg0qRQAIj-mxo,350
@@ -16,24 +16,24 @@ policyengine/outputs/__init__.py,sha256=IJUmLP0Og41VrwiqhJF-a9-3fIb4nlXpS7uFuVCI
 policyengine/outputs/aggregate.py,sha256=exI-U04OF5kVf2BBYV6sf8VldIWnT_IzxgkBs5wtnCw,4846
 policyengine/outputs/change_aggregate.py,sha256=tK4K87YlByKikqFaB7OHyh1SqAuGtUnLL7cSF_EhrOs,7373
 policyengine/outputs/decile_impact.py,sha256=jclhbj5U-xX8D-myy0SuWeJFVfQTqJDCh7qBXugak5U,4811
-policyengine/tax_benefit_models/uk.py,sha256=
-policyengine/tax_benefit_models/us.py,sha256=
-policyengine/tax_benefit_models/uk/__init__.py,sha256=
+policyengine/tax_benefit_models/uk.py,sha256=HzAG_dORmsj1NJ9pd9WrqwgZPe9DUDrZ1wV5LuVCKAg,950
+policyengine/tax_benefit_models/us.py,sha256=G51dAmHo8NJLb2mnbne6iO5eNaatCGUd_2unvawwF84,946
+policyengine/tax_benefit_models/uk/__init__.py,sha256=AiA74iED5FEryvUCMfVZi6pYDYuTfQcj9B01h8J5xFA,1105
 policyengine/tax_benefit_models/uk/analysis.py,sha256=O4eYJYF7tsgiuLuiWMU0OXq7ss6U8-vzlg6nC2U8sgU,3175
-policyengine/tax_benefit_models/uk/datasets.py,sha256
-policyengine/tax_benefit_models/uk/model.py,sha256=
+policyengine/tax_benefit_models/uk/datasets.py,sha256=-lmj4eG2my2GGmMMkxI1iXobGQW5irBgylEwyV0xU6c,8039
+policyengine/tax_benefit_models/uk/model.py,sha256=HNdqsAKErDw9nruOdj4SiGF2KMopccaGDJ4RTXkdJ1U,9612
 policyengine/tax_benefit_models/uk/outputs.py,sha256=2mYLwQW4QNvrOHtHfm_ACqE9gbmuLxvcCyldRU46s0o,3543
-policyengine/tax_benefit_models/us/__init__.py,sha256=
+policyengine/tax_benefit_models/us/__init__.py,sha256=zP-UUQqOc9g0ymyHkweJdi4RVXQDKSR6SUxavUKvV0s,1101
 policyengine/tax_benefit_models/us/analysis.py,sha256=Xf-DT0QjVySs0QG_koCwgvOeWI_scLtv3S3SP8u8ZWc,3253
-policyengine/tax_benefit_models/us/datasets.py,sha256=
-policyengine/tax_benefit_models/us/model.py,sha256=
+policyengine/tax_benefit_models/us/datasets.py,sha256=UwY5GcrVRl7zdmtqKE5TykYRNtOsGzyDm8kRkc98hyw,14708
+policyengine/tax_benefit_models/us/model.py,sha256=lhffJLnE4tr-ch_tHiyH2zP4IMX9_swWomUllb9WdbM,16421
 policyengine/tax_benefit_models/us/outputs.py,sha256=GT8Eur8DfB9cPQRbSljEl9RpKSNHW80Fq_CBXCybvIU,3519
 policyengine/utils/__init__.py,sha256=1X-VYAWLyB9A0YRHwsGWrqQHns1WfeZ7ISC6DMU5myM,140
 policyengine/utils/dates.py,sha256=HnAqyl8S8EOYp8ibsnMTmECYoDWCSqwL-7A2_qKgxSc,1510
 policyengine/utils/parametric_reforms.py,sha256=4P3U39-4pYTU4BN6JjgmVLUkCkBhRfZJ6UIWTlsjyQE,1155
 policyengine/utils/plotting.py,sha256=ZAzTWz38vIaW0c3Nt4Un1kfrNoXLyHCDd1pEJIlsRg4,5335
-policyengine-3.1.
-policyengine-3.1.
-policyengine-3.1.
-policyengine-3.1.
-policyengine-3.1.
+policyengine-3.1.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+policyengine-3.1.5.dist-info/METADATA,sha256=OrYJJBzGq6CZeokXm7gCprOYUPPJe0d4unqvivTPyXU,45889
+policyengine-3.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+policyengine-3.1.5.dist-info/top_level.txt,sha256=_23UPobfkneHQkpJ0e0OmDJfhCUfoXj_F2sTckCGOH4,13
+policyengine-3.1.5.dist-info/RECORD,,
{policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/WHEEL
File without changes

{policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/licenses/LICENSE
File without changes

{policyengine-3.1.3.dist-info → policyengine-3.1.5.dist-info}/top_level.txt
File without changes