policyengine-uk 2.42.0__py3-none-any.whl → 2.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- policyengine_uk/data/dataset_schema.py +63 -15
- policyengine_uk/data/economic_assumptions.py +18 -1
- policyengine_uk/model_api.py +1 -0
- policyengine_uk/parameters/gov/economic_assumptions/yoy_growth.yaml +1 -3
- policyengine_uk/scenarios/__init__.py +3 -0
- policyengine_uk/scenarios/pip_reform.py +23 -0
- policyengine_uk/scenarios/reindex_benefit_cap.py +32 -0
- policyengine_uk/scenarios/repeal_two_child_limit.py +10 -0
- policyengine_uk/system.py +543 -186
- policyengine_uk/tests/microsimulation/reforms_config.yaml +1 -1
- policyengine_uk/utils/scenario.py +184 -0
- {policyengine_uk-2.42.0.data → policyengine_uk-2.43.0.data}/data/share/openfisca/openfisca-country-template/CHANGELOG.md +9 -0
- {policyengine_uk-2.42.0.dist-info → policyengine_uk-2.43.0.dist-info}/METADATA +2 -2
- {policyengine_uk-2.42.0.dist-info → policyengine_uk-2.43.0.dist-info}/RECORD +18 -15
- policyengine_uk/repo.py +0 -3
- policyengine_uk/tests/policy/baseline/gov/abolitions/abolition_parameters.yaml +0 -250
- {policyengine_uk-2.42.0.data → policyengine_uk-2.43.0.data}/data/share/openfisca/openfisca-country-template/LICENSE +0 -0
- {policyengine_uk-2.42.0.data → policyengine_uk-2.43.0.data}/data/share/openfisca/openfisca-country-template/README.md +0 -0
- {policyengine_uk-2.42.0.dist-info → policyengine_uk-2.43.0.dist-info}/WHEEL +0 -0
- {policyengine_uk-2.42.0.dist-info → policyengine_uk-2.43.0.dist-info}/licenses/LICENSE +0 -0
policyengine_uk/system.py
CHANGED
@@ -1,21 +1,40 @@
|
|
1
|
+
# Standard library imports
|
2
|
+
import copy
|
1
3
|
from pathlib import Path
|
2
|
-
from
|
4
|
+
from typing import Any, Dict, List, Optional, Union, Type
|
5
|
+
|
6
|
+
# Third-party imports
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
from microdf import MicroDataFrame, MicroSeries
|
10
|
+
|
11
|
+
# PolicyEngine core imports
|
3
12
|
from policyengine_core.data import Dataset
|
4
|
-
from policyengine_core.
|
5
|
-
|
6
|
-
Simulation as CoreSimulation,
|
7
|
-
Microsimulation as CoreMicrosimulation,
|
13
|
+
from policyengine_core.parameters.operations.propagate_parameter_metadata import (
|
14
|
+
propagate_parameter_metadata,
|
8
15
|
)
|
9
|
-
from
|
10
|
-
|
11
|
-
|
16
|
+
from policyengine_core.periods import period as period_
|
17
|
+
from policyengine_core.parameters.operations.uprate_parameters import (
|
18
|
+
uprate_parameters,
|
12
19
|
)
|
20
|
+
from policyengine_core.parameters import Parameter
|
21
|
+
from policyengine_core.reforms import Reform
|
22
|
+
from policyengine_core.simulations import Simulation as CoreSimulation
|
23
|
+
from policyengine_core.taxbenefitsystems import TaxBenefitSystem
|
13
24
|
from policyengine_core.tools.hugging_face import download_huggingface_dataset
|
25
|
+
from policyengine_core.tracers import FullTracer, SimpleTracer
|
14
26
|
|
15
|
-
|
16
|
-
from policyengine_uk.
|
17
|
-
|
18
|
-
|
27
|
+
# PolicyEngine UK imports
|
28
|
+
from policyengine_uk.data.dataset_schema import (
|
29
|
+
UKMultiYearDataset,
|
30
|
+
UKSingleYearDataset,
|
31
|
+
)
|
32
|
+
from policyengine_uk.entities import BenUnit, Household, Person
|
33
|
+
from policyengine_uk.parameters.gov.contrib.create_private_pension_uprating import (
|
34
|
+
add_private_pension_uprating_factor,
|
35
|
+
)
|
36
|
+
from policyengine_uk.parameters.gov.dwp.state_pension.triple_lock.create_triple_lock import (
|
37
|
+
add_triple_lock,
|
19
38
|
)
|
20
39
|
from policyengine_uk.parameters.gov.economic_assumptions.create_economic_assumption_indices import (
|
21
40
|
create_economic_assumption_indices,
|
@@ -26,38 +45,29 @@ from policyengine_uk.parameters.gov.economic_assumptions.lag_average_earnings im
|
|
26
45
|
from policyengine_uk.parameters.gov.economic_assumptions.lag_cpi import (
|
27
46
|
add_lagged_cpi,
|
28
47
|
)
|
29
|
-
from
|
30
|
-
|
31
|
-
|
32
|
-
from policyengine_uk.parameters.gov.contrib.create_private_pension_uprating import (
|
33
|
-
add_private_pension_uprating_factor,
|
34
|
-
)
|
35
|
-
from policyengine_uk.parameters.gov.dwp.state_pension.triple_lock.create_triple_lock import (
|
36
|
-
add_triple_lock,
|
37
|
-
)
|
38
|
-
from policyengine_core.parameters.operations.homogenize_parameters import (
|
39
|
-
homogenize_parameter_structures,
|
40
|
-
)
|
41
|
-
from policyengine_core.parameters.operations.interpolate_parameters import (
|
42
|
-
interpolate_parameters,
|
43
|
-
)
|
44
|
-
from policyengine_core.parameters.operations.propagate_parameter_metadata import (
|
45
|
-
propagate_parameter_metadata,
|
48
|
+
from policyengine_uk.utils.parameters import (
|
49
|
+
backdate_parameters,
|
50
|
+
convert_to_fiscal_year_parameters,
|
46
51
|
)
|
47
|
-
from
|
48
|
-
|
52
|
+
from policyengine_uk.utils.scenario import Scenario
|
53
|
+
from policyengine_uk.data.economic_assumptions import (
|
54
|
+
apply_uprating,
|
55
|
+
extend_single_year_dataset,
|
49
56
|
)
|
50
|
-
from policyengine_core.reforms import Reform
|
51
57
|
|
58
|
+
# Module constants
|
52
59
|
COUNTRY_DIR = Path(__file__).parent
|
53
|
-
|
54
60
|
ENHANCED_FRS = "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"
|
55
61
|
|
56
62
|
|
57
63
|
class CountryTaxBenefitSystem(TaxBenefitSystem):
|
58
|
-
|
59
|
-
|
60
|
-
|
64
|
+
"""UK-specific tax and benefit system implementation.
|
65
|
+
|
66
|
+
This class defines the UK tax-benefit system with all relevant
|
67
|
+
variables, parameters, and entities (Person, BenUnit, Household).
|
68
|
+
"""
|
69
|
+
|
70
|
+
basic_inputs: List[str] = [
|
61
71
|
"brma",
|
62
72
|
"local_authority",
|
63
73
|
"region",
|
@@ -65,193 +75,540 @@ class CountryTaxBenefitSystem(TaxBenefitSystem):
|
|
65
75
|
"age",
|
66
76
|
]
|
67
77
|
modelled_policies = COUNTRY_DIR / "modelled_policies.yaml"
|
78
|
+
auto_carry_over_input_variables: bool = True
|
79
|
+
|
80
|
+
def reset_parameters(self) -> None:
|
81
|
+
"""Reset parameters by reloading from the parameters directory."""
|
82
|
+
self._parameters_at_instant_cache = {}
|
83
|
+
self.load_parameters(self.parameters_dir)
|
68
84
|
|
69
|
-
def process_parameters(self
|
70
|
-
|
71
|
-
|
85
|
+
def process_parameters(self) -> None:
|
86
|
+
"""Process and transform parameters with UK-specific adjustments.
|
87
|
+
|
88
|
+
Applies various parameter transformations including:
|
89
|
+
- Private pension uprating factors
|
90
|
+
- Lagged earnings and CPI indices
|
91
|
+
- Triple lock calculations for state pensions
|
92
|
+
- Economic assumption indices
|
93
|
+
- Parameter uprating and backdating
|
94
|
+
- Conversion to fiscal year parameters
|
95
|
+
"""
|
96
|
+
self._parameters_at_instant_cache = {}
|
97
|
+
# Add various UK-specific parameter adjustments
|
72
98
|
self.parameters = add_private_pension_uprating_factor(self.parameters)
|
73
99
|
self.parameters = add_lagged_earnings(self.parameters)
|
74
100
|
self.parameters = add_lagged_cpi(self.parameters)
|
75
101
|
self.parameters = add_triple_lock(self.parameters)
|
76
102
|
self.parameters = create_economic_assumption_indices(self.parameters)
|
103
|
+
|
104
|
+
# Create baseline parameters for reform comparisons
|
77
105
|
self.parameters.add_child("baseline", self.parameters.clone())
|
78
|
-
|
79
|
-
|
80
|
-
)
|
106
|
+
|
107
|
+
# Apply general parameter operations
|
81
108
|
self.parameters = propagate_parameter_metadata(self.parameters)
|
82
|
-
self.parameters = interpolate_parameters(self.parameters)
|
83
109
|
self.parameters = uprate_parameters(self.parameters)
|
84
|
-
self.parameters = propagate_parameter_metadata(self.parameters)
|
85
|
-
self.add_abolition_parameters()
|
86
110
|
self.parameters = backdate_parameters(self.parameters, "2015-01-01")
|
87
|
-
|
88
111
|
self.parameters.gov = convert_to_fiscal_year_parameters(
|
89
112
|
self.parameters.gov
|
90
113
|
)
|
91
114
|
|
92
|
-
def __init__(self
|
93
|
-
|
115
|
+
def __init__(self):
|
116
|
+
"""Initialize the UK tax-benefit system with entities and parameters."""
|
117
|
+
self._parameters_at_instant_cache: Dict[str, Any] = {}
|
118
|
+
self.variables: Dict[Any, Any] = {}
|
94
119
|
|
95
|
-
|
120
|
+
# Create copies of entity classes to avoid modifying originals
|
121
|
+
person, benunit, household = (
|
122
|
+
copy.copy(Person),
|
123
|
+
copy.copy(BenUnit),
|
124
|
+
copy.copy(Household),
|
125
|
+
)
|
96
126
|
|
97
|
-
|
127
|
+
# Set up entities
|
128
|
+
self.entities = [person, benunit, household]
|
129
|
+
self.person_entity = person
|
130
|
+
self.group_entities = [benunit, household]
|
131
|
+
self.group_entity_keys = [entity.key for entity in self.group_entities]
|
98
132
|
|
99
|
-
|
133
|
+
# Link entities to this tax-benefit system
|
134
|
+
for entity in self.entities:
|
135
|
+
entity.set_tax_benefit_system(self)
|
100
136
|
|
137
|
+
self.variable_module_metadata = {}
|
101
138
|
|
102
|
-
|
139
|
+
# Load all variables from the variables directory
|
140
|
+
self.add_variables_from_directory(COUNTRY_DIR / "variables")
|
141
|
+
|
142
|
+
# Set up and process parameters
|
143
|
+
self.parameters_dir = COUNTRY_DIR / "parameters"
|
144
|
+
self.reset_parameters()
|
145
|
+
self.process_parameters()
|
103
146
|
|
147
|
+
|
148
|
+
# Create system instance for module-level access
|
149
|
+
system = CountryTaxBenefitSystem()
|
104
150
|
parameters = system.parameters
|
105
151
|
variables = system.variables
|
106
152
|
|
107
153
|
|
108
154
|
class Simulation(CoreSimulation):
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
155
|
+
"""UK-specific simulation class for calculating tax and benefit outcomes.
|
156
|
+
|
157
|
+
Extends the core simulation functionality with UK-specific features
|
158
|
+
and data handling capabilities.
|
159
|
+
"""
|
160
|
+
|
161
|
+
default_input_period: int = 2025
|
162
|
+
default_calculation_period: int = 2025
|
163
|
+
|
164
|
+
def __init__(
|
165
|
+
self,
|
166
|
+
scenario: Optional[Scenario] = None,
|
167
|
+
situation: Optional[Dict] = None,
|
168
|
+
dataset: Optional[
|
169
|
+
Union[pd.DataFrame, str, UKSingleYearDataset, UKMultiYearDataset]
|
170
|
+
] = None,
|
171
|
+
trace: bool = False,
|
172
|
+
reform: Dict | Type[Reform] = None,
|
173
|
+
):
|
174
|
+
"""Initialize a UK simulation.
|
175
|
+
|
176
|
+
Args:
|
177
|
+
scenario: A Scenario object defining a modification to the simulation
|
178
|
+
situation: A dictionary describing the situation to simulate
|
179
|
+
dataset: Data source - can be DataFrame, URL string, or Dataset object
|
180
|
+
trace: Whether to enable detailed tracing of calculations
|
181
|
+
"""
|
182
|
+
# Initialize tax-benefit rules
|
183
|
+
self.tax_benefit_system = CountryTaxBenefitSystem()
|
184
|
+
|
185
|
+
# Migrate Reform to Scenario
|
186
|
+
|
122
187
|
if reform is not None:
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
188
|
+
scenario = Scenario.from_reform(reform)
|
189
|
+
|
190
|
+
# Apply parametric reforms here
|
191
|
+
|
192
|
+
if scenario is not None:
|
193
|
+
if scenario.parameter_changes is not None:
|
194
|
+
self.apply_parameter_changes(scenario.parameter_changes)
|
195
|
+
|
196
|
+
self.branch_name = "default"
|
197
|
+
self.invalidated_caches = set()
|
198
|
+
self.debug: bool = False
|
199
|
+
self.trace: bool = trace
|
200
|
+
self.tracer: SimpleTracer = (
|
201
|
+
SimpleTracer() if not trace else FullTracer()
|
202
|
+
)
|
203
|
+
self.opt_out_cache: bool = False
|
204
|
+
self.max_spiral_loops: int = 10
|
205
|
+
self.memory_config = None
|
206
|
+
self._data_storage_dir: Optional[str] = None
|
207
|
+
|
208
|
+
self.branches: Dict[str, Simulation] = {}
|
209
|
+
|
210
|
+
# Build simulation from appropriate source
|
211
|
+
if situation is not None:
|
212
|
+
self.build_from_situation(situation)
|
213
|
+
elif isinstance(dataset, str):
|
214
|
+
self.build_from_url(dataset)
|
215
|
+
elif isinstance(dataset, pd.DataFrame):
|
216
|
+
self.build_from_dataframe(dataset)
|
217
|
+
elif isinstance(dataset, Dataset):
|
218
|
+
self.build_from_dataset(dataset)
|
219
|
+
elif isinstance(dataset, UKSingleYearDataset):
|
220
|
+
self.build_from_single_year_dataset(dataset)
|
221
|
+
elif isinstance(dataset, UKMultiYearDataset):
|
222
|
+
self.build_from_multi_year_dataset(dataset)
|
223
|
+
elif dataset is None:
|
224
|
+
self.build_from_url(
|
225
|
+
"hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"
|
226
|
+
)
|
227
|
+
else:
|
228
|
+
raise ValueError(f"Unsupported dataset type: {dataset.__class__}")
|
140
229
|
|
141
|
-
#
|
230
|
+
# Handle behavioral responses for earnings and capital gains
|
231
|
+
self.move_values("employment_income", "employment_income_before_lsr")
|
232
|
+
self.move_values("capital_gains", "capital_gains_before_response")
|
142
233
|
|
143
|
-
|
144
|
-
for known_period in employment_income.get_known_periods():
|
145
|
-
array = employment_income.get_array(known_period)
|
146
|
-
self.set_input("employment_income_before_lsr", known_period, array)
|
147
|
-
employment_income.delete_arrays(known_period)
|
234
|
+
self.input_variables = self.get_known_variables()
|
148
235
|
|
149
|
-
#
|
236
|
+
# Apply structural modifiers
|
150
237
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
238
|
+
if scenario is not None:
|
239
|
+
if scenario.simulation_modifier is not None:
|
240
|
+
scenario.simulation_modifier(self)
|
241
|
+
|
242
|
+
def get_known_variables(self):
|
243
|
+
variables = []
|
244
|
+
for variable in self.tax_benefit_system.variables:
|
245
|
+
if len(self.get_holder(variable).get_known_periods()) > 0:
|
246
|
+
variables.append(variable)
|
247
|
+
return variables
|
248
|
+
|
249
|
+
def apply_parameter_changes(self, changes: dict):
|
250
|
+
self.tax_benefit_system.reset_parameters()
|
251
|
+
|
252
|
+
for parameter in changes:
|
253
|
+
p: Parameter = self.tax_benefit_system.parameters.get_child(
|
254
|
+
parameter
|
156
255
|
)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
default_dataset_year = 2022
|
164
|
-
default_tax_benefit_system_instance = system
|
165
|
-
default_calculation_period = 2025
|
166
|
-
default_input_period = 2025
|
167
|
-
default_role = "member"
|
168
|
-
max_spiral_loops = 10
|
169
|
-
|
170
|
-
def __init__(self, *args, dataset=ENHANCED_FRS, **kwargs):
|
171
|
-
if dataset is not None:
|
172
|
-
if isinstance(dataset, str):
|
173
|
-
if "hf://" in dataset:
|
174
|
-
owner, repo, filename = dataset.split("/")[-3:]
|
175
|
-
if "@" in filename:
|
176
|
-
version = filename.split("@")[-1]
|
177
|
-
filename = filename.split("@")[0]
|
178
|
-
else:
|
179
|
-
version = None
|
180
|
-
dataset_file_path = download_huggingface_dataset(
|
181
|
-
repo=f"{owner}/{repo}",
|
182
|
-
repo_filename=filename,
|
183
|
-
version=version,
|
256
|
+
if isinstance(changes[parameter], dict):
|
257
|
+
# Time-period specific changes
|
258
|
+
for time_period in changes[parameter]:
|
259
|
+
p.update(
|
260
|
+
period=time_period,
|
261
|
+
value=changes[parameter][time_period],
|
184
262
|
)
|
263
|
+
else:
|
264
|
+
p.update(period="year:2000:100", value=changes[parameter])
|
265
|
+
|
266
|
+
self.tax_benefit_system.process_parameters()
|
185
267
|
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
)
|
195
|
-
except:
|
196
|
-
pass
|
197
|
-
|
198
|
-
try:
|
199
|
-
UKMultiYearDataset.validate_file_path(
|
200
|
-
dataset_file_path
|
201
|
-
)
|
202
|
-
dataset = UKMultiYearDataset(
|
203
|
-
file_path=dataset_file_path
|
204
|
-
)
|
205
|
-
except Exception as e:
|
206
|
-
pass
|
207
|
-
|
208
|
-
if not isinstance(
|
209
|
-
dataset, (UKSingleYearDataset, UKMultiYearDataset)
|
210
|
-
):
|
211
|
-
dataset = Dataset.from_file(dataset_file_path)
|
212
|
-
|
213
|
-
super().__init__(*args, dataset=dataset, **kwargs)
|
214
|
-
|
215
|
-
reform = create_structural_reforms_from_parameters(
|
216
|
-
self.tax_benefit_system.parameters, "2023-01-01"
|
268
|
+
def build_from_situation(self, situation: Dict) -> None:
|
269
|
+
"""Build simulation from a situation dictionary.
|
270
|
+
|
271
|
+
Args:
|
272
|
+
situation: Dictionary describing household composition and characteristics
|
273
|
+
"""
|
274
|
+
self.build_from_populations(
|
275
|
+
self.tax_benefit_system.instantiate_entities()
|
217
276
|
)
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
277
|
+
from policyengine_core.simulations.simulation_builder import (
|
278
|
+
SimulationBuilder,
|
279
|
+
) # Import here to avoid circular dependency
|
280
|
+
|
281
|
+
builder = SimulationBuilder()
|
282
|
+
builder.default_period = self.default_input_period
|
283
|
+
builder.build_from_dict(self.tax_benefit_system, situation, self)
|
284
|
+
self.has_axes = builder.has_axes
|
285
|
+
|
286
|
+
def build_from_url(self, url: str) -> None:
|
287
|
+
"""Build simulation from a HuggingFace dataset URL.
|
288
|
+
|
289
|
+
Args:
|
290
|
+
url: HuggingFace URL in format "hf://owner/repo/filename"
|
291
|
+
|
292
|
+
Raises:
|
293
|
+
ValueError: If URL is not a HuggingFace URL
|
294
|
+
"""
|
295
|
+
if "hf://" not in url:
|
296
|
+
raise ValueError(
|
297
|
+
f"Non-HuggingFace URLs are currently not supported."
|
298
|
+
)
|
236
299
|
|
237
|
-
#
|
300
|
+
# Parse HuggingFace URL components
|
301
|
+
owner, repo, filename = url.split("/")[-3:]
|
302
|
+
if "@" in filename:
|
303
|
+
version = filename.split("@")[-1]
|
304
|
+
filename = filename.split("@")[0]
|
305
|
+
else:
|
306
|
+
version = None
|
307
|
+
|
308
|
+
# Download dataset from HuggingFace
|
309
|
+
dataset = download_huggingface_dataset(
|
310
|
+
repo=f"{owner}/{repo}",
|
311
|
+
repo_filename=filename,
|
312
|
+
version=version,
|
313
|
+
)
|
238
314
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
315
|
+
# Determine dataset type and build accordingly
|
316
|
+
if UKMultiYearDataset.validate_file_path(dataset, False):
|
317
|
+
self.build_from_multi_year_dataset(UKMultiYearDataset(dataset))
|
318
|
+
self.dataset = dataset
|
319
|
+
elif UKSingleYearDataset.validate_file_path(dataset, False):
|
320
|
+
self.build_from_single_year_dataset(UKSingleYearDataset(dataset))
|
321
|
+
self.dataset = dataset
|
322
|
+
else:
|
323
|
+
dataset = Dataset.from_file(dataset, self.default_input_period)
|
324
|
+
self.build_from_dataset(dataset)
|
325
|
+
|
326
|
+
def build_from_dataframe(self, df: pd.DataFrame) -> None:
|
327
|
+
"""Build simulation from a pandas DataFrame.
|
328
|
+
|
329
|
+
Args:
|
330
|
+
df: DataFrame with columns in format "variable_name__time_period"
|
331
|
+
"""
|
332
|
+
|
333
|
+
def get_first_array(variable_name: str) -> pd.Series:
|
334
|
+
"""Extract the first array for a given variable name pattern."""
|
335
|
+
columns = df.columns[df.columns.str.contains(variable_name + "__")]
|
336
|
+
return df[columns[0]]
|
337
|
+
|
338
|
+
# Extract ID columns
|
339
|
+
(
|
340
|
+
person_id,
|
341
|
+
person_benunit_id,
|
342
|
+
person_household_id,
|
343
|
+
benunit_id,
|
344
|
+
household_id,
|
345
|
+
) = map(
|
346
|
+
get_first_array,
|
347
|
+
[
|
348
|
+
"person_id",
|
349
|
+
"person_benunit_id",
|
350
|
+
"person_household_id",
|
351
|
+
"benunit_id",
|
352
|
+
"household_id",
|
353
|
+
],
|
354
|
+
)
|
247
355
|
|
248
|
-
#
|
356
|
+
# Build entity structure
|
357
|
+
self.build_from_ids(
|
358
|
+
person_id,
|
359
|
+
person_benunit_id,
|
360
|
+
person_household_id,
|
361
|
+
benunit_id,
|
362
|
+
household_id,
|
363
|
+
)
|
249
364
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
365
|
+
# Set input values for each variable and time period
|
366
|
+
for column in df:
|
367
|
+
variable, time_period = column.split("__")
|
368
|
+
if variable not in self.tax_benefit_system.variables:
|
369
|
+
continue
|
370
|
+
self.set_input(variable, time_period, df[column])
|
371
|
+
|
372
|
+
def build_from_dataset(self, dataset: Dataset) -> None:
|
373
|
+
"""Build simulation from a Dataset object.
|
374
|
+
|
375
|
+
Args:
|
376
|
+
dataset: PolicyEngine Dataset object containing simulation data
|
377
|
+
"""
|
378
|
+
data: Dict[str, Dict[str, Union[float, int, str]]] = (
|
379
|
+
dataset.load_dataset()
|
380
|
+
)
|
381
|
+
|
382
|
+
first_variable = data[list(data.keys())[0]]
|
383
|
+
first_time_period = list(first_variable.keys())[0]
|
384
|
+
|
385
|
+
def get_first_array(variable_name: str) -> np.ndarray:
|
386
|
+
"""Get the first time period's values for a variable."""
|
387
|
+
time_period_values = data[variable_name]
|
388
|
+
return time_period_values[first_time_period]
|
389
|
+
|
390
|
+
# Build entity structure from IDs
|
391
|
+
self.build_from_ids(
|
392
|
+
*map(
|
393
|
+
get_first_array,
|
394
|
+
[
|
395
|
+
"person_id",
|
396
|
+
"person_benunit_id",
|
397
|
+
"person_household_id",
|
398
|
+
"benunit_id",
|
399
|
+
"household_id",
|
400
|
+
],
|
401
|
+
)
|
402
|
+
)
|
403
|
+
|
404
|
+
# Load all variable values
|
405
|
+
for variable in data:
|
406
|
+
for time_period in data[variable]:
|
407
|
+
if variable not in self.tax_benefit_system.variables:
|
408
|
+
continue
|
254
409
|
self.set_input(
|
255
|
-
|
410
|
+
variable, time_period, data[variable][time_period]
|
256
411
|
)
|
257
|
-
|
412
|
+
|
413
|
+
# Now convert to the new UKSingleYearDataset
|
414
|
+
self.input_variables = self.get_known_variables()
|
415
|
+
self.dataset = dataset
|
416
|
+
dataset = UKSingleYearDataset.from_simulation(
|
417
|
+
self, fiscal_year=first_time_period
|
418
|
+
)
|
419
|
+
multi_year_dataset = extend_single_year_dataset(dataset)
|
420
|
+
|
421
|
+
self.build_from_multi_year_dataset(multi_year_dataset)
|
422
|
+
self.dataset = multi_year_dataset
|
423
|
+
|
424
|
+
def build_from_single_year_dataset(
|
425
|
+
self, dataset: UKSingleYearDataset
|
426
|
+
) -> None:
|
427
|
+
"""Build simulation from a single-year UK dataset.
|
428
|
+
|
429
|
+
Args:
|
430
|
+
dataset: UKSingleYearDataset containing one year of data
|
431
|
+
"""
|
432
|
+
|
433
|
+
dataset = extend_single_year_dataset(dataset)
|
434
|
+
self.build_from_multi_year_dataset(dataset)
|
435
|
+
|
436
|
+
def build_from_multi_year_dataset(
|
437
|
+
self, dataset: UKMultiYearDataset
|
438
|
+
) -> None:
|
439
|
+
"""Build simulation from a multi-year UK dataset.
|
440
|
+
|
441
|
+
Args:
|
442
|
+
dataset: UKMultiYearDataset containing multiple years of data
|
443
|
+
"""
|
444
|
+
# Use first year to establish entity structure
|
445
|
+
first_year = dataset[dataset.years[0]]
|
446
|
+
self.build_from_ids(
|
447
|
+
first_year.person.person_id,
|
448
|
+
first_year.person.person_benunit_id,
|
449
|
+
first_year.person.person_household_id,
|
450
|
+
first_year.benunit.benunit_id,
|
451
|
+
first_year.household.household_id,
|
452
|
+
)
|
453
|
+
|
454
|
+
# Load variable values for all years
|
455
|
+
for year in dataset.years:
|
456
|
+
for table in dataset[year].tables:
|
457
|
+
for variable in table.columns:
|
458
|
+
if variable not in self.tax_benefit_system.variables:
|
459
|
+
continue
|
460
|
+
self.set_input(variable, year, table[variable])
|
461
|
+
|
462
|
+
def build_from_ids(
|
463
|
+
self,
|
464
|
+
person_id: np.ndarray,
|
465
|
+
person_benunit_id: np.ndarray,
|
466
|
+
person_household_id: np.ndarray,
|
467
|
+
benunit_id: np.ndarray,
|
468
|
+
household_id: np.ndarray,
|
469
|
+
) -> None:
|
470
|
+
"""Build simulation entities from ID arrays.
|
471
|
+
|
472
|
+
Args:
|
473
|
+
person_id: Array of person IDs
|
474
|
+
person_benunit_id: Array mapping persons to benefit units
|
475
|
+
person_household_id: Array mapping persons to households
|
476
|
+
benunit_id: Array of benefit unit IDs
|
477
|
+
household_id: Array of household IDs
|
478
|
+
"""
|
479
|
+
from policyengine_core.simulations.simulation_builder import (
|
480
|
+
SimulationBuilder,
|
481
|
+
) # Import here to avoid circular dependency
|
482
|
+
|
483
|
+
builder = SimulationBuilder()
|
484
|
+
builder.populations = self.tax_benefit_system.instantiate_entities()
|
485
|
+
|
486
|
+
# Declare entities
|
487
|
+
builder.declare_person_entity("person", person_id)
|
488
|
+
builder.declare_entity("benunit", np.unique(benunit_id))
|
489
|
+
builder.declare_entity("household", np.unique(household_id))
|
490
|
+
|
491
|
+
# Link persons to benefit units and households
|
492
|
+
builder.join_with_persons(
|
493
|
+
builder.populations["benunit"],
|
494
|
+
person_benunit_id,
|
495
|
+
np.array(["member"] * len(person_benunit_id)),
|
496
|
+
)
|
497
|
+
builder.join_with_persons(
|
498
|
+
builder.populations["household"],
|
499
|
+
person_household_id,
|
500
|
+
np.array(["member"] * len(person_household_id)),
|
501
|
+
)
|
502
|
+
|
503
|
+
self.build_from_populations(builder.populations)
|
504
|
+
|
505
|
+
def move_values(self, variable_donor: str, variable_target: str) -> None:
|
506
|
+
"""Move values from one variable to another across all branches.
|
507
|
+
|
508
|
+
Used for behavioral response modeling where original values need
|
509
|
+
to be preserved.
|
510
|
+
|
511
|
+
Args:
|
512
|
+
variable_donor: Variable to move values from
|
513
|
+
variable_target: Variable to move values to
|
514
|
+
"""
|
515
|
+
for simulation in list(self.branches.values()) + [self]:
|
516
|
+
holder = simulation.get_holder(variable_donor)
|
517
|
+
for known_period in holder.get_known_periods():
|
518
|
+
array = holder.get_array(known_period)
|
519
|
+
simulation.set_input(variable_target, known_period, array)
|
520
|
+
holder.delete_arrays(known_period)
|
521
|
+
|
522
|
+
def calculate(
|
523
|
+
self,
|
524
|
+
variable_name: str,
|
525
|
+
period: str = None,
|
526
|
+
map_to: str = None,
|
527
|
+
decode_enums: bool = False,
|
528
|
+
):
|
529
|
+
tracer: SimpleTracer = self.tracer
|
530
|
+
if len(tracer.stack) == 0:
|
531
|
+
# Only decode enums to string values when we're not within
|
532
|
+
# the simulation tree.
|
533
|
+
decode_enums = True
|
534
|
+
|
535
|
+
if period is None:
|
536
|
+
period = self.default_calculation_period
|
537
|
+
|
538
|
+
period = period_(period)
|
539
|
+
|
540
|
+
return super().calculate(
|
541
|
+
variable_name, period, map_to=map_to, decode_enums=decode_enums
|
542
|
+
)
|
543
|
+
|
544
|
+
|
545
|
+
class Microsimulation(Simulation):
|
546
|
+
"""Extended simulation class with weighting support for microsimulation.
|
547
|
+
|
548
|
+
Provides weighted calculations using survey weights for population-level
|
549
|
+
estimates and statistics.
|
550
|
+
"""
|
551
|
+
|
552
|
+
def get_weights(
|
553
|
+
self, variable_name: str, period: str, map_to: Optional[str] = None
|
554
|
+
) -> np.ndarray:
|
555
|
+
"""Get weights for the specified variable's entity.
|
556
|
+
|
557
|
+
Args:
|
558
|
+
variable_name: Name of the variable to get weights for
|
559
|
+
period: Time period for the weights
|
560
|
+
map_to: Optional entity key to map weights to
|
561
|
+
|
562
|
+
Returns:
|
563
|
+
Array of weights for the entity
|
564
|
+
"""
|
565
|
+
variable = self.tax_benefit_system.get_variable(variable_name)
|
566
|
+
entity_key = map_to or variable.entity.key
|
567
|
+
weight_variable_name = f"{entity_key}_weight"
|
568
|
+
return self.calculate(
|
569
|
+
weight_variable_name, period, map_to=map_to, unweighted=True
|
570
|
+
)
|
571
|
+
|
572
|
+
def calculate(
|
573
|
+
self,
|
574
|
+
variable_name: str,
|
575
|
+
period: str = None,
|
576
|
+
map_to: str = None,
|
577
|
+
decode_enums: bool = False,
|
578
|
+
unweighted: bool = False,
|
579
|
+
):
|
580
|
+
tracer: SimpleTracer = self.tracer
|
581
|
+
|
582
|
+
result = super().calculate(
|
583
|
+
variable_name, period, map_to=map_to, decode_enums=decode_enums
|
584
|
+
)
|
585
|
+
|
586
|
+
if not unweighted and len(tracer.stack) == 0:
|
587
|
+
weights = self.get_weights(variable_name, period, map_to=map_to)
|
588
|
+
return MicroSeries(result, weights=weights)
|
589
|
+
|
590
|
+
return result
|
591
|
+
|
592
|
+
def calculate_dataframe(
|
593
|
+
self,
|
594
|
+
variable_names: List[str],
|
595
|
+
period: Optional[str] = None,
|
596
|
+
map_to: Optional[str] = None,
|
597
|
+
use_weights: bool = True,
|
598
|
+
) -> MicroDataFrame:
|
599
|
+
"""Calculate multiple variables as a weighted DataFrame.
|
600
|
+
|
601
|
+
Args:
|
602
|
+
variable_names: List of variable names to calculate
|
603
|
+
period: Time period for calculation
|
604
|
+
map_to: Optional entity key to map results to
|
605
|
+
use_weights: Whether to apply survey weights
|
606
|
+
|
607
|
+
Returns:
|
608
|
+
MicroDataFrame with calculated values and weights
|
609
|
+
"""
|
610
|
+
values = super().calculate_dataframe(variable_names, period, map_to)
|
611
|
+
if not use_weights:
|
612
|
+
return values
|
613
|
+
weights = self.get_weights(variable_names[0], period, map_to=map_to)
|
614
|
+
return MicroDataFrame(values, weights=weights)
|