policyengine-uk 2.42.0__py3-none-any.whl → 2.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
policyengine_uk/system.py CHANGED
@@ -1,21 +1,40 @@
1
+ # Standard library imports
2
+ import copy
1
3
  from pathlib import Path
2
- from policyengine_uk.entities import entities
4
+ from typing import Any, Dict, List, Optional, Union, Type
5
+
6
+ # Third-party imports
7
+ import numpy as np
8
+ import pandas as pd
9
+ from microdf import MicroDataFrame, MicroSeries
10
+
11
+ # PolicyEngine core imports
3
12
  from policyengine_core.data import Dataset
4
- from policyengine_core.taxbenefitsystems import TaxBenefitSystem
5
- from policyengine_core.simulations import (
6
- Simulation as CoreSimulation,
7
- Microsimulation as CoreMicrosimulation,
13
+ from policyengine_core.parameters.operations.propagate_parameter_metadata import (
14
+ propagate_parameter_metadata,
8
15
  )
9
- from policyengine_uk.data.dataset_schema import (
10
- UKSingleYearDataset,
11
- UKMultiYearDataset,
16
+ from policyengine_core.periods import period as period_
17
+ from policyengine_core.parameters.operations.uprate_parameters import (
18
+ uprate_parameters,
12
19
  )
20
+ from policyengine_core.parameters import Parameter
21
+ from policyengine_core.reforms import Reform
22
+ from policyengine_core.simulations import Simulation as CoreSimulation
23
+ from policyengine_core.taxbenefitsystems import TaxBenefitSystem
13
24
  from policyengine_core.tools.hugging_face import download_huggingface_dataset
25
+ from policyengine_core.tracers import FullTracer, SimpleTracer
14
26
 
15
- import pandas as pd
16
- from policyengine_uk.utils.parameters import (
17
- backdate_parameters,
18
- convert_to_fiscal_year_parameters,
27
+ # PolicyEngine UK imports
28
+ from policyengine_uk.data.dataset_schema import (
29
+ UKMultiYearDataset,
30
+ UKSingleYearDataset,
31
+ )
32
+ from policyengine_uk.entities import BenUnit, Household, Person
33
+ from policyengine_uk.parameters.gov.contrib.create_private_pension_uprating import (
34
+ add_private_pension_uprating_factor,
35
+ )
36
+ from policyengine_uk.parameters.gov.dwp.state_pension.triple_lock.create_triple_lock import (
37
+ add_triple_lock,
19
38
  )
20
39
  from policyengine_uk.parameters.gov.economic_assumptions.create_economic_assumption_indices import (
21
40
  create_economic_assumption_indices,
@@ -26,38 +45,29 @@ from policyengine_uk.parameters.gov.economic_assumptions.lag_average_earnings im
26
45
  from policyengine_uk.parameters.gov.economic_assumptions.lag_cpi import (
27
46
  add_lagged_cpi,
28
47
  )
29
- from policyengine_core.reforms import Reform
30
- from policyengine_uk.reforms import create_structural_reforms_from_parameters
31
-
32
- from policyengine_uk.parameters.gov.contrib.create_private_pension_uprating import (
33
- add_private_pension_uprating_factor,
34
- )
35
- from policyengine_uk.parameters.gov.dwp.state_pension.triple_lock.create_triple_lock import (
36
- add_triple_lock,
37
- )
38
- from policyengine_core.parameters.operations.homogenize_parameters import (
39
- homogenize_parameter_structures,
40
- )
41
- from policyengine_core.parameters.operations.interpolate_parameters import (
42
- interpolate_parameters,
43
- )
44
- from policyengine_core.parameters.operations.propagate_parameter_metadata import (
45
- propagate_parameter_metadata,
48
+ from policyengine_uk.utils.parameters import (
49
+ backdate_parameters,
50
+ convert_to_fiscal_year_parameters,
46
51
  )
47
- from policyengine_core.parameters.operations.uprate_parameters import (
48
- uprate_parameters,
52
+ from policyengine_uk.utils.scenario import Scenario
53
+ from policyengine_uk.data.economic_assumptions import (
54
+ apply_uprating,
55
+ extend_single_year_dataset,
49
56
  )
50
- from policyengine_core.reforms import Reform
51
57
 
58
+ # Module constants
52
59
  COUNTRY_DIR = Path(__file__).parent
53
-
54
60
  ENHANCED_FRS = "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"
55
61
 
56
62
 
57
63
  class CountryTaxBenefitSystem(TaxBenefitSystem):
58
- variables_dir = COUNTRY_DIR / "variables"
59
- auto_carry_over_input_variables = True
60
- basic_inputs = [
64
+ """UK-specific tax and benefit system implementation.
65
+
66
+ This class defines the UK tax-benefit system with all relevant
67
+ variables, parameters, and entities (Person, BenUnit, Household).
68
+ """
69
+
70
+ basic_inputs: List[str] = [
61
71
  "brma",
62
72
  "local_authority",
63
73
  "region",
@@ -65,193 +75,540 @@ class CountryTaxBenefitSystem(TaxBenefitSystem):
65
75
  "age",
66
76
  ]
67
77
  modelled_policies = COUNTRY_DIR / "modelled_policies.yaml"
78
+ auto_carry_over_input_variables: bool = True
79
+
80
+ def reset_parameters(self) -> None:
81
+ """Reset parameters by reloading from the parameters directory."""
82
+ self._parameters_at_instant_cache = {}
83
+ self.load_parameters(self.parameters_dir)
68
84
 
69
- def process_parameters(self, reform=None):
70
- if reform:
71
- self.apply_reform_set(reform)
85
+ def process_parameters(self) -> None:
86
+ """Process and transform parameters with UK-specific adjustments.
87
+
88
+ Applies various parameter transformations including:
89
+ - Private pension uprating factors
90
+ - Lagged earnings and CPI indices
91
+ - Triple lock calculations for state pensions
92
+ - Economic assumption indices
93
+ - Parameter uprating and backdating
94
+ - Conversion to fiscal year parameters
95
+ """
96
+ self._parameters_at_instant_cache = {}
97
+ # Add various UK-specific parameter adjustments
72
98
  self.parameters = add_private_pension_uprating_factor(self.parameters)
73
99
  self.parameters = add_lagged_earnings(self.parameters)
74
100
  self.parameters = add_lagged_cpi(self.parameters)
75
101
  self.parameters = add_triple_lock(self.parameters)
76
102
  self.parameters = create_economic_assumption_indices(self.parameters)
103
+
104
+ # Create baseline parameters for reform comparisons
77
105
  self.parameters.add_child("baseline", self.parameters.clone())
78
- self.parameters = homogenize_parameter_structures(
79
- self.parameters, self.variables
80
- )
106
+
107
+ # Apply general parameter operations
81
108
  self.parameters = propagate_parameter_metadata(self.parameters)
82
- self.parameters = interpolate_parameters(self.parameters)
83
109
  self.parameters = uprate_parameters(self.parameters)
84
- self.parameters = propagate_parameter_metadata(self.parameters)
85
- self.add_abolition_parameters()
86
110
  self.parameters = backdate_parameters(self.parameters, "2015-01-01")
87
-
88
111
  self.parameters.gov = convert_to_fiscal_year_parameters(
89
112
  self.parameters.gov
90
113
  )
91
114
 
92
- def __init__(self, reform=None):
93
- super().__init__(entities, reform=reform)
115
+ def __init__(self):
116
+ """Initialize the UK tax-benefit system with entities and parameters."""
117
+ self._parameters_at_instant_cache: Dict[str, Any] = {}
118
+ self.variables: Dict[Any, Any] = {}
94
119
 
95
- self.parameters_dir = COUNTRY_DIR / "parameters"
120
+ # Create copies of entity classes to avoid modifying originals
121
+ person, benunit, household = (
122
+ copy.copy(Person),
123
+ copy.copy(BenUnit),
124
+ copy.copy(Household),
125
+ )
96
126
 
97
- self.load_parameters(self.parameters_dir)
127
+ # Set up entities
128
+ self.entities = [person, benunit, household]
129
+ self.person_entity = person
130
+ self.group_entities = [benunit, household]
131
+ self.group_entity_keys = [entity.key for entity in self.group_entities]
98
132
 
99
- self.process_parameters(reform=reform)
133
+ # Link entities to this tax-benefit system
134
+ for entity in self.entities:
135
+ entity.set_tax_benefit_system(self)
100
136
 
137
+ self.variable_module_metadata = {}
101
138
 
102
- system = CountryTaxBenefitSystem()
139
+ # Load all variables from the variables directory
140
+ self.add_variables_from_directory(COUNTRY_DIR / "variables")
141
+
142
+ # Set up and process parameters
143
+ self.parameters_dir = COUNTRY_DIR / "parameters"
144
+ self.reset_parameters()
145
+ self.process_parameters()
103
146
 
147
+
148
+ # Create system instance for module-level access
149
+ system = CountryTaxBenefitSystem()
104
150
  parameters = system.parameters
105
151
  variables = system.variables
106
152
 
107
153
 
108
154
  class Simulation(CoreSimulation):
109
- default_tax_benefit_system = CountryTaxBenefitSystem
110
- default_tax_benefit_system_instance = system
111
- default_calculation_period = 2023
112
- default_input_period = 2023
113
- default_role = "member"
114
- max_spiral_loops = 10
115
-
116
- def __init__(self, *args, **kwargs):
117
- super().__init__(*args, **kwargs)
118
-
119
- reform = create_structural_reforms_from_parameters(
120
- self.tax_benefit_system.parameters, "2023-01-01"
121
- )
155
+ """UK-specific simulation class for calculating tax and benefit outcomes.
156
+
157
+ Extends the core simulation functionality with UK-specific features
158
+ and data handling capabilities.
159
+ """
160
+
161
+ default_input_period: int = 2025
162
+ default_calculation_period: int = 2025
163
+
164
+ def __init__(
165
+ self,
166
+ scenario: Optional[Scenario] = None,
167
+ situation: Optional[Dict] = None,
168
+ dataset: Optional[
169
+ Union[pd.DataFrame, str, UKSingleYearDataset, UKMultiYearDataset]
170
+ ] = None,
171
+ trace: bool = False,
172
+ reform: Dict | Type[Reform] = None,
173
+ ):
174
+ """Initialize a UK simulation.
175
+
176
+ Args:
177
+ scenario: A Scenario object defining a modification to the simulation
178
+ situation: A dictionary describing the situation to simulate
179
+ dataset: Data source - can be DataFrame, URL string, or Dataset object
180
+ trace: Whether to enable detailed tracing of calculations
181
+ """
182
+ # Initialize tax-benefit rules
183
+ self.tax_benefit_system = CountryTaxBenefitSystem()
184
+
185
+ # Migrate Reform to Scenario
186
+
122
187
  if reform is not None:
123
- self.apply_reform(reform)
124
-
125
- reform_dict = kwargs.get("reform")
126
- if reform_dict is not None:
127
- if isinstance(reform_dict, type):
128
- try:
129
- reform_dict = reform_dict.parameter_values
130
- except:
131
- reform_dict = None
132
-
133
- if reform_dict is not None:
134
- if any(["obr" in param for param in reform_dict]):
135
- self.tax_benefit_system.load_parameters(
136
- self.tax_benefit_system.parameters_dir
137
- )
138
- Reform.from_dict(reform_dict).apply(self.tax_benefit_system)
139
- self.tax_benefit_system.process_parameters()
188
+ scenario = Scenario.from_reform(reform)
189
+
190
+ # Apply parametric reforms here
191
+
192
+ if scenario is not None:
193
+ if scenario.parameter_changes is not None:
194
+ self.apply_parameter_changes(scenario.parameter_changes)
195
+
196
+ self.branch_name = "default"
197
+ self.invalidated_caches = set()
198
+ self.debug: bool = False
199
+ self.trace: bool = trace
200
+ self.tracer: SimpleTracer = (
201
+ SimpleTracer() if not trace else FullTracer()
202
+ )
203
+ self.opt_out_cache: bool = False
204
+ self.max_spiral_loops: int = 10
205
+ self.memory_config = None
206
+ self._data_storage_dir: Optional[str] = None
207
+
208
+ self.branches: Dict[str, Simulation] = {}
209
+
210
+ # Build simulation from appropriate source
211
+ if situation is not None:
212
+ self.build_from_situation(situation)
213
+ elif isinstance(dataset, str):
214
+ self.build_from_url(dataset)
215
+ elif isinstance(dataset, pd.DataFrame):
216
+ self.build_from_dataframe(dataset)
217
+ elif isinstance(dataset, Dataset):
218
+ self.build_from_dataset(dataset)
219
+ elif isinstance(dataset, UKSingleYearDataset):
220
+ self.build_from_single_year_dataset(dataset)
221
+ elif isinstance(dataset, UKMultiYearDataset):
222
+ self.build_from_multi_year_dataset(dataset)
223
+ elif dataset is None:
224
+ self.build_from_url(
225
+ "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5"
226
+ )
227
+ else:
228
+ raise ValueError(f"Unsupported dataset type: {dataset.__class__}")
140
229
 
141
- # Labor supply responses
230
+ # Handle behavioral responses for earnings and capital gains
231
+ self.move_values("employment_income", "employment_income_before_lsr")
232
+ self.move_values("capital_gains", "capital_gains_before_response")
142
233
 
143
- employment_income = self.get_holder("employment_income")
144
- for known_period in employment_income.get_known_periods():
145
- array = employment_income.get_array(known_period)
146
- self.set_input("employment_income_before_lsr", known_period, array)
147
- employment_income.delete_arrays(known_period)
234
+ self.input_variables = self.get_known_variables()
148
235
 
149
- # Capital gains responses
236
+ # Apply structural modifiers
150
237
 
151
- cg_holder = self.get_holder("capital_gains")
152
- for known_period in cg_holder.get_known_periods():
153
- array = cg_holder.get_array(known_period)
154
- self.set_input(
155
- "capital_gains_before_response", known_period, array
238
+ if scenario is not None:
239
+ if scenario.simulation_modifier is not None:
240
+ scenario.simulation_modifier(self)
241
+
242
+ def get_known_variables(self):
243
+ variables = []
244
+ for variable in self.tax_benefit_system.variables:
245
+ if len(self.get_holder(variable).get_known_periods()) > 0:
246
+ variables.append(variable)
247
+ return variables
248
+
249
+ def apply_parameter_changes(self, changes: dict):
250
+ self.tax_benefit_system.reset_parameters()
251
+
252
+ for parameter in changes:
253
+ p: Parameter = self.tax_benefit_system.parameters.get_child(
254
+ parameter
156
255
  )
157
- employment_income.delete_arrays(known_period)
158
-
159
-
160
- class Microsimulation(CoreMicrosimulation):
161
- default_tax_benefit_system = CountryTaxBenefitSystem
162
- default_dataset = ENHANCED_FRS
163
- default_dataset_year = 2022
164
- default_tax_benefit_system_instance = system
165
- default_calculation_period = 2025
166
- default_input_period = 2025
167
- default_role = "member"
168
- max_spiral_loops = 10
169
-
170
- def __init__(self, *args, dataset=ENHANCED_FRS, **kwargs):
171
- if dataset is not None:
172
- if isinstance(dataset, str):
173
- if "hf://" in dataset:
174
- owner, repo, filename = dataset.split("/")[-3:]
175
- if "@" in filename:
176
- version = filename.split("@")[-1]
177
- filename = filename.split("@")[0]
178
- else:
179
- version = None
180
- dataset_file_path = download_huggingface_dataset(
181
- repo=f"{owner}/{repo}",
182
- repo_filename=filename,
183
- version=version,
256
+ if isinstance(changes[parameter], dict):
257
+ # Time-period specific changes
258
+ for time_period in changes[parameter]:
259
+ p.update(
260
+ period=time_period,
261
+ value=changes[parameter][time_period],
184
262
  )
263
+ else:
264
+ p.update(period="year:2000:100", value=changes[parameter])
265
+
266
+ self.tax_benefit_system.process_parameters()
185
267
 
186
- if Path(dataset_file_path).exists():
187
- if dataset_file_path.endswith(".h5"):
188
- try:
189
- UKSingleYearDataset.validate_file_path(
190
- dataset_file_path
191
- )
192
- dataset = UKSingleYearDataset(
193
- file_path=dataset_file_path
194
- )
195
- except:
196
- pass
197
-
198
- try:
199
- UKMultiYearDataset.validate_file_path(
200
- dataset_file_path
201
- )
202
- dataset = UKMultiYearDataset(
203
- file_path=dataset_file_path
204
- )
205
- except Exception as e:
206
- pass
207
-
208
- if not isinstance(
209
- dataset, (UKSingleYearDataset, UKMultiYearDataset)
210
- ):
211
- dataset = Dataset.from_file(dataset_file_path)
212
-
213
- super().__init__(*args, dataset=dataset, **kwargs)
214
-
215
- reform = create_structural_reforms_from_parameters(
216
- self.tax_benefit_system.parameters, "2023-01-01"
268
+ def build_from_situation(self, situation: Dict) -> None:
269
+ """Build simulation from a situation dictionary.
270
+
271
+ Args:
272
+ situation: Dictionary describing household composition and characteristics
273
+ """
274
+ self.build_from_populations(
275
+ self.tax_benefit_system.instantiate_entities()
217
276
  )
218
- if reform is not None:
219
- self.apply_reform(reform)
220
-
221
- reform_dict = kwargs.get("reform")
222
- if reform_dict is not None:
223
- if isinstance(reform_dict, type):
224
- try:
225
- reform_dict = reform_dict.parameter_values
226
- except:
227
- reform_dict = None
228
-
229
- if reform_dict is not None:
230
- if any(["obr" in param for param in reform_dict]):
231
- self.tax_benefit_system.load_parameters(
232
- self.tax_benefit_system.parameters_dir
233
- )
234
- Reform.from_dict(reform_dict).apply(self.tax_benefit_system)
235
- self.tax_benefit_system.process_parameters()
277
+ from policyengine_core.simulations.simulation_builder import (
278
+ SimulationBuilder,
279
+ ) # Import here to avoid circular dependency
280
+
281
+ builder = SimulationBuilder()
282
+ builder.default_period = self.default_input_period
283
+ builder.build_from_dict(self.tax_benefit_system, situation, self)
284
+ self.has_axes = builder.has_axes
285
+
286
+ def build_from_url(self, url: str) -> None:
287
+ """Build simulation from a HuggingFace dataset URL.
288
+
289
+ Args:
290
+ url: HuggingFace URL in format "hf://owner/repo/filename"
291
+
292
+ Raises:
293
+ ValueError: If URL is not a HuggingFace URL
294
+ """
295
+ if "hf://" not in url:
296
+ raise ValueError(
297
+ f"Non-HuggingFace URLs are currently not supported."
298
+ )
236
299
 
237
- # Labor supply responses
300
+ # Parse HuggingFace URL components
301
+ owner, repo, filename = url.split("/")[-3:]
302
+ if "@" in filename:
303
+ version = filename.split("@")[-1]
304
+ filename = filename.split("@")[0]
305
+ else:
306
+ version = None
307
+
308
+ # Download dataset from HuggingFace
309
+ dataset = download_huggingface_dataset(
310
+ repo=f"{owner}/{repo}",
311
+ repo_filename=filename,
312
+ version=version,
313
+ )
238
314
 
239
- for simulation in list(self.branches.values()) + [self]:
240
- employment_income = simulation.get_holder("employment_income")
241
- for known_period in employment_income.get_known_periods():
242
- array = employment_income.get_array(known_period)
243
- simulation.set_input(
244
- "employment_income_before_lsr", known_period, array
245
- )
246
- employment_income.delete_arrays(known_period)
315
+ # Determine dataset type and build accordingly
316
+ if UKMultiYearDataset.validate_file_path(dataset, False):
317
+ self.build_from_multi_year_dataset(UKMultiYearDataset(dataset))
318
+ self.dataset = dataset
319
+ elif UKSingleYearDataset.validate_file_path(dataset, False):
320
+ self.build_from_single_year_dataset(UKSingleYearDataset(dataset))
321
+ self.dataset = dataset
322
+ else:
323
+ dataset = Dataset.from_file(dataset, self.default_input_period)
324
+ self.build_from_dataset(dataset)
325
+
326
+ def build_from_dataframe(self, df: pd.DataFrame) -> None:
327
+ """Build simulation from a pandas DataFrame.
328
+
329
+ Args:
330
+ df: DataFrame with columns in format "variable_name__time_period"
331
+ """
332
+
333
+ def get_first_array(variable_name: str) -> pd.Series:
334
+ """Extract the first array for a given variable name pattern."""
335
+ columns = df.columns[df.columns.str.contains(variable_name + "__")]
336
+ return df[columns[0]]
337
+
338
+ # Extract ID columns
339
+ (
340
+ person_id,
341
+ person_benunit_id,
342
+ person_household_id,
343
+ benunit_id,
344
+ household_id,
345
+ ) = map(
346
+ get_first_array,
347
+ [
348
+ "person_id",
349
+ "person_benunit_id",
350
+ "person_household_id",
351
+ "benunit_id",
352
+ "household_id",
353
+ ],
354
+ )
247
355
 
248
- # Capital gains responses
356
+ # Build entity structure
357
+ self.build_from_ids(
358
+ person_id,
359
+ person_benunit_id,
360
+ person_household_id,
361
+ benunit_id,
362
+ household_id,
363
+ )
249
364
 
250
- for simulation in list(self.branches.values()) + [self]:
251
- cg_holder = self.get_holder("capital_gains")
252
- for known_period in cg_holder.get_known_periods():
253
- array = cg_holder.get_array(known_period)
365
+ # Set input values for each variable and time period
366
+ for column in df:
367
+ variable, time_period = column.split("__")
368
+ if variable not in self.tax_benefit_system.variables:
369
+ continue
370
+ self.set_input(variable, time_period, df[column])
371
+
372
+ def build_from_dataset(self, dataset: Dataset) -> None:
373
+ """Build simulation from a Dataset object.
374
+
375
+ Args:
376
+ dataset: PolicyEngine Dataset object containing simulation data
377
+ """
378
+ data: Dict[str, Dict[str, Union[float, int, str]]] = (
379
+ dataset.load_dataset()
380
+ )
381
+
382
+ first_variable = data[list(data.keys())[0]]
383
+ first_time_period = list(first_variable.keys())[0]
384
+
385
+ def get_first_array(variable_name: str) -> np.ndarray:
386
+ """Get the first time period's values for a variable."""
387
+ time_period_values = data[variable_name]
388
+ return time_period_values[first_time_period]
389
+
390
+ # Build entity structure from IDs
391
+ self.build_from_ids(
392
+ *map(
393
+ get_first_array,
394
+ [
395
+ "person_id",
396
+ "person_benunit_id",
397
+ "person_household_id",
398
+ "benunit_id",
399
+ "household_id",
400
+ ],
401
+ )
402
+ )
403
+
404
+ # Load all variable values
405
+ for variable in data:
406
+ for time_period in data[variable]:
407
+ if variable not in self.tax_benefit_system.variables:
408
+ continue
254
409
  self.set_input(
255
- "capital_gains_before_response", known_period, array
410
+ variable, time_period, data[variable][time_period]
256
411
  )
257
- employment_income.delete_arrays(known_period)
412
+
413
+ # Now convert to the new UKSingleYearDataset
414
+ self.input_variables = self.get_known_variables()
415
+ self.dataset = dataset
416
+ dataset = UKSingleYearDataset.from_simulation(
417
+ self, fiscal_year=first_time_period
418
+ )
419
+ multi_year_dataset = extend_single_year_dataset(dataset)
420
+
421
+ self.build_from_multi_year_dataset(multi_year_dataset)
422
+ self.dataset = multi_year_dataset
423
+
424
+ def build_from_single_year_dataset(
425
+ self, dataset: UKSingleYearDataset
426
+ ) -> None:
427
+ """Build simulation from a single-year UK dataset.
428
+
429
+ Args:
430
+ dataset: UKSingleYearDataset containing one year of data
431
+ """
432
+
433
+ dataset = extend_single_year_dataset(dataset)
434
+ self.build_from_multi_year_dataset(dataset)
435
+
436
+ def build_from_multi_year_dataset(
437
+ self, dataset: UKMultiYearDataset
438
+ ) -> None:
439
+ """Build simulation from a multi-year UK dataset.
440
+
441
+ Args:
442
+ dataset: UKMultiYearDataset containing multiple years of data
443
+ """
444
+ # Use first year to establish entity structure
445
+ first_year = dataset[dataset.years[0]]
446
+ self.build_from_ids(
447
+ first_year.person.person_id,
448
+ first_year.person.person_benunit_id,
449
+ first_year.person.person_household_id,
450
+ first_year.benunit.benunit_id,
451
+ first_year.household.household_id,
452
+ )
453
+
454
+ # Load variable values for all years
455
+ for year in dataset.years:
456
+ for table in dataset[year].tables:
457
+ for variable in table.columns:
458
+ if variable not in self.tax_benefit_system.variables:
459
+ continue
460
+ self.set_input(variable, year, table[variable])
461
+
462
+ def build_from_ids(
463
+ self,
464
+ person_id: np.ndarray,
465
+ person_benunit_id: np.ndarray,
466
+ person_household_id: np.ndarray,
467
+ benunit_id: np.ndarray,
468
+ household_id: np.ndarray,
469
+ ) -> None:
470
+ """Build simulation entities from ID arrays.
471
+
472
+ Args:
473
+ person_id: Array of person IDs
474
+ person_benunit_id: Array mapping persons to benefit units
475
+ person_household_id: Array mapping persons to households
476
+ benunit_id: Array of benefit unit IDs
477
+ household_id: Array of household IDs
478
+ """
479
+ from policyengine_core.simulations.simulation_builder import (
480
+ SimulationBuilder,
481
+ ) # Import here to avoid circular dependency
482
+
483
+ builder = SimulationBuilder()
484
+ builder.populations = self.tax_benefit_system.instantiate_entities()
485
+
486
+ # Declare entities
487
+ builder.declare_person_entity("person", person_id)
488
+ builder.declare_entity("benunit", np.unique(benunit_id))
489
+ builder.declare_entity("household", np.unique(household_id))
490
+
491
+ # Link persons to benefit units and households
492
+ builder.join_with_persons(
493
+ builder.populations["benunit"],
494
+ person_benunit_id,
495
+ np.array(["member"] * len(person_benunit_id)),
496
+ )
497
+ builder.join_with_persons(
498
+ builder.populations["household"],
499
+ person_household_id,
500
+ np.array(["member"] * len(person_household_id)),
501
+ )
502
+
503
+ self.build_from_populations(builder.populations)
504
+
505
+ def move_values(self, variable_donor: str, variable_target: str) -> None:
506
+ """Move values from one variable to another across all branches.
507
+
508
+ Used for behavioral response modeling where original values need
509
+ to be preserved.
510
+
511
+ Args:
512
+ variable_donor: Variable to move values from
513
+ variable_target: Variable to move values to
514
+ """
515
+ for simulation in list(self.branches.values()) + [self]:
516
+ holder = simulation.get_holder(variable_donor)
517
+ for known_period in holder.get_known_periods():
518
+ array = holder.get_array(known_period)
519
+ simulation.set_input(variable_target, known_period, array)
520
+ holder.delete_arrays(known_period)
521
+
522
+ def calculate(
523
+ self,
524
+ variable_name: str,
525
+ period: str = None,
526
+ map_to: str = None,
527
+ decode_enums: bool = False,
528
+ ):
529
+ tracer: SimpleTracer = self.tracer
530
+ if len(tracer.stack) == 0:
531
+ # Only decode enums to string values when we're not within
532
+ # the simulation tree.
533
+ decode_enums = True
534
+
535
+ if period is None:
536
+ period = self.default_calculation_period
537
+
538
+ period = period_(period)
539
+
540
+ return super().calculate(
541
+ variable_name, period, map_to=map_to, decode_enums=decode_enums
542
+ )
543
+
544
+
545
+ class Microsimulation(Simulation):
546
+ """Extended simulation class with weighting support for microsimulation.
547
+
548
+ Provides weighted calculations using survey weights for population-level
549
+ estimates and statistics.
550
+ """
551
+
552
+ def get_weights(
553
+ self, variable_name: str, period: str, map_to: Optional[str] = None
554
+ ) -> np.ndarray:
555
+ """Get weights for the specified variable's entity.
556
+
557
+ Args:
558
+ variable_name: Name of the variable to get weights for
559
+ period: Time period for the weights
560
+ map_to: Optional entity key to map weights to
561
+
562
+ Returns:
563
+ Array of weights for the entity
564
+ """
565
+ variable = self.tax_benefit_system.get_variable(variable_name)
566
+ entity_key = map_to or variable.entity.key
567
+ weight_variable_name = f"{entity_key}_weight"
568
+ return self.calculate(
569
+ weight_variable_name, period, map_to=map_to, unweighted=True
570
+ )
571
+
572
+ def calculate(
573
+ self,
574
+ variable_name: str,
575
+ period: str = None,
576
+ map_to: str = None,
577
+ decode_enums: bool = False,
578
+ unweighted: bool = False,
579
+ ):
580
+ tracer: SimpleTracer = self.tracer
581
+
582
+ result = super().calculate(
583
+ variable_name, period, map_to=map_to, decode_enums=decode_enums
584
+ )
585
+
586
+ if not unweighted and len(tracer.stack) == 0:
587
+ weights = self.get_weights(variable_name, period, map_to=map_to)
588
+ return MicroSeries(result, weights=weights)
589
+
590
+ return result
591
+
592
+ def calculate_dataframe(
593
+ self,
594
+ variable_names: List[str],
595
+ period: Optional[str] = None,
596
+ map_to: Optional[str] = None,
597
+ use_weights: bool = True,
598
+ ) -> MicroDataFrame:
599
+ """Calculate multiple variables as a weighted DataFrame.
600
+
601
+ Args:
602
+ variable_names: List of variable names to calculate
603
+ period: Time period for calculation
604
+ map_to: Optional entity key to map results to
605
+ use_weights: Whether to apply survey weights
606
+
607
+ Returns:
608
+ MicroDataFrame with calculated values and weights
609
+ """
610
+ values = super().calculate_dataframe(variable_names, period, map_to)
611
+ if not use_weights:
612
+ return values
613
+ weights = self.get_weights(variable_names[0], period, map_to=map_to)
614
+ return MicroDataFrame(values, weights=weights)