policyengine 3.0.0__py3-none-any.whl → 3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
- policyengine/core/__init__.py +22 -0
- policyengine/core/dataset.py +260 -0
- policyengine/core/dataset_version.py +16 -0
- policyengine/core/dynamic.py +43 -0
- policyengine/core/output.py +26 -0
- policyengine/{models → core}/parameter.py +4 -2
- policyengine/{models → core}/parameter_value.py +1 -1
- policyengine/core/policy.py +43 -0
- policyengine/{models → core}/simulation.py +10 -14
- policyengine/core/tax_benefit_model.py +11 -0
- policyengine/core/tax_benefit_model_version.py +34 -0
- policyengine/core/variable.py +15 -0
- policyengine/outputs/__init__.py +21 -0
- policyengine/outputs/aggregate.py +124 -0
- policyengine/outputs/change_aggregate.py +184 -0
- policyengine/outputs/decile_impact.py +140 -0
- policyengine/tax_benefit_models/uk/__init__.py +26 -0
- policyengine/tax_benefit_models/uk/analysis.py +97 -0
- policyengine/tax_benefit_models/uk/datasets.py +176 -0
- policyengine/tax_benefit_models/uk/model.py +268 -0
- policyengine/tax_benefit_models/uk/outputs.py +108 -0
- policyengine/tax_benefit_models/uk.py +33 -0
- policyengine/tax_benefit_models/us/__init__.py +36 -0
- policyengine/tax_benefit_models/us/analysis.py +99 -0
- policyengine/tax_benefit_models/us/datasets.py +307 -0
- policyengine/tax_benefit_models/us/model.py +447 -0
- policyengine/tax_benefit_models/us/outputs.py +108 -0
- policyengine/tax_benefit_models/us.py +32 -0
- policyengine/utils/__init__.py +3 -0
- policyengine/utils/dates.py +40 -0
- policyengine/utils/parametric_reforms.py +39 -0
- policyengine/utils/plotting.py +179 -0
- {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/METADATA +185 -20
- policyengine-3.1.1.dist-info/RECORD +39 -0
- policyengine/database/__init__.py +0 -56
- policyengine/database/aggregate.py +0 -33
- policyengine/database/baseline_parameter_value_table.py +0 -66
- policyengine/database/baseline_variable_table.py +0 -40
- policyengine/database/database.py +0 -251
- policyengine/database/dataset_table.py +0 -41
- policyengine/database/dynamic_table.py +0 -34
- policyengine/database/link.py +0 -82
- policyengine/database/model_table.py +0 -27
- policyengine/database/model_version_table.py +0 -28
- policyengine/database/parameter_table.py +0 -31
- policyengine/database/parameter_value_table.py +0 -62
- policyengine/database/policy_table.py +0 -34
- policyengine/database/report_element_table.py +0 -48
- policyengine/database/report_table.py +0 -24
- policyengine/database/simulation_table.py +0 -50
- policyengine/database/user_table.py +0 -28
- policyengine/database/versioned_dataset_table.py +0 -28
- policyengine/models/__init__.py +0 -30
- policyengine/models/aggregate.py +0 -92
- policyengine/models/baseline_parameter_value.py +0 -14
- policyengine/models/baseline_variable.py +0 -12
- policyengine/models/dataset.py +0 -18
- policyengine/models/dynamic.py +0 -15
- policyengine/models/model.py +0 -124
- policyengine/models/model_version.py +0 -14
- policyengine/models/policy.py +0 -17
- policyengine/models/policyengine_uk.py +0 -114
- policyengine/models/policyengine_us.py +0 -115
- policyengine/models/report.py +0 -10
- policyengine/models/report_element.py +0 -36
- policyengine/models/user.py +0 -14
- policyengine/models/versioned_dataset.py +0 -12
- policyengine/utils/charts.py +0 -286
- policyengine/utils/compress.py +0 -20
- policyengine/utils/datasets.py +0 -71
- policyengine-3.0.0.dist-info/RECORD +0 -47
- policyengine-3.0.0.dist-info/entry_points.txt +0 -2
- {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/WHEEL +0 -0
- {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/licenses/LICENSE +0 -0
- {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from importlib.metadata import version
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import requests
|
|
8
|
+
from microdf import MicroDataFrame
|
|
9
|
+
|
|
10
|
+
from policyengine.core import (
|
|
11
|
+
Parameter,
|
|
12
|
+
ParameterValue,
|
|
13
|
+
TaxBenefitModel,
|
|
14
|
+
TaxBenefitModelVersion,
|
|
15
|
+
Variable,
|
|
16
|
+
)
|
|
17
|
+
from policyengine.utils import parse_safe_date
|
|
18
|
+
|
|
19
|
+
from .datasets import PolicyEngineUSDataset, USYearData
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from policyengine.core.simulation import Simulation
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Descriptor for the PolicyEngine US tax-benefit model. It only sets the
# model's identifier and human-readable description; everything else comes
# from the TaxBenefitModel base class.
class PolicyEngineUS(TaxBenefitModel):
    # Identifier; PolicyEngineUSLatest combines it with the package version
    # to build "<model id>@<version>" ids.
    id: str = "policyengine-us"
    # Free-text description of the model.
    description: str = "The US's open-source dynamic tax and benefit microsimulation model maintained by PolicyEngine."
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Module-level model instance; used as the default `model` of
# PolicyEngineUSLatest below.
us_model = PolicyEngineUS()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _get_us_package_metadata():
    """Get PolicyEngine US package version and upload time (lazy-loaded).

    The version is read from the locally installed ``policyengine-us``
    distribution; the upload time is looked up in PyPI's JSON API.

    Returns:
        A ``(version, upload_time)`` tuple where ``upload_time`` is the
        ``upload_time_iso_8601`` string of the first file of that release.

    Raises:
        importlib.metadata.PackageNotFoundError: ``policyengine-us`` is not
            installed.
        requests.RequestException: the PyPI request failed, timed out or
            returned an error status.
        KeyError / IndexError: the installed version has no release files on
            PyPI (e.g. a local development build).
    """
    pkg_version = version("policyengine-us")
    # Get published time from PyPI. A timeout is mandatory here: this runs
    # while the module is being imported, so a stalled connection would
    # otherwise hang the import forever.
    response = requests.get(
        "https://pypi.org/pypi/policyengine-us/json", timeout=30
    )
    # Surface HTTP errors directly instead of failing later on a JSON body
    # that does not have the expected shape.
    response.raise_for_status()
    releases = response.json()["releases"]
    upload_time = releases[pkg_version][0]["upload_time_iso_8601"]
    return pkg_version, upload_time
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Model version bound to the locally installed ``policyengine-us`` package.
# Construction catalogues every variable and parameter of the US system;
# ``run`` executes a microsimulation over a PolicyEngineUSDataset and stores
# the results as a new output dataset.
class PolicyEngineUSLatest(TaxBenefitModelVersion):
    model: TaxBenefitModel = us_model
    # Both are filled in by __init__ from package metadata when omitted.
    version: str = None
    created_at: datetime.datetime = None

    def __init__(self, **kwargs: dict):
        """Build the version record and catalogue variables and parameters.

        Args:
            **kwargs: Field values forwarded to ``TaxBenefitModelVersion``.
                When ``version`` is missing or ``None``, ``version`` and
                ``created_at`` are taken from the installed
                ``policyengine-us`` package and its PyPI upload time (this
                performs a network request).
        """
        # Lazy-load package metadata if not provided
        if kwargs.get("version") is None:
            pkg_version, upload_time = _get_us_package_metadata()
            kwargs["version"] = pkg_version
            # PyPI timestamps end in "Z", which datetime.fromisoformat only
            # accepts from Python 3.11 on; "+00:00" parses to the same
            # UTC-aware value on every supported version.
            kwargs["created_at"] = datetime.datetime.fromisoformat(
                upload_time.replace("Z", "+00:00")
            )

        super().__init__(**kwargs)

        # Imported lazily so the heavy country package is only loaded when a
        # model version is actually instantiated.
        from policyengine_core.enums import Enum
        from policyengine_core.parameters import Parameter as CoreParameter
        from policyengine_us.system import system

        self.id = f"{self.model.id}@{self.version}"

        self.variables = []
        for var_obj in system.variables.values():
            variable = Variable(
                id=self.id + "-" + var_obj.name,
                name=var_obj.name,
                tax_benefit_model_version=self,
                entity=var_obj.entity.key,
                description=var_obj.documentation,
                # Enum-valued variables are exposed as strings (member names).
                data_type=var_obj.value_type
                if var_obj.value_type is not Enum
                else str,
            )
            if (
                hasattr(var_obj, "possible_values")
                and var_obj.possible_values is not None
            ):
                variable.possible_values = [
                    member.name
                    for member in var_obj.possible_values._value2member_map_.values()
                ]
            self.variables.append(variable)

        self.parameters = []
        for param_node in system.parameters.get_descendants():
            # Only leaf parameters carry values; skip intermediate nodes.
            if not isinstance(param_node, CoreParameter):
                continue

            parameter = Parameter(
                id=self.id + "-" + param_node.name,
                name=param_node.name,
                tax_benefit_model_version=self,
                description=param_node.description,
                # The Python type is sampled from the parameter's 2025 value.
                data_type=type(param_node(2025)),
                unit=param_node.metadata.get("unit"),
            )
            self.parameters.append(parameter)

            # Each value is in force until the next *later* instant. The
            # successor is derived from the sorted instants rather than from
            # list position: values_list is not guaranteed to be in ascending
            # order (policyengine_core appears to keep it newest-first -
            # TODO confirm), and positional pairing would then yield an
            # end_date that precedes start_date. ISO date strings sort
            # chronologically.
            instants = sorted(
                {value.instant_str for value in param_node.values_list}
            )
            next_instant_of = dict(zip(instants, instants[1:]))

            for param_at_instant in param_node.values_list:
                end_instant = next_instant_of.get(param_at_instant.instant_str)
                parameter_value = ParameterValue(
                    parameter=parameter,
                    start_date=parse_safe_date(param_at_instant.instant_str),
                    end_date=parse_safe_date(end_instant)
                    if end_instant is not None
                    else None,
                    value=param_at_instant.value,
                )
                self.parameter_values.append(parameter_value)

    def run(self, simulation: "Simulation") -> "Simulation":
        """Run ``simulation`` and attach/save its output dataset.

        Builds a ``Microsimulation`` from the input dataset, applies the
        policy and dynamic modifiers (an explicit ``simulation_modifier``
        takes precedence over one derived from ``parameter_values``),
        calculates the requested variables per entity and writes the result
        to ``<input dataset directory>/<simulation.id>.h5``.

        Args:
            simulation: Simulation whose ``dataset`` is a
                ``PolicyEngineUSDataset``. ``simulation.variables`` may map
                entity names to variable lists; ``None`` selects a default
                set.

        Returns:
            The same ``simulation`` object, with ``output_dataset`` set.
        """
        from policyengine_us import Microsimulation
        from policyengine_us.system import system

        from policyengine.utils.parametric_reforms import (
            simulation_modifier_from_parameter_values,
        )

        assert isinstance(simulation.dataset, PolicyEngineUSDataset)

        dataset = simulation.dataset
        dataset.load()

        # Build simulation from entity IDs using PolicyEngine Core pattern
        microsim = Microsimulation()
        self._build_simulation_from_dataset(microsim, dataset, system)

        # Apply policy reforms first, then dynamic reforms.
        for reform in (simulation.policy, simulation.dynamic):
            if not reform:
                continue
            if reform.simulation_modifier is not None:
                reform.simulation_modifier(microsim)
            else:
                modifier = simulation_modifier_from_parameter_values(
                    reform.parameter_values
                )
                modifier(microsim)

        # Allow custom variable selection, or use defaults
        if simulation.variables is not None:
            entity_variables = simulation.variables
        else:
            # Default comprehensive variable set
            entity_variables = {
                "person": [
                    # IDs and weights
                    "person_id",
                    "marital_unit_id",
                    "family_id",
                    "spm_unit_id",
                    "tax_unit_id",
                    "household_id",
                    "person_weight",
                    # Demographics
                    "age",
                    # Income
                    "employment_income",
                    # Benefits
                    "ssi",
                    "social_security",
                    "medicaid",
                    "unemployment_compensation",
                ],
                "marital_unit": [
                    "marital_unit_id",
                    "marital_unit_weight",
                ],
                "family": [
                    "family_id",
                    "family_weight",
                ],
                "spm_unit": [
                    "spm_unit_id",
                    "spm_unit_weight",
                    "snap",
                    "tanf",
                    "spm_unit_net_income",
                ],
                "tax_unit": [
                    "tax_unit_id",
                    "tax_unit_weight",
                    "income_tax",
                    "employee_payroll_tax",
                    "eitc",
                    "ctc",
                ],
                "household": [
                    "household_id",
                    "household_weight",
                    "household_net_income",
                    "household_benefits",
                    "household_tax",
                    "household_market_income",
                ],
            }

        entities = (
            "person",
            "marital_unit",
            "family",
            "spm_unit",
            "tax_unit",
            "household",
        )
        data = {entity: pd.DataFrame() for entity in entities}

        # ID columns should be preserved from input dataset, not calculated
        id_columns = {f"{entity}_id" for entity in entities}
        weight_columns = {f"{entity}_weight" for entity in entities}

        # First, copy ID and weight columns from input dataset
        for entity in entities:
            input_df = pd.DataFrame(getattr(dataset.data, entity))
            for column in (f"{entity}_id", f"{entity}_weight"):
                if column in input_df.columns:
                    data[entity][column] = input_df[column].values

        # For person entity, also copy person-level group ID columns
        person_input_df = pd.DataFrame(dataset.data.person)
        for col in person_input_df.columns:
            if col.startswith("person_") and col.endswith("_id"):
                # Map person_household_id -> household_id, etc.
                target_col = col.replace("person_", "")
                if target_col in id_columns:
                    data["person"][target_col] = person_input_df[col].values

        # Then calculate non-ID, non-weight variables from simulation
        preserved = id_columns | weight_columns
        for entity, variables in entity_variables.items():
            for var in variables:
                if var in preserved:
                    continue
                data[entity][var] = microsim.calculate(
                    var, period=dataset.year, map_to=entity
                ).values

        # Wrap every table so aggregations use the entity's own weight column.
        for entity in entities:
            data[entity] = MicroDataFrame(
                data[entity], weights=f"{entity}_weight"
            )

        simulation.output_dataset = PolicyEngineUSDataset(
            name=dataset.name,
            description=dataset.description,
            filepath=str(
                Path(dataset.filepath).parent / (simulation.id + ".h5")
            ),
            year=dataset.year,
            is_output_dataset=True,
            data=USYearData(
                person=data["person"],
                marital_unit=data["marital_unit"],
                family=data["family"],
                spm_unit=data["spm_unit"],
                tax_unit=data["tax_unit"],
                household=data["household"],
            ),
        )

        simulation.output_dataset.save()

        # The signature promises the simulation back; callers that ignored
        # the previous implicit ``None`` are unaffected.
        return simulation

    def _build_simulation_from_dataset(self, microsim, dataset, system):
        """Build a PolicyEngine Core simulation from dataset entity IDs.

        This follows the same pattern as policyengine-uk, initializing
        entities from IDs first, then using set_input() for variables.

        Args:
            microsim: The Microsimulation object to populate
            dataset: The dataset containing entity data
            system: The tax-benefit system
        """
        import numpy as np
        from policyengine_core.simulations.simulation_builder import (
            SimulationBuilder,
        )

        # Group entities, in the order they are declared and joined.
        group_entities = (
            "household",
            "spm_unit",
            "family",
            "tax_unit",
            "marital_unit",
        )

        # Create builder and instantiate entities
        builder = SimulationBuilder()
        builder.populations = system.instantiate_entities()

        # Extract entity IDs from dataset
        person_data = pd.DataFrame(dataset.data.person)

        # Determine column naming convention
        # Support both person_X_id (from create_datasets) and X_id (from custom datasets)
        membership_col = {
            entity: (
                f"person_{entity}_id"
                if f"person_{entity}_id" in person_data.columns
                else f"{entity}_id"
            )
            for entity in group_entities
        }

        # Declare entities
        builder.declare_person_entity(
            "person", person_data["person_id"].values
        )
        for entity in group_entities:
            builder.declare_entity(
                entity, np.unique(person_data[membership_col[entity]].values)
            )

        # Join persons to group entities (every person gets role "member")
        for entity in group_entities:
            builder.join_with_persons(
                builder.populations[entity],
                person_data[membership_col[entity]].values,
                np.array(["member"] * len(person_data)),
            )

        # Build simulation from populations
        microsim.build_from_populations(builder.populations)

        # Set input variables for each entity
        # Skip ID columns as they're structural and already used in entity building
        # Support both naming conventions
        id_columns = {"person_id"}
        for entity in group_entities:
            id_columns.update((f"{entity}_id", f"person_{entity}_id"))

        for entity_name in ("person", *group_entities):
            df = pd.DataFrame(getattr(dataset.data, entity_name))
            for column in df.columns:
                # Skip ID columns and check if variable exists in system
                if column not in id_columns and column in system.variables:
                    microsim.set_input(column, dataset.year, df[column].values)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
# Module-level instance. No version is passed, so PolicyEngineUSLatest.__init__
# calls _get_us_package_metadata() (an HTTP request to PyPI) and imports
# policyengine_us.system when this module is imported.
us_latest = PolicyEngineUSLatest()
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""US-specific output templates."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from pydantic import ConfigDict
|
|
6
|
+
|
|
7
|
+
from policyengine.core import Output
|
|
8
|
+
from policyengine.outputs.aggregate import Aggregate, AggregateType
|
|
9
|
+
from policyengine.outputs.change_aggregate import (
|
|
10
|
+
ChangeAggregate,
|
|
11
|
+
ChangeAggregateType,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from policyengine.core.simulation import Simulation
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ProgramStatistics(Output):
    """Single program's statistics from a policy reform - represents one database row."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    baseline_simulation: "Simulation"
    reform_simulation: "Simulation"
    # Name of the variable that measures the program (e.g. a benefit or tax).
    program_name: str
    # Entity level at which the variable is aggregated.
    entity: str
    # True when the program is a tax, i.e. a *decrease* is a gain.
    is_tax: bool = False

    # Results populated by run()
    baseline_total: float | None = None
    reform_total: float | None = None
    change: float | None = None
    baseline_count: float | None = None
    reform_count: float | None = None
    winners: float | None = None
    losers: float | None = None

    def run(self):
        """Calculate statistics for this program.

        Populates the totals (sum of the program variable) and counts
        (entities with a value of at least 0.01) for both simulations, the
        change in total, and the number of winners and losers.

        A winner is an entity whose position improves by at least 0.01: the
        value rises for a benefit, or falls for a tax. Losers are the mirror
        image. Results are only written once every aggregate has succeeded.
        """

        def total(simulation):
            # Sum of the program variable over the entity.
            aggregate = Aggregate(
                simulation=simulation,
                variable=self.program_name,
                aggregate_type=AggregateType.SUM,
                entity=self.entity,
            )
            aggregate.run()
            return aggregate.result

        def count(simulation):
            # Count of recipients/payers: entities with a non-trivial amount.
            aggregate = Aggregate(
                simulation=simulation,
                variable=self.program_name,
                aggregate_type=AggregateType.COUNT,
                entity=self.entity,
                filter_variable=self.program_name,
                filter_variable_geq=0.01,
            )
            aggregate.run()
            return aggregate.result

        def changed_count(**change_filter):
            # Count of entities whose reform-minus-baseline change passes
            # the given change_geq / change_leq filter.
            aggregate = ChangeAggregate(
                baseline_simulation=self.baseline_simulation,
                reform_simulation=self.reform_simulation,
                variable=self.program_name,
                aggregate_type=ChangeAggregateType.COUNT,
                entity=self.entity,
                **change_filter,
            )
            aggregate.run()
            return aggregate.result

        baseline_total = total(self.baseline_simulation)
        reform_total = total(self.reform_simulation)
        baseline_count = count(self.baseline_simulation)
        reform_count = count(self.reform_simulation)

        # Winners and losers. For a tax the direction flips: paying less is
        # a gain. The bound *side* must flip together with the sign;
        # flipping only the sign (change >= -0.01) would count everyone who
        # did not gain, including unaffected entities.
        if self.is_tax:
            winners = changed_count(change_leq=-0.01)
            losers = changed_count(change_geq=0.01)
        else:
            winners = changed_count(change_geq=0.01)
            losers = changed_count(change_leq=-0.01)

        # Populate results
        self.baseline_total = float(baseline_total)
        self.reform_total = float(reform_total)
        self.change = float(reform_total - baseline_total)
        self.baseline_count = float(baseline_count)
        self.reform_count = float(reform_count)
        self.winners = float(winners)
        self.losers = float(losers)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""PolicyEngine US tax-benefit model - imports from us/ module."""
|
|
2
|
+
|
|
3
|
+
from importlib.util import find_spec
|
|
4
|
+
|
|
5
|
+
# The US model is optional: its names are only re-exported when the
# policyengine_us package can be found.
if find_spec("policyengine_us") is None:
    __all__ = []
else:
    from .us import (
        PolicyEngineUS,
        PolicyEngineUSDataset,
        PolicyEngineUSLatest,
        ProgramStatistics,
        USYearData,
        general_policy_reform_analysis,
        us_latest,
        us_model,
    )

    __all__ = [
        "USYearData",
        "PolicyEngineUSDataset",
        "PolicyEngineUS",
        "PolicyEngineUSLatest",
        "us_model",
        "us_latest",
        "general_policy_reform_analysis",
        "ProgramStatistics",
    ]

    # Forward references in these models can only be resolved once all of
    # the names above are importable, so rebuild them here.
    for _model in (PolicyEngineUSDataset, PolicyEngineUSLatest):
        _model.model_rebuild()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import calendar
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def parse_safe_date(date_string: str) -> datetime:
    """
    Parse a YYYY-MM-DD date string and ensure the year is at least 1.
    Handles invalid day values by capping to the last valid day of the month.

    Args:
        date_string: Date string in YYYY-MM-DD format

    Returns:
        Safe datetime object with year >= 1

    Raises:
        ValueError: If the string is not a YYYY-MM-DD date (after the
            year-0 and day-capping corrections have been attempted).
    """
    import re

    # datetime cannot represent year 0, so map it onto year 1 up front.
    date_string = date_string.replace("0000-", "0001-")
    try:
        return datetime.strptime(date_string, "%Y-%m-%d")
    except ValueError as err:
        # Try to handle invalid day values (e.g., 2021-06-31). The fields
        # are inspected directly instead of matching the text of the
        # strptime error, whose wording is an implementation detail.
        match = re.fullmatch(r"(\d{4})-(\d{1,2})-(\d{1,2})", date_string)
        if match is not None:
            year, month, day = (int(group) for group in match.groups())
            if year >= 1 and 1 <= month <= 12:
                # Get the last valid day of the month
                last_day = calendar.monthrange(year, month)[1]
                # Only cap days that could be a calendar day at all (<= 31).
                if last_day < day <= 31:
                    return datetime(year, month, last_day)
        raise ValueError(
            f"Invalid date format: {date_string}. Expected YYYY-MM-DD"
        ) from err
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
|
|
3
|
+
from policyengine_core.periods import period
|
|
4
|
+
|
|
5
|
+
from policyengine.core import ParameterValue
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def simulation_modifier_from_parameter_values(
    parameter_values: list[ParameterValue],
) -> Callable:
    """
    Create a simulation modifier function that applies the given parameter values to a simulation.

    Args:
        parameter_values (list[ParameterValue]): List of ParameterValue objects to apply.

    Returns:
        Callable: A function that takes a Simulation object and applies the parameter values.
    """

    def iso_date(moment) -> str:
        # Explicit zero-padded YYYY-MM-DD. strftime("%Y") does not pad years
        # below 1000 on every platform, so a year-1 start date could
        # otherwise be rendered as "1-01-01".
        return f"{moment.year:04d}-{moment.month:02d}-{moment.day:02d}"

    def modifier(simulation):
        parameters = simulation.tax_benefit_system.parameters
        for pv in parameter_values:
            p = parameters.get_child(pv.parameter.name)
            start_period = period(iso_date(pv.start_date))
            # An open-ended value has no end date and therefore no stop.
            stop_period = period(iso_date(pv.end_date)) if pv.end_date else None
            p.update(
                value=pv.value,
                start=start_period,
                stop=stop_period,
            )
        return simulation

    return modifier
|