policyengine 3.0.0__py3-none-any.whl → 3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
  2. policyengine/core/__init__.py +22 -0
  3. policyengine/core/dataset.py +260 -0
  4. policyengine/core/dataset_version.py +16 -0
  5. policyengine/core/dynamic.py +43 -0
  6. policyengine/core/output.py +26 -0
  7. policyengine/{models → core}/parameter.py +4 -2
  8. policyengine/{models → core}/parameter_value.py +1 -1
  9. policyengine/core/policy.py +43 -0
  10. policyengine/{models → core}/simulation.py +10 -14
  11. policyengine/core/tax_benefit_model.py +11 -0
  12. policyengine/core/tax_benefit_model_version.py +34 -0
  13. policyengine/core/variable.py +15 -0
  14. policyengine/outputs/__init__.py +21 -0
  15. policyengine/outputs/aggregate.py +124 -0
  16. policyengine/outputs/change_aggregate.py +184 -0
  17. policyengine/outputs/decile_impact.py +140 -0
  18. policyengine/tax_benefit_models/uk/__init__.py +26 -0
  19. policyengine/tax_benefit_models/uk/analysis.py +97 -0
  20. policyengine/tax_benefit_models/uk/datasets.py +176 -0
  21. policyengine/tax_benefit_models/uk/model.py +268 -0
  22. policyengine/tax_benefit_models/uk/outputs.py +108 -0
  23. policyengine/tax_benefit_models/uk.py +33 -0
  24. policyengine/tax_benefit_models/us/__init__.py +36 -0
  25. policyengine/tax_benefit_models/us/analysis.py +99 -0
  26. policyengine/tax_benefit_models/us/datasets.py +307 -0
  27. policyengine/tax_benefit_models/us/model.py +447 -0
  28. policyengine/tax_benefit_models/us/outputs.py +108 -0
  29. policyengine/tax_benefit_models/us.py +32 -0
  30. policyengine/utils/__init__.py +3 -0
  31. policyengine/utils/dates.py +40 -0
  32. policyengine/utils/parametric_reforms.py +39 -0
  33. policyengine/utils/plotting.py +179 -0
  34. {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/METADATA +185 -20
  35. policyengine-3.1.1.dist-info/RECORD +39 -0
  36. policyengine/database/__init__.py +0 -56
  37. policyengine/database/aggregate.py +0 -33
  38. policyengine/database/baseline_parameter_value_table.py +0 -66
  39. policyengine/database/baseline_variable_table.py +0 -40
  40. policyengine/database/database.py +0 -251
  41. policyengine/database/dataset_table.py +0 -41
  42. policyengine/database/dynamic_table.py +0 -34
  43. policyengine/database/link.py +0 -82
  44. policyengine/database/model_table.py +0 -27
  45. policyengine/database/model_version_table.py +0 -28
  46. policyengine/database/parameter_table.py +0 -31
  47. policyengine/database/parameter_value_table.py +0 -62
  48. policyengine/database/policy_table.py +0 -34
  49. policyengine/database/report_element_table.py +0 -48
  50. policyengine/database/report_table.py +0 -24
  51. policyengine/database/simulation_table.py +0 -50
  52. policyengine/database/user_table.py +0 -28
  53. policyengine/database/versioned_dataset_table.py +0 -28
  54. policyengine/models/__init__.py +0 -30
  55. policyengine/models/aggregate.py +0 -92
  56. policyengine/models/baseline_parameter_value.py +0 -14
  57. policyengine/models/baseline_variable.py +0 -12
  58. policyengine/models/dataset.py +0 -18
  59. policyengine/models/dynamic.py +0 -15
  60. policyengine/models/model.py +0 -124
  61. policyengine/models/model_version.py +0 -14
  62. policyengine/models/policy.py +0 -17
  63. policyengine/models/policyengine_uk.py +0 -114
  64. policyengine/models/policyengine_us.py +0 -115
  65. policyengine/models/report.py +0 -10
  66. policyengine/models/report_element.py +0 -36
  67. policyengine/models/user.py +0 -14
  68. policyengine/models/versioned_dataset.py +0 -12
  69. policyengine/utils/charts.py +0 -286
  70. policyengine/utils/compress.py +0 -20
  71. policyengine/utils/datasets.py +0 -71
  72. policyengine-3.0.0.dist-info/RECORD +0 -47
  73. policyengine-3.0.0.dist-info/entry_points.txt +0 -2
  74. {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/WHEEL +0 -0
  75. {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/licenses/LICENSE +0 -0
  76. {policyengine-3.0.0.dist-info → policyengine-3.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,99 @@
1
+ """General utility functions for US policy reform analysis."""
2
+
3
+ import pandas as pd
4
+ from pydantic import BaseModel
5
+
6
+ from policyengine.core import OutputCollection, Simulation
7
+ from policyengine.outputs.decile_impact import (
8
+ DecileImpact,
9
+ calculate_decile_impacts,
10
+ )
11
+
12
+ from .outputs import ProgramStatistics
13
+
14
+
15
class PolicyReformAnalysis(BaseModel):
    """Complete policy reform analysis result.

    Bundles the two output collections produced by
    ``general_policy_reform_analysis`` so callers receive a single typed
    object rather than a tuple.
    """

    # Income-decile distributional impacts of the reform.
    decile_impacts: OutputCollection[DecileImpact]
    # Per-program totals, caseloads, and winner/loser counts.
    program_statistics: OutputCollection[ProgramStatistics]
20
+
21
+
22
def general_policy_reform_analysis(
    baseline_simulation: Simulation,
    reform_simulation: Simulation,
) -> PolicyReformAnalysis:
    """Perform comprehensive analysis of a policy reform.

    Args:
        baseline_simulation: Simulation representing current law.
        reform_simulation: Simulation with the reform applied.

    Returns:
        PolicyReformAnalysis containing decile impacts and program statistics
    """
    # Decile impact (using household_net_income for US)
    decile_impacts = calculate_decile_impacts(
        baseline_simulation=baseline_simulation,
        reform_simulation=reform_simulation,
        income_variable="household_net_income",
    )

    # Major programs to analyse: (program name, entity level, is it a tax?)
    programs = [
        # Federal taxes
        ("income_tax", "tax_unit", True),
        ("payroll_tax", "person", True),
        # State and local taxes
        ("state_income_tax", "tax_unit", True),
        # Benefits
        ("snap", "spm_unit", False),
        ("tanf", "spm_unit", False),
        ("ssi", "person", False),
        ("social_security", "person", False),
        ("medicare", "person", False),
        ("medicaid", "person", False),
        ("eitc", "tax_unit", False),
        ("ctc", "tax_unit", False),
    ]

    program_statistics = []
    for program_name, entity, is_tax in programs:
        stats = ProgramStatistics(
            baseline_simulation=baseline_simulation,
            reform_simulation=reform_simulation,
            program_name=program_name,
            entity=entity,
            is_tax=is_tax,
        )
        stats.run()
        program_statistics.append(stats)

    # Flatten the computed statistics into one row per program.
    stat_fields = [
        "program_name",
        "entity",
        "is_tax",
        "baseline_total",
        "reform_total",
        "change",
        "baseline_count",
        "reform_count",
        "winners",
        "losers",
    ]
    rows = []
    for p in program_statistics:
        row = {
            "baseline_simulation_id": p.baseline_simulation.id,
            "reform_simulation_id": p.reform_simulation.id,
        }
        for field in stat_fields:
            row[field] = getattr(p, field)
        rows.append(row)
    program_df = pd.DataFrame(rows)

    program_collection = OutputCollection(
        outputs=program_statistics, dataframe=program_df
    )

    return PolicyReformAnalysis(
        decile_impacts=decile_impacts, program_statistics=program_collection
    )
@@ -0,0 +1,307 @@
1
+ import warnings
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from microdf import MicroDataFrame
6
+ from pydantic import BaseModel, ConfigDict
7
+
8
+ from policyengine.core import Dataset, map_to_entity
9
+
10
+
11
class USYearData(BaseModel):
    """Entity-level data for a single year."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # One weighted dataframe per US entity level.
    person: MicroDataFrame
    marital_unit: MicroDataFrame
    family: MicroDataFrame
    spm_unit: MicroDataFrame
    tax_unit: MicroDataFrame
    household: MicroDataFrame

    def map_to_entity(
        self, source_entity: str, target_entity: str, columns: list[str] = None
    ) -> MicroDataFrame:
        """Map data from source entity to target entity using join keys.

        Args:
            source_entity (str): The source entity name.
            target_entity (str): The target entity name.
            columns (list[str], optional): List of column names to map. If None, maps all columns.

        Returns:
            MicroDataFrame: The mapped data at the target entity level.

        Raises:
            ValueError: If source or target entity is invalid.
        """
        # Assemble the per-entity frames by attribute name, then delegate
        # the actual join logic to the shared core helper.
        entity_names = (
            "person",
            "marital_unit",
            "family",
            "spm_unit",
            "tax_unit",
            "household",
        )
        frames = {name: getattr(self, name) for name in entity_names}
        return map_to_entity(
            entity_data=frames,
            source_entity=source_entity,
            target_entity=target_entity,
            person_entity="person",
            columns=columns,
        )
54
+
55
+
56
class PolicyEngineUSDataset(Dataset):
    """US dataset with multi-year entity-level data.

    Persists its six entity tables to a single HDF5 file (one key per
    entity) and reloads them as weighted ``MicroDataFrame`` objects.
    """

    data: USYearData | None = None

    def model_post_init(self, __context) -> None:
        """Called after Pydantic initialization.

        Synchronises in-memory data with on-disk storage: fresh data is
        written out immediately; otherwise an existing file is loaded.
        """
        if self.data is not None:
            self.save()
            return
        if not self.filepath:
            return
        try:
            self.load()
        except FileNotFoundError:
            # No file on disk yet — that's fine, nothing to load.
            pass

    def save(self) -> None:
        """Save dataset to HDF5 file."""
        target = Path(self.filepath)
        # exist_ok makes the prior existence check unnecessary.
        target.parent.mkdir(parents=True, exist_ok=True)
        with warnings.catch_warnings():
            # Some columns hold object dtypes; PyTables warns it will
            # pickle them, which is expected here.
            warnings.filterwarnings(
                "ignore",
                category=pd.errors.PerformanceWarning,
                message=".*PyTables will pickle object types.*",
            )
            with pd.HDFStore(target, mode="w") as store:
                for entity in (
                    "person",
                    "marital_unit",
                    "family",
                    "spm_unit",
                    "tax_unit",
                    "household",
                ):
                    store[entity] = pd.DataFrame(getattr(self.data, entity))

    def load(self) -> None:
        """Load dataset from HDF5 file into this instance."""
        with pd.HDFStore(self.filepath, mode="r") as store:
            # Each entity's weight column follows the <entity>_weight naming
            # convention used by save()/create_datasets().
            frames = {
                entity: MicroDataFrame(
                    store[entity], weights=f"{entity}_weight"
                )
                for entity in (
                    "person",
                    "marital_unit",
                    "family",
                    "spm_unit",
                    "tax_unit",
                    "household",
                )
            }
        self.data = USYearData(**frames)

    def __repr__(self) -> str:
        prefix = f"<PolicyEngineUSDataset id={self.id} year={self.year} filepath={self.filepath}"
        if self.data is None:
            return f"{prefix} (not loaded)>"
        d = self.data
        return (
            f"{prefix} people={len(d.person)}"
            f" marital_units={len(d.marital_unit)}"
            f" families={len(d.family)}"
            f" spm_units={len(d.spm_unit)}"
            f" tax_units={len(d.tax_unit)}"
            f" households={len(d.household)}>"
        )
128
+
129
+
130
# Defaults for create_datasets; tuples avoid the shared-mutable-default pitfall.
_DEFAULT_DATASETS = (
    "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
)
_DEFAULT_YEARS = (2024, 2025, 2026, 2027, 2028)

# Structural ID columns per entity. These define entity membership and
# relationships; person-level links to group entities use person_X_id naming.
_ID_VARIABLES = {
    "person": [
        "person_id",
        "person_household_id",
        "person_marital_unit_id",
        "person_family_id",
        "person_spm_unit_id",
        "person_tax_unit_id",
    ],
    "household": ["household_id"],
    "marital_unit": ["marital_unit_id"],
    "family": ["family_id"],
    "spm_unit": ["spm_unit_id"],
    "tax_unit": ["tax_unit_id"],
}


def _extract_entity_frames(sim, year: int) -> dict[str, pd.DataFrame]:
    """Build one raw DataFrame per entity from a Microsimulation for `year`."""
    entity_data = {key: {} for key in _ID_VARIABLES}

    # Structural ID columns first (not input variables).
    for entity_key, var_names in _ID_VARIABLES.items():
        for id_var in var_names:
            if id_var in sim.tax_benefit_system.variables:
                entity_data[entity_key][id_var] = sim.calculate(
                    id_var, period=year
                ).values

    # Then every input variable, stored at its own entity level.
    for variable_name in sim.input_variables:
        variable = sim.tax_benefit_system.variables[variable_name]
        entity_data[variable.entity.key][variable_name] = sim.calculate(
            variable_name, period=year
        ).values

    return {key: pd.DataFrame(cols) for key, cols in entity_data.items()}


def _attach_person_weight(
    person_df: pd.DataFrame, household_df: pd.DataFrame
) -> pd.DataFrame:
    """Derive person_weight from household_weight via person_household_id.

    No-op if person_weight already exists. Returns the (possibly new) frame.
    """
    if "person_weight" in person_df.columns:
        return person_df
    person_df = person_df.merge(
        household_df[["household_id", "household_weight"]],
        left_on="person_household_id",
        right_on="household_id",
        how="left",
    )
    person_df = person_df.rename(
        columns={"household_weight": "person_weight"}
    )
    return person_df.drop(columns=["household_id"], errors="ignore")


def _attach_group_weight(
    entity_name: str,
    entity_df: pd.DataFrame,
    person_df: pd.DataFrame,
    household_df: pd.DataFrame,
) -> pd.DataFrame:
    """Derive <entity>_weight by routing household weights through persons.

    No-op if the weight column already exists. Returns the (possibly new)
    frame.
    """
    if f"{entity_name}_weight" in entity_df.columns:
        return entity_df
    person_id_col = f"person_{entity_name}_id"
    entity_id_col = f"{entity_name}_id"
    # Household each group entity belongs to, found via its member persons.
    entity_household_map = person_df[
        [person_id_col, "person_household_id"]
    ].drop_duplicates()
    entity_df = entity_df.merge(
        entity_household_map,
        left_on=entity_id_col,
        right_on=person_id_col,
        how="left",
    )
    entity_df = entity_df.merge(
        household_df[["household_id", "household_weight"]],
        left_on="person_household_id",
        right_on="household_id",
        how="left",
    )
    entity_df = entity_df.rename(
        columns={"household_weight": f"{entity_name}_weight"}
    )
    return entity_df.drop(
        columns=["household_id", "person_household_id", person_id_col],
        errors="ignore",
    )


def create_datasets(
    datasets: list[str] = None,
    years: list[int] = None,
) -> None:
    """Create PolicyEngineUSDataset instances from HuggingFace dataset paths.

    Args:
        datasets: List of HuggingFace dataset paths (e.g.,
            "hf://policyengine/policyengine-us-data/cps_2024.h5"). Defaults
            to the enhanced CPS 2024 dataset. (None sentinel replaces the
            former mutable list default.)
        years: List of years to extract data for. Defaults to 2024-2028.
    """
    from policyengine_us import Microsimulation

    if datasets is None:
        datasets = list(_DEFAULT_DATASETS)
    if years is None:
        years = list(_DEFAULT_YEARS)

    group_entities = ("marital_unit", "family", "spm_unit", "tax_unit")

    for dataset in datasets:
        sim = Microsimulation(dataset=dataset)

        for year in years:
            frames = _extract_entity_frames(sim, year)
            person_df = frames["person"]
            household_df = frames["household"]

            # Household weights are primary: map them to the person table,
            # then through persons to every other group entity.
            if "household_weight" in household_df.columns:
                person_df = _attach_person_weight(person_df, household_df)
                for entity_name in group_entities:
                    frames[entity_name] = _attach_group_weight(
                        entity_name,
                        frames[entity_name],
                        person_df,
                        household_df,
                    )

            us_dataset = PolicyEngineUSDataset(
                name=f"{dataset}-year-{year}",
                description=f"US Dataset for year {year} based on {dataset}",
                filepath=f"./data/{Path(dataset).stem}_year_{year}.h5",
                year=year,
                data=USYearData(
                    person=MicroDataFrame(
                        person_df, weights="person_weight"
                    ),
                    household=MicroDataFrame(
                        household_df, weights="household_weight"
                    ),
                    marital_unit=MicroDataFrame(
                        frames["marital_unit"], weights="marital_unit_weight"
                    ),
                    family=MicroDataFrame(
                        frames["family"], weights="family_weight"
                    ),
                    spm_unit=MicroDataFrame(
                        frames["spm_unit"], weights="spm_unit_weight"
                    ),
                    tax_unit=MicroDataFrame(
                        frames["tax_unit"], weights="tax_unit_weight"
                    ),
                ),
            )
            us_dataset.save()