policyengine 3.1.14__py3-none-any.whl → 3.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
- policyengine/core/tax_benefit_model_version.py +9 -1
- policyengine/outputs/__init__.py +28 -0
- policyengine/outputs/decile_impact.py +22 -2
- policyengine/outputs/inequality.py +276 -0
- policyengine/outputs/poverty.py +238 -0
- policyengine/tax_benefit_models/uk/__init__.py +10 -2
- policyengine/tax_benefit_models/uk/analysis.py +199 -4
- policyengine/tax_benefit_models/uk/model.py +15 -4
- policyengine/tax_benefit_models/us/__init__.py +10 -2
- policyengine/tax_benefit_models/us/analysis.py +219 -5
- policyengine/tax_benefit_models/us/model.py +15 -4
- policyengine/utils/__init__.py +4 -0
- policyengine/utils/parameter_labels.py +213 -0
- {policyengine-3.1.14.dist-info → policyengine-3.1.16.dist-info}/METADATA +5 -5
- {policyengine-3.1.14.dist-info → policyengine-3.1.16.dist-info}/RECORD +19 -16
- {policyengine-3.1.14.dist-info → policyengine-3.1.16.dist-info}/WHEEL +1 -1
- {policyengine-3.1.14.dist-info → policyengine-3.1.16.dist-info}/licenses/LICENSE +0 -0
- {policyengine-3.1.14.dist-info → policyengine-3.1.16.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
@@ -24,7 +24,15 @@ class TaxBenefitModelVersion(BaseModel):
|
|
|
24
24
|
|
|
25
25
|
variables: list["Variable"] = Field(default_factory=list)
|
|
26
26
|
parameters: list["Parameter"] = Field(default_factory=list)
|
|
27
|
-
|
|
27
|
+
|
|
28
|
+
@property
def parameter_values(self) -> list["ParameterValue"]:
    """Return the parameter values of every parameter as one flat list.

    Values are collected in order: parameters are visited in the order
    they appear in ``self.parameters``, and each parameter contributes
    its ``parameter_values`` in their existing order.
    """
    collected = []
    for parameter in self.parameters:
        collected.extend(parameter.parameter_values)
    return collected
|
|
28
36
|
|
|
29
37
|
# Lookup dicts for O(1) access (excluded from serialization)
|
|
30
38
|
variables_by_name: dict[str, "Variable"] = Field(
|
policyengine/outputs/__init__.py
CHANGED
|
@@ -8,6 +8,22 @@ from policyengine.outputs.decile_impact import (
|
|
|
8
8
|
DecileImpact,
|
|
9
9
|
calculate_decile_impacts,
|
|
10
10
|
)
|
|
11
|
+
from policyengine.outputs.inequality import (
|
|
12
|
+
UK_INEQUALITY_INCOME_VARIABLE,
|
|
13
|
+
US_INEQUALITY_INCOME_VARIABLE,
|
|
14
|
+
Inequality,
|
|
15
|
+
calculate_uk_inequality,
|
|
16
|
+
calculate_us_inequality,
|
|
17
|
+
)
|
|
18
|
+
from policyengine.outputs.poverty import (
|
|
19
|
+
UK_POVERTY_VARIABLES,
|
|
20
|
+
US_POVERTY_VARIABLES,
|
|
21
|
+
Poverty,
|
|
22
|
+
UKPovertyType,
|
|
23
|
+
USPovertyType,
|
|
24
|
+
calculate_uk_poverty_rates,
|
|
25
|
+
calculate_us_poverty_rates,
|
|
26
|
+
)
|
|
11
27
|
|
|
12
28
|
__all__ = [
|
|
13
29
|
"Output",
|
|
@@ -18,4 +34,16 @@ __all__ = [
|
|
|
18
34
|
"ChangeAggregateType",
|
|
19
35
|
"DecileImpact",
|
|
20
36
|
"calculate_decile_impacts",
|
|
37
|
+
"Poverty",
|
|
38
|
+
"UKPovertyType",
|
|
39
|
+
"USPovertyType",
|
|
40
|
+
"UK_POVERTY_VARIABLES",
|
|
41
|
+
"US_POVERTY_VARIABLES",
|
|
42
|
+
"calculate_uk_poverty_rates",
|
|
43
|
+
"calculate_us_poverty_rates",
|
|
44
|
+
"Inequality",
|
|
45
|
+
"UK_INEQUALITY_INCOME_VARIABLE",
|
|
46
|
+
"US_INEQUALITY_INCOME_VARIABLE",
|
|
47
|
+
"calculate_uk_inequality",
|
|
48
|
+
"calculate_us_inequality",
|
|
21
49
|
]
|
|
@@ -2,6 +2,10 @@ import pandas as pd
|
|
|
2
2
|
from pydantic import ConfigDict
|
|
3
3
|
|
|
4
4
|
from policyengine.core import Output, OutputCollection, Simulation
|
|
5
|
+
from policyengine.core.dataset import Dataset
|
|
6
|
+
from policyengine.core.dynamic import Dynamic
|
|
7
|
+
from policyengine.core.policy import Policy
|
|
8
|
+
from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
class DecileImpact(Output):
|
|
@@ -93,8 +97,11 @@ class DecileImpact(Output):
|
|
|
93
97
|
|
|
94
98
|
|
|
95
99
|
def calculate_decile_impacts(
|
|
96
|
-
|
|
97
|
-
|
|
100
|
+
dataset: Dataset,
|
|
101
|
+
tax_benefit_model_version: TaxBenefitModelVersion,
|
|
102
|
+
baseline_policy: Policy | None = None,
|
|
103
|
+
reform_policy: Policy | None = None,
|
|
104
|
+
dynamic: Dynamic | None = None,
|
|
98
105
|
income_variable: str = "equiv_hbai_household_net_income",
|
|
99
106
|
entity: str | None = None,
|
|
100
107
|
quantiles: int = 10,
|
|
@@ -104,6 +111,19 @@ def calculate_decile_impacts(
|
|
|
104
111
|
Returns:
|
|
105
112
|
OutputCollection containing list of DecileImpact objects and DataFrame
|
|
106
113
|
"""
|
|
114
|
+
baseline_simulation = Simulation(
|
|
115
|
+
dataset=dataset,
|
|
116
|
+
tax_benefit_model_version=tax_benefit_model_version,
|
|
117
|
+
policy=baseline_policy,
|
|
118
|
+
dynamic=dynamic,
|
|
119
|
+
)
|
|
120
|
+
reform_simulation = Simulation(
|
|
121
|
+
dataset=dataset,
|
|
122
|
+
tax_benefit_model_version=tax_benefit_model_version,
|
|
123
|
+
policy=reform_policy,
|
|
124
|
+
dynamic=dynamic,
|
|
125
|
+
)
|
|
126
|
+
|
|
107
127
|
results = []
|
|
108
128
|
for decile in range(1, quantiles + 1):
|
|
109
129
|
impact = DecileImpact(
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""Inequality analysis output types."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from pydantic import ConfigDict
|
|
8
|
+
|
|
9
|
+
from policyengine.core import Output, Simulation
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _gini(values: np.ndarray, weights: np.ndarray) -> float:
|
|
13
|
+
"""Calculate weighted Gini coefficient.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
values: Array of income values
|
|
17
|
+
weights: Array of weights
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
Gini coefficient between 0 (perfect equality) and 1 (perfect inequality)
|
|
21
|
+
"""
|
|
22
|
+
# Handle edge cases
|
|
23
|
+
if len(values) == 0 or weights.sum() == 0:
|
|
24
|
+
return 0.0
|
|
25
|
+
|
|
26
|
+
# Sort by values
|
|
27
|
+
sorted_indices = np.argsort(values)
|
|
28
|
+
sorted_values = values[sorted_indices]
|
|
29
|
+
sorted_weights = weights[sorted_indices]
|
|
30
|
+
|
|
31
|
+
# Cumulative weights and weighted values
|
|
32
|
+
cumulative_weights = np.cumsum(sorted_weights)
|
|
33
|
+
total_weight = cumulative_weights[-1]
|
|
34
|
+
cumulative_weighted_values = np.cumsum(sorted_values * sorted_weights)
|
|
35
|
+
total_weighted_value = cumulative_weighted_values[-1]
|
|
36
|
+
|
|
37
|
+
if total_weighted_value == 0:
|
|
38
|
+
return 0.0
|
|
39
|
+
|
|
40
|
+
# Calculate Gini using the area formula
|
|
41
|
+
# Gini = 1 - 2 * (area under Lorenz curve)
|
|
42
|
+
lorenz_curve = cumulative_weighted_values / total_weighted_value
|
|
43
|
+
weight_fractions = sorted_weights / total_weight
|
|
44
|
+
|
|
45
|
+
# Area under Lorenz curve using trapezoidal rule
|
|
46
|
+
area = np.sum(weight_fractions * (lorenz_curve - weight_fractions / 2))
|
|
47
|
+
|
|
48
|
+
return float(1 - 2 * area)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Inequality(Output):
    """Single inequality measure result - represents one database row.

    This is a single-simulation output type that calculates inequality
    metrics (Gini coefficient and income shares) for a given income
    variable, optionally restricted to units that pass a filter on
    another variable.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    simulation: Simulation
    # Name of the variable whose distribution is analysed.
    income_variable: str
    # Entity table the metrics are computed over.
    entity: str = "household"

    # Optional demographic filters; every non-None bound is AND-ed together.
    filter_variable: str | None = None
    filter_variable_eq: Any | None = None
    filter_variable_leq: Any | None = None
    filter_variable_geq: Any | None = None

    # Results populated by run()
    gini: float | None = None
    top_10_share: float | None = None
    top_1_share: float | None = None
    bottom_50_share: float | None = None

    def run(self):
        """Calculate inequality metrics and store them on this object.

        Populates ``gini``, ``top_10_share``, ``top_1_share`` and
        ``bottom_50_share``. All shares are 0.0 when there are no valid
        observations, total weight is not positive, or total weighted
        income is not positive.
        """
        # Get income variable info
        income_var_obj = (
            self.simulation.tax_benefit_model_version.get_variable(
                self.income_variable
            )
        )

        # Get target entity data
        target_entity = self.entity
        data = getattr(self.simulation.output_dataset.data, target_entity)

        # Map income variable to target entity if needed
        if income_var_obj.entity != target_entity:
            mapped = self.simulation.output_dataset.data.map_to_entity(
                income_var_obj.entity,
                target_entity,
                columns=[self.income_variable],
            )
            income_series = mapped[self.income_variable]
        else:
            income_series = data[self.income_variable]

        # Get weights
        weight_col = f"{target_entity}_weight"
        if weight_col in data.columns:
            weights = data[weight_col]
        else:
            # Unit weights share the income series' index so that the
            # boolean filter mask below selects the same rows from both;
            # a fresh RangeIndex would fail to align on non-default indexes.
            weights = pd.Series(
                np.ones(len(income_series)), index=income_series.index
            )

        # Apply demographic filter if specified
        if self.filter_variable is not None:
            filter_var_obj = (
                self.simulation.tax_benefit_model_version.get_variable(
                    self.filter_variable
                )
            )

            if filter_var_obj.entity != target_entity:
                filter_mapped = (
                    self.simulation.output_dataset.data.map_to_entity(
                        filter_var_obj.entity,
                        target_entity,
                        columns=[self.filter_variable],
                    )
                )
                filter_series = filter_mapped[self.filter_variable]
            else:
                filter_series = data[self.filter_variable]

            # Build filter mask: rows with a missing filter value never pass.
            mask = filter_series.notna()
            if self.filter_variable_eq is not None:
                mask &= filter_series == self.filter_variable_eq
            if self.filter_variable_leq is not None:
                mask &= filter_series <= self.filter_variable_leq
            if self.filter_variable_geq is not None:
                mask &= filter_series >= self.filter_variable_geq

            # Apply mask
            income_series = income_series[mask]
            weights = weights[mask]

        # Convert to numpy arrays
        values = np.array(income_series)
        weights_arr = np.array(weights)

        # Remove observations where either the value or the weight is NaN
        valid_mask = ~np.isnan(values) & ~np.isnan(weights_arr)
        values = values[valid_mask]
        weights_arr = weights_arr[valid_mask]

        # Calculate Gini coefficient
        self.gini = _gini(values, weights_arr)

        # Shares default to zero and are only overwritten when the
        # distribution is non-degenerate.
        self.top_10_share = 0.0
        self.top_1_share = 0.0
        self.bottom_50_share = 0.0

        if len(values) == 0 or not weights_arr.sum() > 0:
            return

        total_income = np.sum(values * weights_arr)
        if not total_income > 0:
            return

        # Sort by income
        sorted_indices = np.argsort(values)
        sorted_values = values[sorted_indices]
        sorted_weights = weights_arr[sorted_indices]

        # Cumulative weight fraction up to and including each observation
        cumulative_weights = np.cumsum(sorted_weights)
        total_weight = cumulative_weights[-1]
        weight_fractions = cumulative_weights / total_weight

        def income_share(selection):
            # Weighted income of the selected observations over the total.
            return float(
                np.sum(sorted_values[selection] * sorted_weights[selection])
                / total_income
            )

        # Top 10% share
        self.top_10_share = income_share(weight_fractions > 0.9)

        # Top 1% share
        self.top_1_share = income_share(weight_fractions > 0.99)

        # Bottom 50% share
        self.bottom_50_share = income_share(weight_fractions <= 0.5)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Default income variables for each country
|
|
207
|
+
UK_INEQUALITY_INCOME_VARIABLE = "equiv_hbai_household_net_income"
|
|
208
|
+
US_INEQUALITY_INCOME_VARIABLE = "household_net_income"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def calculate_uk_inequality(
    simulation: Simulation,
    income_variable: str = UK_INEQUALITY_INCOME_VARIABLE,
    filter_variable: str | None = None,
    filter_variable_eq: Any | None = None,
    filter_variable_leq: Any | None = None,
    filter_variable_geq: Any | None = None,
) -> Inequality:
    """Build and run a household-level Inequality output for a UK simulation.

    Args:
        simulation: The simulation to analyse
        income_variable: Income variable to measure
            (default: UK_INEQUALITY_INCOME_VARIABLE)
        filter_variable: Optional variable used to restrict the population
        filter_variable_eq: Keep units where the filter variable equals this
        filter_variable_leq: Keep units where the filter variable is <= this
        filter_variable_geq: Keep units where the filter variable is >= this

    Returns:
        The Inequality object after ``run()`` has populated its results
    """
    filters = {
        "filter_variable": filter_variable,
        "filter_variable_eq": filter_variable_eq,
        "filter_variable_leq": filter_variable_leq,
        "filter_variable_geq": filter_variable_geq,
    }
    measure = Inequality(
        simulation=simulation,
        income_variable=income_variable,
        entity="household",
        **filters,
    )
    measure.run()
    return measure
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def calculate_us_inequality(
    simulation: Simulation,
    income_variable: str = US_INEQUALITY_INCOME_VARIABLE,
    filter_variable: str | None = None,
    filter_variable_eq: Any | None = None,
    filter_variable_leq: Any | None = None,
    filter_variable_geq: Any | None = None,
) -> Inequality:
    """Build and run a household-level Inequality output for a US simulation.

    Args:
        simulation: The simulation to analyse
        income_variable: Income variable to measure
            (default: US_INEQUALITY_INCOME_VARIABLE)
        filter_variable: Optional variable used to restrict the population
        filter_variable_eq: Keep units where the filter variable equals this
        filter_variable_leq: Keep units where the filter variable is <= this
        filter_variable_geq: Keep units where the filter variable is >= this

    Returns:
        The Inequality object after ``run()`` has populated its results
    """
    filters = {
        "filter_variable": filter_variable,
        "filter_variable_eq": filter_variable_eq,
        "filter_variable_leq": filter_variable_leq,
        "filter_variable_geq": filter_variable_geq,
    }
    measure = Inequality(
        simulation=simulation,
        income_variable=income_variable,
        entity="household",
        **filters,
    )
    measure.run()
    return measure
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Poverty analysis output types."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from pydantic import ConfigDict
|
|
8
|
+
|
|
9
|
+
from policyengine.core import Output, OutputCollection, Simulation
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class UKPovertyType(str, Enum):
    """Identifiers for the UK poverty measures.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Absolute poverty measures (BHC / AHC variants)
    ABSOLUTE_BHC = "absolute_bhc"
    ABSOLUTE_AHC = "absolute_ahc"

    # Relative poverty measures (BHC / AHC variants)
    RELATIVE_BHC = "relative_bhc"
    RELATIVE_AHC = "relative_ahc"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class USPovertyType(str, Enum):
    """Identifiers for the US poverty measures.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # SPM poverty and its "deep" variant
    SPM = "spm"
    SPM_DEEP = "spm_deep"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Mapping from poverty type to variable name
|
|
29
|
+
UK_POVERTY_VARIABLES = {
|
|
30
|
+
UKPovertyType.ABSOLUTE_BHC: "in_poverty_bhc",
|
|
31
|
+
UKPovertyType.ABSOLUTE_AHC: "in_poverty_ahc",
|
|
32
|
+
UKPovertyType.RELATIVE_BHC: "in_relative_poverty_bhc",
|
|
33
|
+
UKPovertyType.RELATIVE_AHC: "in_relative_poverty_ahc",
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
US_POVERTY_VARIABLES = {
|
|
37
|
+
USPovertyType.SPM: "spm_unit_is_in_spm_poverty",
|
|
38
|
+
USPovertyType.SPM_DEEP: "spm_unit_is_in_deep_spm_poverty",
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Poverty(Output):
    """One poverty measurement for one simulation - a single database row.

    Given a poverty indicator variable, ``run()`` computes how many units
    are flagged as in poverty, how many units were counted in total, and
    the ratio of the two. The population can optionally be narrowed with
    a filter on another variable.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    simulation: Simulation
    # Name of the poverty indicator variable to read.
    poverty_variable: str
    # Entity table the counts are taken over.
    entity: str = "person"

    # Optional demographic filters; every non-None bound is AND-ed together.
    filter_variable: str | None = None
    filter_variable_eq: Any | None = None
    filter_variable_leq: Any | None = None
    filter_variable_geq: Any | None = None

    # Results populated by run()
    headcount: float | None = None
    total_population: float | None = None
    rate: float | None = None

    def run(self):
        """Compute headcount, total population and poverty rate.

        The rate is 0.0 when the counted population is not positive.
        """
        sim = self.simulation
        flag_variable = sim.tax_benefit_model_version.get_variable(
            self.poverty_variable
        )

        entity_name = self.entity
        entity_frame = getattr(sim.output_dataset.data, entity_name)

        # Read the indicator directly when it already lives on the target
        # entity; otherwise project it onto that entity first.
        if flag_variable.entity == entity_name:
            flags = entity_frame[self.poverty_variable]
        else:
            projected = sim.output_dataset.data.map_to_entity(
                flag_variable.entity,
                entity_name,
                columns=[self.poverty_variable],
            )
            flags = projected[self.poverty_variable]

        if self.filter_variable is not None:
            selector_variable = sim.tax_benefit_model_version.get_variable(
                self.filter_variable
            )

            if selector_variable.entity == entity_name:
                selector = entity_frame[self.filter_variable]
            else:
                selector_projected = sim.output_dataset.data.map_to_entity(
                    selector_variable.entity,
                    entity_name,
                    columns=[self.filter_variable],
                )
                selector = selector_projected[self.filter_variable]

            # Rows with a missing filter value never pass the filter.
            keep = selector.notna()
            if self.filter_variable_eq is not None:
                keep &= selector == self.filter_variable_eq
            if self.filter_variable_leq is not None:
                keep &= selector <= self.filter_variable_leq
            if self.filter_variable_geq is not None:
                keep &= selector >= self.filter_variable_geq

            flags = flags[keep]

        # NOTE(review): these counts are weighted only if the series type
        # returned by the dataset applies weights in sum()/count() - confirm.
        in_poverty = float((flags == True).sum())  # noqa: E712
        population = float(flags.count())

        self.headcount = in_poverty
        self.total_population = population
        if population > 0:
            self.rate = in_poverty / population
        else:
            self.rate = 0.0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def calculate_uk_poverty_rates(
    simulation: Simulation,
    filter_variable: str | None = None,
    filter_variable_eq: Any | None = None,
    filter_variable_leq: Any | None = None,
    filter_variable_geq: Any | None = None,
) -> OutputCollection[Poverty]:
    """Run a person-level Poverty output for every UK poverty variable.

    Args:
        simulation: The simulation to analyse
        filter_variable: Optional variable to filter by (e.g., "is_child")
        filter_variable_eq: Keep units where the filter variable equals this
        filter_variable_leq: Keep units where the filter variable is <= this
        filter_variable_geq: Keep units where the filter variable is >= this

    Returns:
        OutputCollection holding one Poverty object per entry of
        UK_POVERTY_VARIABLES plus a DataFrame with one row per object
    """
    measures = []
    for variable_name in UK_POVERTY_VARIABLES.values():
        measure = Poverty(
            simulation=simulation,
            poverty_variable=variable_name,
            entity="person",
            filter_variable=filter_variable,
            filter_variable_eq=filter_variable_eq,
            filter_variable_leq=filter_variable_leq,
            filter_variable_geq=filter_variable_geq,
        )
        measure.run()
        measures.append(measure)

    # Attributes copied verbatim into the summary table, in column order.
    copied_fields = (
        "poverty_variable",
        "filter_variable",
        "filter_variable_eq",
        "filter_variable_leq",
        "filter_variable_geq",
        "headcount",
        "total_population",
        "rate",
    )
    rows = [
        {
            "simulation_id": measure.simulation.id,
            **{name: getattr(measure, name) for name in copied_fields},
        }
        for measure in measures
    ]

    return OutputCollection(outputs=measures, dataframe=pd.DataFrame(rows))
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def calculate_us_poverty_rates(
    simulation: Simulation,
    filter_variable: str | None = None,
    filter_variable_eq: Any | None = None,
    filter_variable_leq: Any | None = None,
    filter_variable_geq: Any | None = None,
) -> OutputCollection[Poverty]:
    """Run a person-level Poverty output for every US poverty variable.

    Args:
        simulation: The simulation to analyse
        filter_variable: Optional variable to filter by (e.g., "is_child")
        filter_variable_eq: Keep units where the filter variable equals this
        filter_variable_leq: Keep units where the filter variable is <= this
        filter_variable_geq: Keep units where the filter variable is >= this

    Returns:
        OutputCollection holding one Poverty object per entry of
        US_POVERTY_VARIABLES plus a DataFrame with one row per object
    """
    measures = []
    for variable_name in US_POVERTY_VARIABLES.values():
        measure = Poverty(
            simulation=simulation,
            poverty_variable=variable_name,
            entity="person",
            filter_variable=filter_variable,
            filter_variable_eq=filter_variable_eq,
            filter_variable_leq=filter_variable_leq,
            filter_variable_geq=filter_variable_geq,
        )
        measure.run()
        measures.append(measure)

    # Attributes copied verbatim into the summary table, in column order.
    copied_fields = (
        "poverty_variable",
        "filter_variable",
        "filter_variable_eq",
        "filter_variable_leq",
        "filter_variable_geq",
        "headcount",
        "total_population",
        "rate",
    )
    rows = [
        {
            "simulation_id": measure.simulation.id,
            **{name: getattr(measure, name) for name in copied_fields},
        }
        for measure in measures
    ]

    return OutputCollection(outputs=measures, dataframe=pd.DataFrame(rows))
|
|
@@ -5,7 +5,12 @@ from importlib.util import find_spec
|
|
|
5
5
|
if find_spec("policyengine_uk") is not None:
|
|
6
6
|
from policyengine.core import Dataset
|
|
7
7
|
|
|
8
|
-
from .analysis import
|
|
8
|
+
from .analysis import (
|
|
9
|
+
UKHouseholdInput,
|
|
10
|
+
UKHouseholdOutput,
|
|
11
|
+
calculate_household_impact,
|
|
12
|
+
economic_impact_analysis,
|
|
13
|
+
)
|
|
9
14
|
from .datasets import (
|
|
10
15
|
PolicyEngineUKDataset,
|
|
11
16
|
UKYearData,
|
|
@@ -37,7 +42,10 @@ if find_spec("policyengine_uk") is not None:
|
|
|
37
42
|
"PolicyEngineUKLatest",
|
|
38
43
|
"uk_model",
|
|
39
44
|
"uk_latest",
|
|
40
|
-
"
|
|
45
|
+
"economic_impact_analysis",
|
|
46
|
+
"calculate_household_impact",
|
|
47
|
+
"UKHouseholdInput",
|
|
48
|
+
"UKHouseholdOutput",
|
|
41
49
|
"ProgrammeStatistics",
|
|
42
50
|
]
|
|
43
51
|
else:
|