panelbox 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- panelbox/__init__.py +41 -0
- panelbox/__version__.py +13 -1
- panelbox/core/formula_parser.py +9 -2
- panelbox/core/panel_data.py +1 -1
- panelbox/datasets/__init__.py +39 -0
- panelbox/datasets/load.py +334 -0
- panelbox/gmm/difference_gmm.py +63 -15
- panelbox/gmm/estimator.py +46 -5
- panelbox/gmm/system_gmm.py +136 -21
- panelbox/models/static/__init__.py +4 -0
- panelbox/models/static/between.py +434 -0
- panelbox/models/static/first_difference.py +494 -0
- panelbox/models/static/fixed_effects.py +80 -11
- panelbox/models/static/pooled_ols.py +80 -11
- panelbox/models/static/random_effects.py +52 -10
- panelbox/standard_errors/__init__.py +119 -0
- panelbox/standard_errors/clustered.py +386 -0
- panelbox/standard_errors/comparison.py +528 -0
- panelbox/standard_errors/driscoll_kraay.py +386 -0
- panelbox/standard_errors/newey_west.py +324 -0
- panelbox/standard_errors/pcse.py +358 -0
- panelbox/standard_errors/robust.py +324 -0
- panelbox/standard_errors/utils.py +390 -0
- panelbox/validation/__init__.py +6 -0
- panelbox/validation/robustness/__init__.py +51 -0
- panelbox/validation/robustness/bootstrap.py +933 -0
- panelbox/validation/robustness/checks.py +143 -0
- panelbox/validation/robustness/cross_validation.py +538 -0
- panelbox/validation/robustness/influence.py +364 -0
- panelbox/validation/robustness/jackknife.py +457 -0
- panelbox/validation/robustness/outliers.py +529 -0
- panelbox/validation/robustness/sensitivity.py +809 -0
- {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/METADATA +32 -3
- {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/RECORD +38 -21
- {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/WHEEL +1 -1
- {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/entry_points.txt +0 -0
- {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -16,6 +16,14 @@ from panelbox.utils.matrix_ops import (
|
|
|
16
16
|
compute_vcov_nonrobust,
|
|
17
17
|
compute_rsquared
|
|
18
18
|
)
|
|
19
|
+
from panelbox.standard_errors import (
|
|
20
|
+
robust_covariance,
|
|
21
|
+
cluster_by_entity,
|
|
22
|
+
twoway_cluster,
|
|
23
|
+
driscoll_kraay,
|
|
24
|
+
newey_west,
|
|
25
|
+
pcse
|
|
26
|
+
)
|
|
19
27
|
|
|
20
28
|
|
|
21
29
|
class PooledOLS(PanelModel):
|
|
@@ -84,10 +92,17 @@ class PooledOLS(PanelModel):
|
|
|
84
92
|
cov_type : str, default='nonrobust'
|
|
85
93
|
Type of covariance estimator:
|
|
86
94
|
- 'nonrobust': Classical OLS standard errors
|
|
87
|
-
- 'robust': Heteroskedasticity-robust (HC1)
|
|
95
|
+
- 'robust' or 'hc1': Heteroskedasticity-robust (HC1)
|
|
96
|
+
- 'hc0', 'hc2', 'hc3': Other HC variants
|
|
88
97
|
- 'clustered': Cluster-robust (clustered by entity by default)
|
|
98
|
+
- 'twoway': Two-way clustering (entity and time)
|
|
99
|
+
- 'driscoll_kraay': Driscoll-Kraay for spatial/temporal dependence
|
|
100
|
+
- 'newey_west': Newey-West HAC
|
|
101
|
+
- 'pcse': Panel-corrected standard errors
|
|
89
102
|
**cov_kwds
|
|
90
|
-
Additional arguments for covariance estimation
|
|
103
|
+
Additional arguments for covariance estimation:
|
|
104
|
+
- max_lags : int, for driscoll_kraay and newey_west
|
|
105
|
+
- kernel : str, for driscoll_kraay and newey_west
|
|
91
106
|
|
|
92
107
|
Returns
|
|
93
108
|
-------
|
|
@@ -96,8 +111,23 @@ class PooledOLS(PanelModel):
|
|
|
96
111
|
|
|
97
112
|
Examples
|
|
98
113
|
--------
|
|
114
|
+
>>> # Classical standard errors
|
|
115
|
+
>>> results = model.fit(cov_type='nonrobust')
|
|
116
|
+
>>>
|
|
117
|
+
>>> # Heteroskedasticity-robust
|
|
99
118
|
>>> results = model.fit(cov_type='robust')
|
|
100
|
-
>>>
|
|
119
|
+
>>> results = model.fit(cov_type='hc3')
|
|
120
|
+
>>>
|
|
121
|
+
>>> # Cluster-robust
|
|
122
|
+
>>> results = model.fit(cov_type='clustered')
|
|
123
|
+
>>> results = model.fit(cov_type='twoway')
|
|
124
|
+
>>>
|
|
125
|
+
>>> # HAC
|
|
126
|
+
>>> results = model.fit(cov_type='driscoll_kraay', max_lags=3)
|
|
127
|
+
>>> results = model.fit(cov_type='newey_west', max_lags=4, kernel='bartlett')
|
|
128
|
+
>>>
|
|
129
|
+
>>> # PCSE
|
|
130
|
+
>>> results = model.fit(cov_type='pcse')
|
|
101
131
|
"""
|
|
102
132
|
# Build design matrices
|
|
103
133
|
y, X = self.formula_parser.build_design_matrices(
|
|
@@ -117,17 +147,56 @@ class PooledOLS(PanelModel):
|
|
|
117
147
|
df_model = k - (1 if self.formula_parser.has_intercept else 0)
|
|
118
148
|
df_resid = n - k
|
|
119
149
|
|
|
120
|
-
#
|
|
121
|
-
|
|
150
|
+
# Get entity and time indices
|
|
151
|
+
entities = self.data.data[self.data.entity_col].values
|
|
152
|
+
times = self.data.data[self.data.time_col].values
|
|
153
|
+
|
|
154
|
+
# Compute covariance matrix based on type
|
|
155
|
+
cov_type_lower = cov_type.lower()
|
|
156
|
+
|
|
157
|
+
if cov_type_lower == 'nonrobust':
|
|
122
158
|
vcov = compute_vcov_nonrobust(X, resid, df_resid)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
159
|
+
|
|
160
|
+
elif cov_type_lower in ['robust', 'hc0', 'hc1', 'hc2', 'hc3']:
|
|
161
|
+
# HC robust standard errors
|
|
162
|
+
method = 'HC1' if cov_type_lower == 'robust' else cov_type_lower.upper()
|
|
163
|
+
result = robust_covariance(X, resid, method=method)
|
|
164
|
+
vcov = result.cov_matrix
|
|
165
|
+
|
|
166
|
+
elif cov_type_lower == 'clustered':
|
|
167
|
+
# Cluster-robust by entity
|
|
168
|
+
result = cluster_by_entity(X, resid, entities, df_correction=True)
|
|
169
|
+
vcov = result.cov_matrix
|
|
170
|
+
|
|
171
|
+
elif cov_type_lower == 'twoway':
|
|
172
|
+
# Two-way clustering (entity and time)
|
|
173
|
+
result = twoway_cluster(X, resid, entities, times, df_correction=True)
|
|
174
|
+
vcov = result.cov_matrix
|
|
175
|
+
|
|
176
|
+
elif cov_type_lower == 'driscoll_kraay':
|
|
177
|
+
# Driscoll-Kraay for spatial/temporal dependence
|
|
178
|
+
max_lags = cov_kwds.get('max_lags', None)
|
|
179
|
+
kernel = cov_kwds.get('kernel', 'bartlett')
|
|
180
|
+
result = driscoll_kraay(X, resid, times, max_lags=max_lags, kernel=kernel)
|
|
181
|
+
vcov = result.cov_matrix
|
|
182
|
+
|
|
183
|
+
elif cov_type_lower == 'newey_west':
|
|
184
|
+
# Newey-West HAC
|
|
185
|
+
max_lags = cov_kwds.get('max_lags', None)
|
|
186
|
+
kernel = cov_kwds.get('kernel', 'bartlett')
|
|
187
|
+
result = newey_west(X, resid, max_lags=max_lags, kernel=kernel)
|
|
188
|
+
vcov = result.cov_matrix
|
|
189
|
+
|
|
190
|
+
elif cov_type_lower == 'pcse':
|
|
191
|
+
# Panel-corrected standard errors
|
|
192
|
+
result = pcse(X, resid, entities, times)
|
|
193
|
+
vcov = result.cov_matrix
|
|
194
|
+
|
|
127
195
|
else:
|
|
128
196
|
raise ValueError(
|
|
129
|
-
f"cov_type must be 'nonrobust', 'robust',
|
|
130
|
-
f"
|
|
197
|
+
f"cov_type must be one of: 'nonrobust', 'robust', 'hc0', 'hc1', "
|
|
198
|
+
f"'hc2', 'hc3', 'clustered', 'twoway', 'driscoll_kraay', "
|
|
199
|
+
f"'newey_west', 'pcse'. Got '{cov_type}'"
|
|
131
200
|
)
|
|
132
201
|
|
|
133
202
|
# Standard errors
|
|
@@ -15,6 +15,13 @@ from panelbox.utils.matrix_ops import (
|
|
|
15
15
|
compute_ols,
|
|
16
16
|
compute_panel_rsquared
|
|
17
17
|
)
|
|
18
|
+
from panelbox.standard_errors import (
|
|
19
|
+
robust_covariance,
|
|
20
|
+
cluster_by_entity,
|
|
21
|
+
twoway_cluster,
|
|
22
|
+
driscoll_kraay,
|
|
23
|
+
newey_west
|
|
24
|
+
)
|
|
18
25
|
|
|
19
26
|
|
|
20
27
|
class RandomEffects(PanelModel):
|
|
@@ -121,10 +128,16 @@ class RandomEffects(PanelModel):
|
|
|
121
128
|
cov_type : str, default='nonrobust'
|
|
122
129
|
Type of covariance estimator:
|
|
123
130
|
- 'nonrobust': Classical GLS standard errors
|
|
124
|
-
- 'robust': Heteroskedasticity-robust
|
|
125
|
-
- '
|
|
131
|
+
- 'robust' or 'hc1': Heteroskedasticity-robust (HC1)
|
|
132
|
+
- 'hc0', 'hc2', 'hc3': Other HC variants
|
|
133
|
+
- 'clustered': Cluster-robust (by entity by default)
|
|
134
|
+
- 'twoway': Two-way clustered (entity and time)
|
|
135
|
+
- 'driscoll_kraay': Driscoll-Kraay (spatial/temporal dependence)
|
|
136
|
+
- 'newey_west': Newey-West HAC
|
|
126
137
|
**cov_kwds
|
|
127
|
-
Additional arguments for covariance estimation
|
|
138
|
+
Additional arguments for covariance estimation:
|
|
139
|
+
- max_lags: For Driscoll-Kraay and Newey-West
|
|
140
|
+
- kernel: For HAC estimators
|
|
128
141
|
|
|
129
142
|
Returns
|
|
130
143
|
-------
|
|
@@ -169,16 +182,45 @@ class RandomEffects(PanelModel):
|
|
|
169
182
|
df_resid = n - k
|
|
170
183
|
|
|
171
184
|
# Compute covariance matrix
|
|
172
|
-
|
|
185
|
+
cov_type_lower = cov_type.lower()
|
|
186
|
+
|
|
187
|
+
if cov_type_lower == 'nonrobust':
|
|
173
188
|
vcov = self._compute_vcov_gls(X, resid_gls, entities, df_resid)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
189
|
+
|
|
190
|
+
elif cov_type_lower in ['robust', 'hc0', 'hc1', 'hc2', 'hc3']:
|
|
191
|
+
# Map 'robust' to 'hc1' (default robust method)
|
|
192
|
+
method = 'HC1' if cov_type_lower == 'robust' else cov_type_lower.upper()
|
|
193
|
+
result = robust_covariance(X_gls, resid_gls, method=method)
|
|
194
|
+
vcov = result.cov_matrix
|
|
195
|
+
|
|
196
|
+
elif cov_type_lower == 'clustered':
|
|
197
|
+
# Default: cluster by entity
|
|
198
|
+
result = cluster_by_entity(X_gls, resid_gls, entities, df_correction=True)
|
|
199
|
+
vcov = result.cov_matrix
|
|
200
|
+
|
|
201
|
+
elif cov_type_lower == 'twoway':
|
|
202
|
+
# Two-way clustering: entity and time
|
|
203
|
+
result = twoway_cluster(X_gls, resid_gls, entities, times, df_correction=True)
|
|
204
|
+
vcov = result.cov_matrix
|
|
205
|
+
|
|
206
|
+
elif cov_type_lower == 'driscoll_kraay':
|
|
207
|
+
# Driscoll-Kraay for spatial/temporal dependence
|
|
208
|
+
max_lags = cov_kwds.get('max_lags', None)
|
|
209
|
+
kernel = cov_kwds.get('kernel', 'bartlett')
|
|
210
|
+
result = driscoll_kraay(X_gls, resid_gls, times, max_lags=max_lags, kernel=kernel)
|
|
211
|
+
vcov = result.cov_matrix
|
|
212
|
+
|
|
213
|
+
elif cov_type_lower == 'newey_west':
|
|
214
|
+
# Newey-West HAC
|
|
215
|
+
max_lags = cov_kwds.get('max_lags', None)
|
|
216
|
+
kernel = cov_kwds.get('kernel', 'bartlett')
|
|
217
|
+
result = newey_west(X_gls, resid_gls, max_lags=max_lags, kernel=kernel)
|
|
218
|
+
vcov = result.cov_matrix
|
|
219
|
+
|
|
178
220
|
else:
|
|
179
221
|
raise ValueError(
|
|
180
|
-
f"cov_type must be 'nonrobust', 'robust',
|
|
181
|
-
f"got '{cov_type}'"
|
|
222
|
+
f"cov_type must be one of: 'nonrobust', 'robust', 'hc0', 'hc1', 'hc2', 'hc3', "
|
|
223
|
+
f"'clustered', 'twoway', 'driscoll_kraay', 'newey_west', got '{cov_type}'"
|
|
182
224
|
)
|
|
183
225
|
|
|
184
226
|
# Standard errors
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Standard errors and covariance matrix estimation for panel data.
|
|
3
|
+
|
|
4
|
+
This module provides various robust standard error estimators commonly
|
|
5
|
+
used in panel data econometrics:
|
|
6
|
+
|
|
7
|
+
- Heteroskedasticity-robust (HC0, HC1, HC2, HC3)
|
|
8
|
+
- Cluster-robust (one-way and two-way)
|
|
9
|
+
- Driscoll-Kraay (spatial and temporal dependence)
|
|
10
|
+
- Newey-West HAC (heteroskedasticity and autocorrelation consistent)
|
|
11
|
+
|
|
12
|
+
Examples
|
|
13
|
+
--------
|
|
14
|
+
>>> from panelbox.standard_errors import robust_covariance, cluster_by_entity
|
|
15
|
+
>>>
|
|
16
|
+
>>> # HC1 robust standard errors
|
|
17
|
+
>>> result = robust_covariance(X, resid, method='HC1')
|
|
18
|
+
>>> print(result.std_errors)
|
|
19
|
+
>>>
|
|
20
|
+
>>> # Cluster by entity
|
|
21
|
+
>>> result = cluster_by_entity(X, resid, entity_ids)
|
|
22
|
+
>>> print(result.std_errors)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
# Robust (HC) standard errors
|
|
26
|
+
from .robust import (
|
|
27
|
+
RobustStandardErrors,
|
|
28
|
+
RobustCovarianceResult,
|
|
29
|
+
robust_covariance
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Clustered standard errors
|
|
33
|
+
from .clustered import (
|
|
34
|
+
ClusteredStandardErrors,
|
|
35
|
+
ClusteredCovarianceResult,
|
|
36
|
+
cluster_by_entity,
|
|
37
|
+
cluster_by_time,
|
|
38
|
+
twoway_cluster
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Driscoll-Kraay standard errors
|
|
42
|
+
from .driscoll_kraay import (
|
|
43
|
+
DriscollKraayStandardErrors,
|
|
44
|
+
DriscollKraayResult,
|
|
45
|
+
driscoll_kraay
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Newey-West HAC standard errors
|
|
49
|
+
from .newey_west import (
|
|
50
|
+
NeweyWestStandardErrors,
|
|
51
|
+
NeweyWestResult,
|
|
52
|
+
newey_west
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Panel-Corrected Standard Errors (PCSE)
|
|
56
|
+
from .pcse import (
|
|
57
|
+
PanelCorrectedStandardErrors,
|
|
58
|
+
PCSEResult,
|
|
59
|
+
pcse
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Standard Error Comparison
|
|
63
|
+
from .comparison import (
|
|
64
|
+
StandardErrorComparison,
|
|
65
|
+
ComparisonResult
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Utilities
|
|
69
|
+
from .utils import (
|
|
70
|
+
compute_leverage,
|
|
71
|
+
compute_bread,
|
|
72
|
+
compute_meat_hc,
|
|
73
|
+
sandwich_covariance,
|
|
74
|
+
hc_covariance,
|
|
75
|
+
clustered_covariance,
|
|
76
|
+
twoway_clustered_covariance
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
__all__ = [
|
|
80
|
+
# Robust (HC) SE
|
|
81
|
+
'RobustStandardErrors',
|
|
82
|
+
'RobustCovarianceResult',
|
|
83
|
+
'robust_covariance',
|
|
84
|
+
|
|
85
|
+
# Clustered SE
|
|
86
|
+
'ClusteredStandardErrors',
|
|
87
|
+
'ClusteredCovarianceResult',
|
|
88
|
+
'cluster_by_entity',
|
|
89
|
+
'cluster_by_time',
|
|
90
|
+
'twoway_cluster',
|
|
91
|
+
|
|
92
|
+
# Driscoll-Kraay SE
|
|
93
|
+
'DriscollKraayStandardErrors',
|
|
94
|
+
'DriscollKraayResult',
|
|
95
|
+
'driscoll_kraay',
|
|
96
|
+
|
|
97
|
+
# Newey-West HAC SE
|
|
98
|
+
'NeweyWestStandardErrors',
|
|
99
|
+
'NeweyWestResult',
|
|
100
|
+
'newey_west',
|
|
101
|
+
|
|
102
|
+
# Panel-Corrected SE (PCSE)
|
|
103
|
+
'PanelCorrectedStandardErrors',
|
|
104
|
+
'PCSEResult',
|
|
105
|
+
'pcse',
|
|
106
|
+
|
|
107
|
+
# Comparison
|
|
108
|
+
'StandardErrorComparison',
|
|
109
|
+
'ComparisonResult',
|
|
110
|
+
|
|
111
|
+
# Utilities
|
|
112
|
+
'compute_leverage',
|
|
113
|
+
'compute_bread',
|
|
114
|
+
'compute_meat_hc',
|
|
115
|
+
'sandwich_covariance',
|
|
116
|
+
'hc_covariance',
|
|
117
|
+
'clustered_covariance',
|
|
118
|
+
'twoway_clustered_covariance',
|
|
119
|
+
]
|