panelbox 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. panelbox/__init__.py +41 -0
  2. panelbox/__version__.py +13 -1
  3. panelbox/core/formula_parser.py +9 -2
  4. panelbox/core/panel_data.py +1 -1
  5. panelbox/datasets/__init__.py +39 -0
  6. panelbox/datasets/load.py +334 -0
  7. panelbox/gmm/difference_gmm.py +63 -15
  8. panelbox/gmm/estimator.py +46 -5
  9. panelbox/gmm/system_gmm.py +136 -21
  10. panelbox/models/static/__init__.py +4 -0
  11. panelbox/models/static/between.py +434 -0
  12. panelbox/models/static/first_difference.py +494 -0
  13. panelbox/models/static/fixed_effects.py +80 -11
  14. panelbox/models/static/pooled_ols.py +80 -11
  15. panelbox/models/static/random_effects.py +52 -10
  16. panelbox/standard_errors/__init__.py +119 -0
  17. panelbox/standard_errors/clustered.py +386 -0
  18. panelbox/standard_errors/comparison.py +528 -0
  19. panelbox/standard_errors/driscoll_kraay.py +386 -0
  20. panelbox/standard_errors/newey_west.py +324 -0
  21. panelbox/standard_errors/pcse.py +358 -0
  22. panelbox/standard_errors/robust.py +324 -0
  23. panelbox/standard_errors/utils.py +390 -0
  24. panelbox/validation/__init__.py +6 -0
  25. panelbox/validation/robustness/__init__.py +51 -0
  26. panelbox/validation/robustness/bootstrap.py +933 -0
  27. panelbox/validation/robustness/checks.py +143 -0
  28. panelbox/validation/robustness/cross_validation.py +538 -0
  29. panelbox/validation/robustness/influence.py +364 -0
  30. panelbox/validation/robustness/jackknife.py +457 -0
  31. panelbox/validation/robustness/outliers.py +529 -0
  32. panelbox/validation/robustness/sensitivity.py +809 -0
  33. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/METADATA +32 -3
  34. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/RECORD +38 -21
  35. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/WHEEL +1 -1
  36. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/entry_points.txt +0 -0
  37. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/licenses/LICENSE +0 -0
  38. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/top_level.txt +0 -0
panelbox/standard_errors/utils.py (new file)
@@ -0,0 +1,390 @@
+ """
+ Utility functions for covariance matrix estimation.
+
+ This module provides common functions for computing sandwich covariance
+ matrices and their components (bread and meat).
+ """
+
+ from typing import Optional
+ import numpy as np
+ import pandas as pd
+ from scipy import sparse
+
+
+ def compute_leverage(X: np.ndarray) -> np.ndarray:
+     """
+     Compute leverage (hat) values for observations.
+
+     The leverage h_i is the diagonal element of the hat matrix:
+         H = X(X'X)^{-1}X'
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+
+     Returns
+     -------
+     leverage : np.ndarray
+         Leverage values (n,)
+
+     Notes
+     -----
+     Leverage values satisfy:
+     - 0 <= h_i <= 1
+     - sum(h_i) = k (number of parameters)
+     - Average leverage = k/n
+
+     High leverage points (h_i > 2k/n or 3k/n) may be influential.
+     """
+     n, k = X.shape
+
+     # Compute hat values
+     # h_i = X_i (X'X)^{-1} X_i'
+     XTX_inv = np.linalg.inv(X.T @ X)
+
+     # Efficient computation: diag(X @ XTX_inv @ X.T)
+     leverage = np.sum((X @ XTX_inv) * X, axis=1)
+
+     # Ensure leverage is between 0 and 1 (numerical stability)
+     leverage = np.clip(leverage, 0, 1)
+
+     return leverage
+
+
+ def compute_bread(X: np.ndarray) -> np.ndarray:
+     """
+     Compute the "bread" of the sandwich covariance estimator.
+
+         Bread = (X'X)^{-1}
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+
+     Returns
+     -------
+     bread : np.ndarray
+         Bread matrix (k x k)
+
+     Notes
+     -----
+     The sandwich covariance estimator is:
+         V = Bread @ Meat @ Bread
+
+     where Meat depends on the specific robust estimator (HC, cluster, etc.)
+     """
+     XTX = X.T @ X
+     bread = np.linalg.inv(XTX)
+     return bread
+
+
+ def compute_meat_hc(
+     X: np.ndarray,
+     resid: np.ndarray,
+     method: str = 'HC1',
+     leverage: Optional[np.ndarray] = None
+ ) -> np.ndarray:
+     """
+     Compute the "meat" of the sandwich for heteroskedasticity-robust SEs.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+     resid : np.ndarray
+         Residuals (n,)
+     method : str, default='HC1'
+         Type of HC adjustment:
+         - 'HC0': White (1980)
+         - 'HC1': Degrees of freedom correction
+         - 'HC2': Leverage adjustment
+         - 'HC3': MacKinnon-White (1985)
+     leverage : np.ndarray, optional
+         Pre-computed leverage values (for efficiency)
+
+     Returns
+     -------
+     meat : np.ndarray
+         Meat matrix (k x k)
+
+     References
+     ----------
+     White, H. (1980). A heteroskedasticity-consistent covariance matrix
+     estimator and a direct test for heteroskedasticity. Econometrica,
+     48(4), 817-838.
+     MacKinnon, J. G., & White, H. (1985). Some heteroskedasticity-consistent
+     covariance matrix estimators with improved finite sample properties.
+     Journal of Econometrics, 29(3), 305-325.
+     """
+     n, k = X.shape
+
+     # Compute adjustment factors
+     if method == 'HC0':
+         # No adjustment
+         weights = resid ** 2
+
+     elif method == 'HC1':
+         # Degrees of freedom correction
+         weights = (n / (n - k)) * (resid ** 2)
+
+     elif method == 'HC2':
+         # Leverage adjustment: ε²/(1-h)
+         if leverage is None:
+             leverage = compute_leverage(X)
+         weights = (resid ** 2) / (1 - leverage)
+
+     elif method == 'HC3':
+         # Leverage adjustment: ε²/(1-h)²
+         if leverage is None:
+             leverage = compute_leverage(X)
+         weights = (resid ** 2) / ((1 - leverage) ** 2)
+
+     else:
+         raise ValueError(f"Unknown HC method: {method}")
+
+     # Compute meat: X'ΩX where Ω = diag(weights)
+     # Efficient computation: X.T @ diag(weights) @ X
+     X_weighted = X * np.sqrt(weights)[:, np.newaxis]
+     meat = X_weighted.T @ X_weighted
+
+     return meat
+
+
+ def sandwich_covariance(bread: np.ndarray, meat: np.ndarray) -> np.ndarray:
+     """
+     Compute sandwich covariance matrix.
+
+         V = Bread @ Meat @ Bread
+
+     Parameters
+     ----------
+     bread : np.ndarray
+         Bread matrix (k x k)
+     meat : np.ndarray
+         Meat matrix (k x k)
+
+     Returns
+     -------
+     cov : np.ndarray
+         Covariance matrix (k x k)
+     """
+     return bread @ meat @ bread
+
+
+ def compute_clustered_meat(
+     X: np.ndarray,
+     resid: np.ndarray,
+     clusters: np.ndarray,
+     df_correction: bool = True
+ ) -> np.ndarray:
+     """
+     Compute meat matrix for cluster-robust standard errors.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+     resid : np.ndarray
+         Residuals (n,)
+     clusters : np.ndarray
+         Cluster identifiers (n,)
+     df_correction : bool, default=True
+         Apply finite-sample correction: G/(G-1) × (N-1)/(N-K)
+
+     Returns
+     -------
+     meat : np.ndarray
+         Meat matrix (k x k)
+
+     Notes
+     -----
+     The cluster-robust meat is:
+         Meat = Σ_g (X_g' ε_g)(ε_g' X_g)
+
+     where g indexes clusters.
+
+     References
+     ----------
+     Cameron, A. C., Gelbach, J. B., & Miller, D. L. (2011).
+     Robust inference with multiway clustering.
+     Journal of Business & Economic Statistics, 29(2), 238-249.
+     """
+     n, k = X.shape
+     unique_clusters = np.unique(clusters)
+     n_clusters = len(unique_clusters)
+
+     # Initialize meat
+     meat = np.zeros((k, k))
+
+     # Sum over clusters
+     for cluster_id in unique_clusters:
+         cluster_mask = clusters == cluster_id
+         X_c = X[cluster_mask]
+         resid_c = resid[cluster_mask]
+
+         # Compute outer product for this cluster
+         # (X_c' ε_c)(ε_c' X_c) = (X_c' ε_c)(X_c' ε_c)'
+         score_c = X_c.T @ resid_c
+         meat += np.outer(score_c, score_c)
+
+     # Apply finite-sample correction
+     if df_correction:
+         correction = (n_clusters / (n_clusters - 1)) * ((n - 1) / (n - k))
+         meat *= correction
+
+     return meat
+
+
+ def compute_twoway_clustered_meat(
+     X: np.ndarray,
+     resid: np.ndarray,
+     clusters1: np.ndarray,
+     clusters2: np.ndarray,
+     df_correction: bool = True
+ ) -> np.ndarray:
+     """
+     Compute meat matrix for two-way cluster-robust standard errors.
+
+     Uses the formula:
+         V = V_1 + V_2 - V_12
+
+     where V_1 is clustered by dimension 1, V_2 by dimension 2,
+     and V_12 by the intersection.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+     resid : np.ndarray
+         Residuals (n,)
+     clusters1 : np.ndarray
+         First cluster dimension (n,)
+     clusters2 : np.ndarray
+         Second cluster dimension (n,)
+     df_correction : bool, default=True
+         Apply finite-sample correction
+
+     Returns
+     -------
+     meat : np.ndarray
+         Meat matrix (k x k)
+
+     References
+     ----------
+     Cameron, A. C., Gelbach, J. B., & Miller, D. L. (2011).
+     Robust inference with multiway clustering.
+     Journal of Business & Economic Statistics, 29(2), 238-249.
+     """
+     # Compute meat for each clustering dimension
+     meat1 = compute_clustered_meat(X, resid, clusters1, df_correction)
+     meat2 = compute_clustered_meat(X, resid, clusters2, df_correction)
+
+     # Create intersection clusters
+     # Combine cluster IDs as tuples
+     clusters_12 = np.array([
+         f"{c1}_{c2}" for c1, c2 in zip(clusters1, clusters2)
+     ])
+     meat12 = compute_clustered_meat(X, resid, clusters_12, df_correction)
+
+     # Two-way clustering: V_1 + V_2 - V_12
+     meat = meat1 + meat2 - meat12
+
+     return meat
+
+
+ def hc_covariance(
+     X: np.ndarray,
+     resid: np.ndarray,
+     method: str = 'HC1'
+ ) -> np.ndarray:
+     """
+     Compute heteroskedasticity-robust covariance matrix.
+
+     Convenience function that combines bread and meat computation.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+     resid : np.ndarray
+         Residuals (n,)
+     method : str, default='HC1'
+         HC method: 'HC0', 'HC1', 'HC2', or 'HC3'
+
+     Returns
+     -------
+     cov : np.ndarray
+         Robust covariance matrix (k x k)
+     """
+     bread = compute_bread(X)
+     meat = compute_meat_hc(X, resid, method)
+     return sandwich_covariance(bread, meat)
+
+
+ def clustered_covariance(
+     X: np.ndarray,
+     resid: np.ndarray,
+     clusters: np.ndarray,
+     df_correction: bool = True
+ ) -> np.ndarray:
+     """
+     Compute cluster-robust covariance matrix.
+
+     Convenience function that combines bread and meat computation.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+     resid : np.ndarray
+         Residuals (n,)
+     clusters : np.ndarray
+         Cluster identifiers (n,)
+     df_correction : bool, default=True
+         Apply finite-sample correction
+
+     Returns
+     -------
+     cov : np.ndarray
+         Cluster-robust covariance matrix (k x k)
+     """
+     bread = compute_bread(X)
+     meat = compute_clustered_meat(X, resid, clusters, df_correction)
+     return sandwich_covariance(bread, meat)
+
+
+ def twoway_clustered_covariance(
+     X: np.ndarray,
+     resid: np.ndarray,
+     clusters1: np.ndarray,
+     clusters2: np.ndarray,
+     df_correction: bool = True
+ ) -> np.ndarray:
+     """
+     Compute two-way cluster-robust covariance matrix.
+
+     Convenience function that combines bread and meat computation.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         Design matrix (n x k)
+     resid : np.ndarray
+         Residuals (n,)
+     clusters1 : np.ndarray
+         First cluster dimension (n,)
+     clusters2 : np.ndarray
+         Second cluster dimension (n,)
+     df_correction : bool, default=True
+         Apply finite-sample correction
+
+     Returns
+     -------
+     cov : np.ndarray
+         Two-way cluster-robust covariance matrix (k x k)
+     """
+     bread = compute_bread(X)
+     meat = compute_twoway_clustered_meat(X, resid, clusters1, clusters2, df_correction)
+     return sandwich_covariance(bread, meat)
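
For orientation, here is a minimal sketch (not taken from the package) of how the new utilities might be exercised on simulated data. It assumes the functions are imported directly from the panelbox.standard_errors.utils module added above; any re-exports at the package level are not shown in this diff, and the cluster ids below are purely hypothetical.

import numpy as np
from panelbox.standard_errors.utils import (
    hc_covariance, clustered_covariance, twoway_clustered_covariance
)

rng = np.random.default_rng(0)
n = 200
X = np.column_stack([np.ones(n), rng.normal(size=(n, 2))])     # design matrix with intercept
beta = np.array([1.0, 0.5, -0.25])
y = X @ beta + rng.normal(scale=1.0 + np.abs(X[:, 1]), size=n)  # heteroskedastic errors

beta_hat, *_ = np.linalg.lstsq(X, y, rcond=None)                # OLS fit
resid = y - X @ beta_hat

firm = rng.integers(0, 20, size=n)                              # hypothetical cluster ids
year = rng.integers(0, 10, size=n)

# Convenience wrappers: bread and meat are composed internally
se_hc3 = np.sqrt(np.diag(hc_covariance(X, resid, method='HC3')))
se_cl = np.sqrt(np.diag(clustered_covariance(X, resid, firm)))
se_2w = np.sqrt(np.diag(twoway_clustered_covariance(X, resid, firm, year)))
print(se_hc3, se_cl, se_2w)

The sketch uses the convenience wrappers rather than composing compute_bread and compute_meat_hc by hand; both routes give the same sandwich V = Bread @ Meat @ Bread.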
panelbox/validation/__init__.py
@@ -27,6 +27,9 @@ from panelbox.validation.cross_sectional_dependence.pesaran_cd import PesaranCDTest
  from panelbox.validation.cross_sectional_dependence.breusch_pagan_lm import BreuschPaganLMTest
  from panelbox.validation.cross_sectional_dependence.frees import FreesTest

+ # Robustness analysis
+ from panelbox.validation.robustness.bootstrap import PanelBootstrap
+
  __all__ = [
      # Base classes
      'ValidationTest',
@@ -55,4 +58,7 @@ __all__ = [
      'PesaranCDTest',
      'BreuschPaganLMTest',
      'FreesTest',
+
+     # Robustness analysis
+     'PanelBootstrap',
  ]
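
Net effect of these two hunks, as a one-line sketch (assuming the panelbox.validation package imports cleanly):

from panelbox.validation import PanelBootstrap  # now re-exported alongside the existing validation tests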
panelbox/validation/robustness/__init__.py (new file)
@@ -0,0 +1,51 @@
+ """
+ Robustness analysis tools for panel data models.
+
+ This module provides various methods for assessing the robustness of panel
+ data estimation results, including:
+
+ - Bootstrap inference (various methods)
+ - Sensitivity analysis
+ - Cross-validation
+ - Jackknife resampling
+ - Outlier detection
+ - Influence diagnostics
+ - Robustness checks
+
+ Examples
+ --------
+ >>> import panelbox as pb
+ >>>
+ >>> # Fit model
+ >>> fe = pb.FixedEffects("y ~ x1 + x2", data, "id", "time")
+ >>> results = fe.fit()
+ >>>
+ >>> # Bootstrap inference
+ >>> bootstrap = pb.PanelBootstrap(results, n_bootstrap=1000, method='pairs')
+ >>> bootstrap.run()
+ >>> ci = bootstrap.conf_int()
+ >>> print(ci)
+ """
+
+ from panelbox.validation.robustness.bootstrap import PanelBootstrap
+ from panelbox.validation.robustness.sensitivity import SensitivityAnalysis, SensitivityResults
+ from panelbox.validation.robustness.cross_validation import TimeSeriesCV, CVResults
+ from panelbox.validation.robustness.jackknife import PanelJackknife, JackknifeResults
+ from panelbox.validation.robustness.outliers import OutlierDetector, OutlierResults
+ from panelbox.validation.robustness.influence import InfluenceDiagnostics, InfluenceResults
+ from panelbox.validation.robustness.checks import RobustnessChecker
+
+ __all__ = [
+     'PanelBootstrap',
+     'SensitivityAnalysis',
+     'SensitivityResults',
+     'TimeSeriesCV',
+     'CVResults',
+     'PanelJackknife',
+     'JackknifeResults',
+     'OutlierDetector',
+     'OutlierResults',
+     'InfluenceDiagnostics',
+     'InfluenceResults',
+     'RobustnessChecker',
+ ]
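
The same names can also be imported from the subpackage path directly. A short, hedged sketch (only PanelBootstrap's usage is documented in the docstring above; the other constructors' signatures are not shown in this diff, so only the imports are illustrated, and `results` is assumed to be a fitted panelbox model result):

from panelbox.validation.robustness import PanelBootstrap, PanelJackknife, OutlierDetector

# Following the docstring example: bootstrap inference on previously fitted results
bootstrap = PanelBootstrap(results, n_bootstrap=1000, method='pairs')
bootstrap.run()
print(bootstrap.conf_int())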