panelbox 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. panelbox/__init__.py +41 -0
  2. panelbox/__version__.py +13 -1
  3. panelbox/core/formula_parser.py +9 -2
  4. panelbox/core/panel_data.py +1 -1
  5. panelbox/datasets/__init__.py +39 -0
  6. panelbox/datasets/load.py +334 -0
  7. panelbox/gmm/difference_gmm.py +63 -15
  8. panelbox/gmm/estimator.py +46 -5
  9. panelbox/gmm/system_gmm.py +136 -21
  10. panelbox/models/static/__init__.py +4 -0
  11. panelbox/models/static/between.py +434 -0
  12. panelbox/models/static/first_difference.py +494 -0
  13. panelbox/models/static/fixed_effects.py +80 -11
  14. panelbox/models/static/pooled_ols.py +80 -11
  15. panelbox/models/static/random_effects.py +52 -10
  16. panelbox/standard_errors/__init__.py +119 -0
  17. panelbox/standard_errors/clustered.py +386 -0
  18. panelbox/standard_errors/comparison.py +528 -0
  19. panelbox/standard_errors/driscoll_kraay.py +386 -0
  20. panelbox/standard_errors/newey_west.py +324 -0
  21. panelbox/standard_errors/pcse.py +358 -0
  22. panelbox/standard_errors/robust.py +324 -0
  23. panelbox/standard_errors/utils.py +390 -0
  24. panelbox/validation/__init__.py +6 -0
  25. panelbox/validation/robustness/__init__.py +51 -0
  26. panelbox/validation/robustness/bootstrap.py +933 -0
  27. panelbox/validation/robustness/checks.py +143 -0
  28. panelbox/validation/robustness/cross_validation.py +538 -0
  29. panelbox/validation/robustness/influence.py +364 -0
  30. panelbox/validation/robustness/jackknife.py +457 -0
  31. panelbox/validation/robustness/outliers.py +529 -0
  32. panelbox/validation/robustness/sensitivity.py +809 -0
  33. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/METADATA +32 -3
  34. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/RECORD +38 -21
  35. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/WHEEL +1 -1
  36. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/entry_points.txt +0 -0
  37. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/licenses/LICENSE +0 -0
  38. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,386 @@
1
+ """
2
+ Driscoll-Kraay standard errors for panel data.
3
+
4
+ Driscoll-Kraay (1998) standard errors are robust to general forms of
5
+ spatial and temporal dependence when the number of time periods is large.
6
+ They are particularly useful for macro panel data with potential cross-
7
+ sectional correlation.
8
+ """
9
+
10
+ from typing import Optional, Literal
11
+ import numpy as np
12
+ import pandas as pd
13
+ from dataclasses import dataclass
14
+
15
+ from .utils import compute_bread, sandwich_covariance
16
+
17
+
18
+ KernelType = Literal['bartlett', 'parzen', 'quadratic_spectral']
19
+
20
+
21
+ @dataclass
22
+ class DriscollKraayResult:
23
+ """
24
+ Result of Driscoll-Kraay covariance estimation.
25
+
26
+ Attributes
27
+ ----------
28
+ cov_matrix : np.ndarray
29
+ Driscoll-Kraay covariance matrix (k x k)
30
+ std_errors : np.ndarray
31
+ Driscoll-Kraay standard errors (k,)
32
+ max_lags : int
33
+ Maximum number of lags used
34
+ kernel : str
35
+ Kernel function used
36
+ n_obs : int
37
+ Number of observations
38
+ n_params : int
39
+ Number of parameters
40
+ n_periods : int
41
+ Number of time periods
42
+ bandwidth : Optional[float]
43
+ Bandwidth parameter (for some kernels)
44
+ """
45
+ cov_matrix: np.ndarray
46
+ std_errors: np.ndarray
47
+ max_lags: int
48
+ kernel: str
49
+ n_obs: int
50
+ n_params: int
51
+ n_periods: int
52
+ bandwidth: Optional[float] = None
53
+
54
+
55
+ class DriscollKraayStandardErrors:
56
+ """
57
+ Driscoll-Kraay (1998) standard errors for panel data.
58
+
59
+ Robust to general forms of spatial and temporal dependence.
60
+ Particularly useful for macro panels with cross-sectional correlation.
61
+
62
+ Parameters
63
+ ----------
64
+ X : np.ndarray
65
+ Design matrix (n x k)
66
+ resid : np.ndarray
67
+ Residuals (n,)
68
+ time_ids : np.ndarray
69
+ Time period identifiers (n,)
70
+ max_lags : int, optional
71
+ Maximum number of lags. If None, uses floor(4(T/100)^(2/9))
72
+ kernel : {'bartlett', 'parzen', 'quadratic_spectral'}, default='bartlett'
73
+ Kernel function for weighting lags
74
+
75
+ Attributes
76
+ ----------
77
+ X : np.ndarray
78
+ Design matrix
79
+ resid : np.ndarray
80
+ Residuals
81
+ time_ids : np.ndarray
82
+ Time identifiers
83
+ n_obs : int
84
+ Number of observations
85
+ n_params : int
86
+ Number of parameters
87
+ n_periods : int
88
+ Number of time periods
89
+
90
+ Examples
91
+ --------
92
+ >>> # Panel data with T=20 periods
93
+ >>> dk = DriscollKraayStandardErrors(X, resid, time_ids)
94
+ >>> result = dk.compute()
95
+ >>> print(result.std_errors)
96
+
97
+ >>> # Custom lags
98
+ >>> dk = DriscollKraayStandardErrors(X, resid, time_ids, max_lags=5)
99
+ >>> result = dk.compute()
100
+
101
+ References
102
+ ----------
103
+ Driscoll, J. C., & Kraay, A. C. (1998). Consistent covariance matrix
104
+ estimation with spatially dependent panel data. Review of Economics
105
+ and Statistics, 80(4), 549-560.
106
+
107
+ Hoechle, D. (2007). Robust standard errors for panel regressions with
108
+ cross-sectional dependence. The Stata Journal, 7(3), 281-312.
109
+ """
110
+
111
+ def __init__(
112
+ self,
113
+ X: np.ndarray,
114
+ resid: np.ndarray,
115
+ time_ids: np.ndarray,
116
+ max_lags: Optional[int] = None,
117
+ kernel: KernelType = 'bartlett'
118
+ ):
119
+ self.X = X
120
+ self.resid = resid
121
+ self.time_ids = np.asarray(time_ids)
122
+ self.kernel = kernel
123
+
124
+ self.n_obs, self.n_params = X.shape
125
+
126
+ # Validate dimensions
127
+ if len(self.time_ids) != self.n_obs:
128
+ raise ValueError(
129
+ f"time_ids dimension mismatch: expected {self.n_obs}, "
130
+ f"got {len(self.time_ids)}"
131
+ )
132
+
133
+ # Count time periods
134
+ unique_periods = np.unique(self.time_ids)
135
+ self.n_periods = len(unique_periods)
136
+
137
+ # Set max_lags
138
+ if max_lags is None:
139
+ # Newey-West rule: floor(4(T/100)^(2/9))
140
+ self.max_lags = int(np.floor(4 * (self.n_periods / 100) ** (2/9)))
141
+ else:
142
+ self.max_lags = max_lags
143
+
144
+ # Ensure max_lags is reasonable
145
+ if self.max_lags >= self.n_periods:
146
+ self.max_lags = self.n_periods - 1
147
+
148
+ # Cache
149
+ self._bread = None
150
+ self._time_sorted = None
151
+
152
+ @property
153
+ def bread(self) -> np.ndarray:
154
+ """Compute and cache bread matrix."""
155
+ if self._bread is None:
156
+ self._bread = compute_bread(self.X)
157
+ return self._bread
158
+
159
+ def _sort_by_time(self):
160
+ """Sort data by time periods."""
161
+ if self._time_sorted is None:
162
+ # Get unique time periods in order
163
+ unique_times = np.unique(self.time_ids)
164
+
165
+ # Create mapping
166
+ time_map = {t: i for i, t in enumerate(unique_times)}
167
+
168
+ # Sort indices by time
169
+ time_indices = np.array([time_map[t] for t in self.time_ids])
170
+ sort_idx = np.argsort(time_indices)
171
+
172
+ self._time_sorted = {
173
+ 'X': self.X[sort_idx],
174
+ 'resid': self.resid[sort_idx],
175
+ 'time_ids': self.time_ids[sort_idx],
176
+ 'sort_idx': sort_idx,
177
+ 'unique_times': unique_times
178
+ }
179
+
180
+ return self._time_sorted
181
+
182
+ def _kernel_weight(self, lag: int) -> float:
183
+ """
184
+ Compute kernel weight for given lag.
185
+
186
+ Parameters
187
+ ----------
188
+ lag : int
189
+ Lag number (0, 1, 2, ...)
190
+
191
+ Returns
192
+ -------
193
+ weight : float
194
+ Kernel weight
195
+ """
196
+ if lag > self.max_lags:
197
+ return 0.0
198
+
199
+ if self.kernel == 'bartlett':
200
+ # Bartlett (triangular) kernel
201
+ # w(l) = 1 - l/(max_lags + 1)
202
+ return 1.0 - lag / (self.max_lags + 1)
203
+
204
+ elif self.kernel == 'parzen':
205
+ # Parzen kernel
206
+ z = lag / (self.max_lags + 1)
207
+ if z <= 0.5:
208
+ return 1 - 6 * z**2 + 6 * z**3
209
+ else:
210
+ return 2 * (1 - z)**3
211
+
212
+ elif self.kernel == 'quadratic_spectral':
213
+ # Quadratic Spectral kernel
214
+ if lag == 0:
215
+ return 1.0
216
+ z = 6 * np.pi * lag / (self.max_lags + 1) / 5
217
+ return 3 / z**2 * (np.sin(z) / z - np.cos(z))
218
+
219
+ else:
220
+ raise ValueError(f"Unknown kernel: {self.kernel}")
221
+
222
+ def _compute_gamma(self, lag: int) -> np.ndarray:
223
+ """
224
+ Compute autocovariance matrix for given lag.
225
+
226
+ Γ_l = Σ_t X_t' ε̂_t ε̂_{t-l}' X_{t-l}
227
+
228
+ Parameters
229
+ ----------
230
+ lag : int
231
+ Lag number (0, 1, 2, ...)
232
+
233
+ Returns
234
+ -------
235
+ gamma : np.ndarray
236
+ Autocovariance matrix (k x k)
237
+ """
238
+ sorted_data = self._sort_by_time()
239
+ unique_times = sorted_data['unique_times']
240
+ k = self.n_params
241
+
242
+ gamma = np.zeros((k, k))
243
+
244
+ # For each time period t
245
+ for t_idx in range(lag, self.n_periods):
246
+ t = unique_times[t_idx]
247
+ t_lag = unique_times[t_idx - lag]
248
+
249
+ # Get observations for time t
250
+ mask_t = sorted_data['time_ids'] == t
251
+ X_t = sorted_data['X'][mask_t]
252
+ resid_t = sorted_data['resid'][mask_t]
253
+
254
+ # Get observations for time t-lag
255
+ mask_t_lag = sorted_data['time_ids'] == t_lag
256
+ X_t_lag = sorted_data['X'][mask_t_lag]
257
+ resid_t_lag = sorted_data['resid'][mask_t_lag]
258
+
259
+ # Compute cross-product
260
+ # For each pair of observations
261
+ for i in range(len(X_t)):
262
+ for j in range(len(X_t_lag)):
263
+ gamma += np.outer(
264
+ X_t[i] * resid_t[i],
265
+ X_t_lag[j] * resid_t_lag[j]
266
+ )
267
+
268
+ return gamma
269
+
270
+ def compute(self) -> DriscollKraayResult:
271
+ """
272
+ Compute Driscoll-Kraay covariance matrix.
273
+
274
+ Returns
275
+ -------
276
+ result : DriscollKraayResult
277
+ Driscoll-Kraay covariance and standard errors
278
+
279
+ Notes
280
+ -----
281
+ The Driscoll-Kraay estimator is:
282
+
283
+ V_DK = (X'X)^{-1} S_DK (X'X)^{-1}
284
+
285
+ where:
286
+ S_DK = Γ_0 + Σ_{l=1}^L w_l (Γ_l + Γ_l')
287
+
288
+ and Γ_l is the lag-l autocovariance matrix:
289
+ Γ_l = Σ_t X_t' ε̂_t ε̂_{t-l}' X_{t-l}
290
+
291
+ The kernel weights w_l ensure positive semi-definiteness.
292
+ """
293
+ k = self.n_params
294
+
295
+ # Start with lag-0 autocovariance
296
+ S = self._compute_gamma(0)
297
+
298
+ # Add weighted autocovariances for lags 1, ..., max_lags
299
+ for lag in range(1, self.max_lags + 1):
300
+ weight = self._kernel_weight(lag)
301
+ if weight > 0:
302
+ gamma_l = self._compute_gamma(lag)
303
+ # Add both Γ_l and Γ_l' (symmetrize)
304
+ S += weight * (gamma_l + gamma_l.T)
305
+
306
+ # Sandwich: V = Bread @ S @ Bread
307
+ cov_matrix = sandwich_covariance(self.bread, S)
308
+ std_errors = np.sqrt(np.diag(cov_matrix))
309
+
310
+ return DriscollKraayResult(
311
+ cov_matrix=cov_matrix,
312
+ std_errors=std_errors,
313
+ max_lags=self.max_lags,
314
+ kernel=self.kernel,
315
+ n_obs=self.n_obs,
316
+ n_params=self.n_params,
317
+ n_periods=self.n_periods
318
+ )
319
+
320
+ def diagnostic_summary(self) -> str:
321
+ """
322
+ Generate diagnostic summary.
323
+
324
+ Returns
325
+ -------
326
+ summary : str
327
+ Diagnostic information
328
+ """
329
+ lines = []
330
+ lines.append("Driscoll-Kraay Standard Errors Diagnostics")
331
+ lines.append("=" * 50)
332
+ lines.append(f"Number of observations: {self.n_obs}")
333
+ lines.append(f"Number of time periods: {self.n_periods}")
334
+ lines.append(f"Avg obs per period: {self.n_obs / self.n_periods:.1f}")
335
+ lines.append(f"Maximum lags: {self.max_lags}")
336
+ lines.append(f"Kernel function: {self.kernel}")
337
+ lines.append("")
338
+
339
+ # Recommendations
340
+ if self.n_periods < 20:
341
+ lines.append("⚠ WARNING: Few time periods (<20)")
342
+ lines.append(" Driscoll-Kraay SEs may not perform well with T < 20")
343
+ lines.append(" Consider alternative methods")
344
+ if self.max_lags > self.n_periods / 4:
345
+ lines.append("⚠ WARNING: Large max_lags relative to T")
346
+ lines.append(f" max_lags = {self.max_lags}, T = {self.n_periods}")
347
+
348
+ return "\n".join(lines)
349
+
350
+
351
+ def driscoll_kraay(
352
+ X: np.ndarray,
353
+ resid: np.ndarray,
354
+ time_ids: np.ndarray,
355
+ max_lags: Optional[int] = None,
356
+ kernel: KernelType = 'bartlett'
357
+ ) -> DriscollKraayResult:
358
+ """
359
+ Convenience function for Driscoll-Kraay standard errors.
360
+
361
+ Parameters
362
+ ----------
363
+ X : np.ndarray
364
+ Design matrix (n x k)
365
+ resid : np.ndarray
366
+ Residuals (n,)
367
+ time_ids : np.ndarray
368
+ Time period identifiers (n,)
369
+ max_lags : int, optional
370
+ Maximum number of lags
371
+ kernel : {'bartlett', 'parzen', 'quadratic_spectral'}, default='bartlett'
372
+ Kernel function
373
+
374
+ Returns
375
+ -------
376
+ result : DriscollKraayResult
377
+ Driscoll-Kraay covariance and standard errors
378
+
379
+ Examples
380
+ --------
381
+ >>> from panelbox.standard_errors import driscoll_kraay
382
+ >>> result = driscoll_kraay(X, resid, time_ids, max_lags=3)
383
+ >>> print(result.std_errors)
384
+ """
385
+ dk = DriscollKraayStandardErrors(X, resid, time_ids, max_lags, kernel)
386
+ return dk.compute()
@@ -0,0 +1,324 @@
1
+ """
2
+ Newey-West HAC (Heteroskedasticity and Autocorrelation Consistent) standard errors.
3
+
4
+ Newey-West (1987) standard errors are robust to both heteroskedasticity and
5
+ autocorrelation. Useful for time-series and panel data with serial correlation.
6
+ """
7
+
8
+ from typing import Optional, Literal
9
+ import numpy as np
10
+ from dataclasses import dataclass
11
+
12
+ from .utils import compute_bread, sandwich_covariance
13
+
14
+
15
+ KernelType = Literal['bartlett', 'parzen', 'quadratic_spectral']
16
+
17
+
18
+ @dataclass
19
+ class NeweyWestResult:
20
+ """
21
+ Result of Newey-West HAC covariance estimation.
22
+
23
+ Attributes
24
+ ----------
25
+ cov_matrix : np.ndarray
26
+ Newey-West covariance matrix (k x k)
27
+ std_errors : np.ndarray
28
+ Newey-West standard errors (k,)
29
+ max_lags : int
30
+ Maximum number of lags used
31
+ kernel : str
32
+ Kernel function used
33
+ n_obs : int
34
+ Number of observations
35
+ n_params : int
36
+ Number of parameters
37
+ prewhitening : bool
38
+ Whether prewhitening was applied
39
+ """
40
+ cov_matrix: np.ndarray
41
+ std_errors: np.ndarray
42
+ max_lags: int
43
+ kernel: str
44
+ n_obs: int
45
+ n_params: int
46
+ prewhitening: bool = False
47
+
48
+
49
+ class NeweyWestStandardErrors:
50
+ """
51
+ Newey-West (1987) HAC standard errors.
52
+
53
+ Robust to heteroskedasticity and autocorrelation. Particularly useful
54
+ for time-series data and panel data with serial correlation.
55
+
56
+ Parameters
57
+ ----------
58
+ X : np.ndarray
59
+ Design matrix (n x k)
60
+ resid : np.ndarray
61
+ Residuals (n,)
62
+ max_lags : int, optional
63
+ Maximum number of lags. If None, uses floor(4(T/100)^(2/9))
64
+ kernel : {'bartlett', 'parzen', 'quadratic_spectral'}, default='bartlett'
65
+ Kernel function for weighting lags
66
+ prewhitening : bool, default=False
67
+ Apply AR(1) prewhitening to reduce finite-sample bias
68
+
69
+ Attributes
70
+ ----------
71
+ X : np.ndarray
72
+ Design matrix
73
+ resid : np.ndarray
74
+ Residuals
75
+ n_obs : int
76
+ Number of observations
77
+ n_params : int
78
+ Number of parameters
79
+
80
+ Examples
81
+ --------
82
+ >>> # Time-series with autocorrelation
83
+ >>> nw = NeweyWestStandardErrors(X, resid, max_lags=4)
84
+ >>> result = nw.compute()
85
+ >>> print(result.std_errors)
86
+
87
+ >>> # Auto-select lags
88
+ >>> nw = NeweyWestStandardErrors(X, resid)
89
+ >>> result = nw.compute()
90
+
91
+ References
92
+ ----------
93
+ Newey, W. K., & West, K. D. (1987). A simple, positive semi-definite,
94
+ heteroskedasticity and autocorrelation consistent covariance matrix.
95
+ Econometrica, 55(3), 703-708.
96
+
97
+ Andrews, D. W. K. (1991). Heteroskedasticity and autocorrelation consistent
98
+ covariance matrix estimation. Econometrica, 59(3), 817-858.
99
+ """
100
+
101
+ def __init__(
102
+ self,
103
+ X: np.ndarray,
104
+ resid: np.ndarray,
105
+ max_lags: Optional[int] = None,
106
+ kernel: KernelType = 'bartlett',
107
+ prewhitening: bool = False
108
+ ):
109
+ self.X = X
110
+ self.resid = resid
111
+ self.kernel = kernel
112
+ self.prewhitening = prewhitening
113
+
114
+ self.n_obs, self.n_params = X.shape
115
+
116
+ # Set max_lags
117
+ if max_lags is None:
118
+ # Newey-West rule: floor(4(T/100)^(2/9))
119
+ self.max_lags = int(np.floor(4 * (self.n_obs / 100) ** (2/9)))
120
+ else:
121
+ self.max_lags = max_lags
122
+
123
+ # Ensure max_lags is reasonable
124
+ if self.max_lags >= self.n_obs:
125
+ self.max_lags = self.n_obs - 1
126
+
127
+ # Cache
128
+ self._bread = None
129
+
130
+ @property
131
+ def bread(self) -> np.ndarray:
132
+ """Compute and cache bread matrix."""
133
+ if self._bread is None:
134
+ self._bread = compute_bread(self.X)
135
+ return self._bread
136
+
137
+ def _kernel_weight(self, lag: int) -> float:
138
+ """
139
+ Compute kernel weight for given lag.
140
+
141
+ Parameters
142
+ ----------
143
+ lag : int
144
+ Lag number (0, 1, 2, ...)
145
+
146
+ Returns
147
+ -------
148
+ weight : float
149
+ Kernel weight
150
+ """
151
+ if lag > self.max_lags:
152
+ return 0.0
153
+
154
+ if self.kernel == 'bartlett':
155
+ # Bartlett (triangular) kernel
156
+ # w(l) = 1 - l/(max_lags + 1)
157
+ return 1.0 - lag / (self.max_lags + 1)
158
+
159
+ elif self.kernel == 'parzen':
160
+ # Parzen kernel
161
+ z = lag / (self.max_lags + 1)
162
+ if z <= 0.5:
163
+ return 1 - 6 * z**2 + 6 * z**3
164
+ else:
165
+ return 2 * (1 - z)**3
166
+
167
+ elif self.kernel == 'quadratic_spectral':
168
+ # Quadratic Spectral kernel
169
+ if lag == 0:
170
+ return 1.0
171
+ z = 6 * np.pi * lag / (self.max_lags + 1) / 5
172
+ return 3 / z**2 * (np.sin(z) / z - np.cos(z))
173
+
174
+ else:
175
+ raise ValueError(f"Unknown kernel: {self.kernel}")
176
+
177
+ def _compute_gamma(self, lag: int) -> np.ndarray:
178
+ """
179
+ Compute lag-l autocovariance matrix.
180
+
181
+ Γ_l = (1/n) Σ_{t=l+1}^n (X_t ε_t)(X_{t-l} ε_{t-l})'
182
+
183
+ Parameters
184
+ ----------
185
+ lag : int
186
+ Lag number (0, 1, 2, ...)
187
+
188
+ Returns
189
+ -------
190
+ gamma : np.ndarray
191
+ Autocovariance matrix (k x k)
192
+ """
193
+ k = self.n_params
194
+ n = self.n_obs
195
+
196
+ if lag == 0:
197
+ # Γ_0 = (1/n) Σ X_t' ε_t² X_t
198
+ # This is the heteroskedasticity component
199
+ X_resid = self.X * self.resid[:, np.newaxis]
200
+ gamma = (X_resid.T @ X_resid) / n
201
+ else:
202
+ # Γ_l = (1/n) Σ (X_t ε_t)(X_{t-l} ε_{t-l})'
203
+ X_resid_t = self.X[lag:] * self.resid[lag:, np.newaxis]
204
+ X_resid_t_lag = self.X[:-lag] * self.resid[:-lag, np.newaxis]
205
+ gamma = (X_resid_t.T @ X_resid_t_lag) / n
206
+
207
+ return gamma
208
+
209
+ def compute(self) -> NeweyWestResult:
210
+ """
211
+ Compute Newey-West HAC covariance matrix.
212
+
213
+ Returns
214
+ -------
215
+ result : NeweyWestResult
216
+ Newey-West covariance and standard errors
217
+
218
+ Notes
219
+ -----
220
+ The Newey-West estimator is:
221
+
222
+ V_NW = (X'X)^{-1} Ω_NW (X'X)^{-1}
223
+
224
+ where:
225
+ Ω_NW = Γ_0 + Σ_{l=1}^L w_l (Γ_l + Γ_l')
226
+
227
+ and Γ_l is the lag-l autocovariance matrix.
228
+
229
+ The kernel weights w_l ensure positive semi-definiteness.
230
+ """
231
+ # Start with lag-0 autocovariance (heteroskedasticity)
232
+ S = self._compute_gamma(0)
233
+
234
+ # Add weighted autocovariances for lags 1, ..., max_lags
235
+ for lag in range(1, self.max_lags + 1):
236
+ weight = self._kernel_weight(lag)
237
+ if weight > 0:
238
+ gamma_l = self._compute_gamma(lag)
239
+ # Add both Γ_l and Γ_l' (symmetrize)
240
+ S += weight * (gamma_l + gamma_l.T)
241
+
242
+ # Scale by n (since gamma is already divided by n)
243
+ S *= self.n_obs
244
+
245
+ # Sandwich: V = Bread @ S @ Bread
246
+ cov_matrix = sandwich_covariance(self.bread, S)
247
+ std_errors = np.sqrt(np.diag(cov_matrix))
248
+
249
+ return NeweyWestResult(
250
+ cov_matrix=cov_matrix,
251
+ std_errors=std_errors,
252
+ max_lags=self.max_lags,
253
+ kernel=self.kernel,
254
+ n_obs=self.n_obs,
255
+ n_params=self.n_params,
256
+ prewhitening=self.prewhitening
257
+ )
258
+
259
+ def diagnostic_summary(self) -> str:
260
+ """
261
+ Generate diagnostic summary.
262
+
263
+ Returns
264
+ -------
265
+ summary : str
266
+ Diagnostic information
267
+ """
268
+ lines = []
269
+ lines.append("Newey-West HAC Standard Errors Diagnostics")
270
+ lines.append("=" * 50)
271
+ lines.append(f"Number of observations: {self.n_obs}")
272
+ lines.append(f"Number of parameters: {self.n_params}")
273
+ lines.append(f"Maximum lags: {self.max_lags}")
274
+ lines.append(f"Kernel function: {self.kernel}")
275
+ lines.append(f"Prewhitening: {self.prewhitening}")
276
+ lines.append("")
277
+
278
+ # Recommendations
279
+ if self.n_obs < 50:
280
+ lines.append("⚠ WARNING: Small sample size (<50)")
281
+ lines.append(" Newey-West SEs may not perform well with few observations")
282
+ if self.max_lags > self.n_obs / 3:
283
+ lines.append("⚠ WARNING: Large max_lags relative to sample size")
284
+ lines.append(f" max_lags = {self.max_lags}, n = {self.n_obs}")
285
+
286
+ return "\n".join(lines)
287
+
288
+
289
+ def newey_west(
290
+ X: np.ndarray,
291
+ resid: np.ndarray,
292
+ max_lags: Optional[int] = None,
293
+ kernel: KernelType = 'bartlett',
294
+ prewhitening: bool = False
295
+ ) -> NeweyWestResult:
296
+ """
297
+ Convenience function for Newey-West HAC standard errors.
298
+
299
+ Parameters
300
+ ----------
301
+ X : np.ndarray
302
+ Design matrix (n x k)
303
+ resid : np.ndarray
304
+ Residuals (n,)
305
+ max_lags : int, optional
306
+ Maximum number of lags
307
+ kernel : {'bartlett', 'parzen', 'quadratic_spectral'}, default='bartlett'
308
+ Kernel function
309
+ prewhitening : bool, default=False
310
+ Apply AR(1) prewhitening
311
+
312
+ Returns
313
+ -------
314
+ result : NeweyWestResult
315
+ Newey-West covariance and standard errors
316
+
317
+ Examples
318
+ --------
319
+ >>> from panelbox.standard_errors import newey_west
320
+ >>> result = newey_west(X, resid, max_lags=4)
321
+ >>> print(result.std_errors)
322
+ """
323
+ nw = NeweyWestStandardErrors(X, resid, max_lags, kernel, prewhitening)
324
+ return nw.compute()