panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,562 @@
1
+ """
2
+ GMM Estimation Algorithms
3
+ ==========================
4
+
5
+ Low-level GMM estimation routines implementing one-step, two-step,
6
+ and iterative GMM with Windmeijer correction.
7
+
8
+ Classes
9
+ -------
10
+ GMMEstimator : Low-level GMM estimation algorithms
11
+
12
+ References
13
+ ----------
14
+ .. [1] Hansen, L. P. (1982). "Large Sample Properties of Generalized Method
15
+ of Moments Estimators." Econometrica, 50(4), 1029-1054.
16
+
17
+ .. [2] Windmeijer, F. (2005). "A Finite Sample Correction for the Variance of
18
+ Linear Efficient Two-Step GMM Estimators." Journal of Econometrics,
19
+ 126(1), 25-51.
20
+
21
+ .. [3] Hansen, L. P., Heaton, J., & Yaron, A. (1996). "Finite-Sample
22
+ Properties of Some Alternative GMM Estimators." Journal of Business &
23
+ Economic Statistics, 14(3), 262-280.
24
+ """
25
+
26
+ from typing import Tuple, Optional
27
+ import numpy as np
28
+ from scipy import linalg
29
+ import warnings
30
+
31
+
32
class GMMEstimator:
    """
    Low-level GMM estimation routines.

    This class implements the core mathematical algorithms for GMM estimation:

    - One-step GMM (fixed weight matrix ``W = (Z'Z)^{-1}``)
    - Two-step GMM with the Windmeijer (2005) finite-sample variance correction
    - Iterative GMM, updating beta and the weight matrix until convergence

    Observations with missing data are dropped via an internal validity mask;
    returned residual vectors are NaN at the dropped positions.

    Parameters
    ----------
    tol : float
        Convergence tolerance (max absolute coefficient change) for
        iterative methods.
    max_iter : int
        Maximum number of iterations for iterative GMM.
    """
48
+
49
+ def __init__(self, tol: float = 1e-6, max_iter: int = 100):
50
+ """Initialize estimator."""
51
+ self.tol = tol
52
+ self.max_iter = max_iter
53
+
54
+ def one_step(self,
55
+ y: np.ndarray,
56
+ X: np.ndarray,
57
+ Z: np.ndarray,
58
+ skip_instrument_cleaning: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
59
+ """
60
+ One-step GMM estimation.
61
+
62
+ Uses weight matrix W = (Z'Z)^{-1}, which is efficient under
63
+ homoskedasticity but not optimal under heteroskedasticity.
64
+
65
+ Parameters
66
+ ----------
67
+ y : np.ndarray
68
+ Dependent variable (n x 1)
69
+ X : np.ndarray
70
+ Regressors (n x k)
71
+ Z : np.ndarray
72
+ Instruments (n x n_instruments)
73
+
74
+ Returns
75
+ -------
76
+ beta : np.ndarray
77
+ Estimated coefficients (k x 1)
78
+ W : np.ndarray
79
+ Weight matrix (n_instruments x n_instruments)
80
+ residuals : np.ndarray
81
+ Residuals (n x 1)
82
+
83
+ Notes
84
+ -----
85
+ GMM estimator: β = (X'Z W Z'X)^{-1} (X'Z W Z'y)
86
+ Weight matrix: W = (Z'Z)^{-1}
87
+ """
88
+ # Ensure arrays are float64
89
+ y = np.asarray(y, dtype=np.float64)
90
+ X = np.asarray(X, dtype=np.float64)
91
+ Z = np.asarray(Z, dtype=np.float64)
92
+
93
+ # Remove observations with missing values
94
+ valid_mask = self._get_valid_mask(y, X, Z)
95
+ y_clean = y[valid_mask]
96
+ X_clean = X[valid_mask]
97
+ Z_clean = Z[valid_mask]
98
+
99
+ # Note: Instrument column cleaning should be done by caller before calling this method
100
+ # to avoid dimension mismatches with weight matrices
101
+
102
+ # Compute weight matrix W = (Z'Z)^{-1}
103
+ ZtZ = Z_clean.T @ Z_clean
104
+ try:
105
+ W = linalg.inv(ZtZ)
106
+ except linalg.LinAlgError:
107
+ # Singular matrix, use pseudo-inverse
108
+ warnings.warn("Singular Z'Z matrix, using pseudo-inverse")
109
+ W = linalg.pinv(ZtZ)
110
+
111
+ # Compute GMM estimator
112
+ # β = (X'Z W Z'X)^{-1} (X'Z W Z'y)
113
+ XtZ = X_clean.T @ Z_clean
114
+ ZtX = Z_clean.T @ X_clean
115
+ Zty = Z_clean.T @ y_clean
116
+
117
+ # A = X'Z W Z'X
118
+ A = XtZ @ W @ ZtX
119
+ try:
120
+ A_inv = linalg.inv(A)
121
+ except linalg.LinAlgError:
122
+ warnings.warn("Singular A matrix, using pseudo-inverse")
123
+ A_inv = linalg.pinv(A)
124
+
125
+ # b = X'Z W Z'y
126
+ b = XtZ @ W @ Zty
127
+
128
+ # β = A^{-1} b
129
+ beta = A_inv @ b
130
+
131
+ # Compute residuals
132
+ residuals = np.full_like(y, np.nan)
133
+ residuals[valid_mask] = y_clean - X_clean @ beta
134
+
135
+ return beta, W, residuals
136
+
137
+ def two_step(self,
138
+ y: np.ndarray,
139
+ X: np.ndarray,
140
+ Z: np.ndarray,
141
+ robust: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
142
+ """
143
+ Two-step GMM estimation with Windmeijer correction.
144
+
145
+ Two-step GMM is asymptotically efficient under heteroskedasticity.
146
+ Windmeijer (2005) correction is crucial for finite-sample inference.
147
+
148
+ Parameters
149
+ ----------
150
+ y : np.ndarray
151
+ Dependent variable (n x 1)
152
+ X : np.ndarray
153
+ Regressors (n x k)
154
+ Z : np.ndarray
155
+ Instruments (n x n_instruments)
156
+ robust : bool
157
+ Whether to use robust variance matrix (Windmeijer correction)
158
+
159
+ Returns
160
+ -------
161
+ beta : np.ndarray
162
+ Estimated coefficients (k x 1)
163
+ vcov : np.ndarray
164
+ Variance-covariance matrix (k x k)
165
+ W : np.ndarray
166
+ Optimal weight matrix (n_instruments x n_instruments)
167
+ residuals : np.ndarray
168
+ Residuals (n x 1)
169
+
170
+ Notes
171
+ -----
172
+ Step 1: One-step GMM to get initial residuals
173
+ Step 2: Construct optimal weight matrix W = (Z'ΩZ)^{-1}
174
+ where Ω is residual variance matrix
175
+ Step 3: Re-estimate with optimal W
176
+ Step 4: Apply Windmeijer correction if robust=True
177
+ """
178
+ # Ensure arrays are float64
179
+ y = np.asarray(y, dtype=np.float64)
180
+ X = np.asarray(X, dtype=np.float64)
181
+ Z = np.asarray(Z, dtype=np.float64)
182
+
183
+ # Remove observations with missing values
184
+ valid_mask = self._get_valid_mask(y, X, Z)
185
+ y_clean = y[valid_mask]
186
+ X_clean = X[valid_mask]
187
+ Z_clean = Z[valid_mask]
188
+
189
+ # Note: Instrument column cleaning should be done by caller before calling this method
190
+
191
+ # Step 1: One-step GMM to get initial residuals
192
+ beta_init, _, resid_init_full = self.one_step(y, X, Z)
193
+ resid_init = resid_init_full[valid_mask]
194
+
195
+ # Step 2: Construct optimal weight matrix
196
+ W_optimal = self._compute_optimal_weight(Z_clean, resid_init, robust=True)
197
+
198
+ # Step 3: Re-estimate with optimal weight matrix
199
+ XtZ = X_clean.T @ Z_clean
200
+ ZtX = Z_clean.T @ X_clean
201
+ Zty = Z_clean.T @ y_clean
202
+
203
+ # A = X'Z W Z'X
204
+ A = XtZ @ W_optimal @ ZtX
205
+ try:
206
+ A_inv = linalg.inv(A)
207
+ except linalg.LinAlgError:
208
+ warnings.warn("Singular A matrix in two-step, using pseudo-inverse")
209
+ A_inv = linalg.pinv(A)
210
+
211
+ # b = X'Z W Z'y
212
+ b = XtZ @ W_optimal @ Zty
213
+
214
+ # β = A^{-1} b
215
+ beta = A_inv @ b
216
+
217
+ # Compute residuals
218
+ residuals = np.full_like(y, np.nan)
219
+ residuals[valid_mask] = y_clean - X_clean @ beta
220
+
221
+ # Step 4: Compute variance-covariance matrix
222
+ if robust:
223
+ # Windmeijer (2005) correction
224
+ vcov = self.windmeijer_correction(
225
+ X_clean, Z_clean, residuals[valid_mask], W_optimal, A_inv
226
+ )
227
+ else:
228
+ # Standard two-step variance (downward biased)
229
+ vcov = A_inv
230
+
231
+ return beta, vcov, W_optimal, residuals
232
+
233
+ def _compute_optimal_weight(self,
234
+ Z: np.ndarray,
235
+ residuals: np.ndarray,
236
+ robust: bool = True) -> np.ndarray:
237
+ """
238
+ Compute optimal GMM weight matrix.
239
+
240
+ Parameters
241
+ ----------
242
+ Z : np.ndarray
243
+ Instruments (n x n_instruments)
244
+ residuals : np.ndarray
245
+ Residuals from initial estimation (n x 1)
246
+ robust : bool
247
+ Use robust variance (heteroskedasticity-consistent)
248
+
249
+ Returns
250
+ -------
251
+ W : np.ndarray
252
+ Optimal weight matrix (n_instruments x n_instruments)
253
+
254
+ Notes
255
+ -----
256
+ Robust: W = (Z'ΩZ)^{-1} where Ω = diag(ε²)
257
+ Non-robust: W = (1/n) (Z'Z)^{-1}
258
+ """
259
+ n = Z.shape[0]
260
+
261
+ if robust:
262
+ # Heteroskedasticity-robust weight matrix
263
+ # Ω = diag(ε²)
264
+ Omega = np.diag(residuals.flatten() ** 2)
265
+ ZtOmegaZ = Z.T @ Omega @ Z
266
+ else:
267
+ # Homoskedastic weight matrix
268
+ sigma2 = np.mean(residuals ** 2)
269
+ ZtOmegaZ = sigma2 * (Z.T @ Z)
270
+
271
+ try:
272
+ W = linalg.inv(ZtOmegaZ)
273
+ except linalg.LinAlgError:
274
+ warnings.warn("Singular optimal weight matrix, using pseudo-inverse")
275
+ W = linalg.pinv(ZtOmegaZ)
276
+
277
+ return W
278
+
279
+ def windmeijer_correction(self,
280
+ X: np.ndarray,
281
+ Z: np.ndarray,
282
+ residuals: np.ndarray,
283
+ W: np.ndarray,
284
+ A_inv: np.ndarray) -> np.ndarray:
285
+ """
286
+ Windmeijer (2005) finite-sample correction for two-step GMM.
287
+
288
+ The standard two-step GMM variance estimator is severely downward
289
+ biased in finite samples. Windmeijer's correction adjusts for the
290
+ estimation error in the weight matrix.
291
+
292
+ Parameters
293
+ ----------
294
+ X : np.ndarray
295
+ Regressors (n x k)
296
+ Z : np.ndarray
297
+ Instruments (n x n_instruments)
298
+ residuals : np.ndarray
299
+ Two-step residuals (n x 1)
300
+ W : np.ndarray
301
+ Optimal weight matrix (n_instruments x n_instruments)
302
+ A_inv : np.ndarray
303
+ (X'Z W Z'X)^{-1} matrix (k x k)
304
+
305
+ Returns
306
+ -------
307
+ vcov_corrected : np.ndarray
308
+ Corrected variance-covariance matrix (k x k)
309
+
310
+ References
311
+ ----------
312
+ Windmeijer, F. (2005). "A Finite Sample Correction for the Variance of
313
+ Linear Efficient Two-Step GMM Estimators." Journal of Econometrics,
314
+ 126(1), 25-51.
315
+ """
316
+ n, k = X.shape
317
+ n_instruments = Z.shape[1]
318
+
319
+ # Compute moment conditions: g_i = Z_i * ε_i
320
+ g = Z * residuals
321
+
322
+ # Estimate variance of moments: Σ = E[g_i g_i']
323
+ Sigma = (g.T @ g) / n
324
+
325
+ # Compute D = E[∂g_i/∂β'] = -E[Z_i X_i']
326
+ D = -(Z.T @ X) / n
327
+
328
+ # Standard two-step variance (uncorrected)
329
+ # V_uncorrected = (D' W D)^{-1}
330
+ # This is what A_inv already is
331
+
332
+ # Windmeijer correction term
333
+ # Accounts for estimation of W in first step
334
+ correction = self._compute_windmeijer_correction_term(
335
+ X, Z, residuals, W, D, Sigma
336
+ )
337
+
338
+ # Corrected variance
339
+ # V_corrected = A_inv + A_inv * correction * A_inv
340
+ vcov_corrected = A_inv + A_inv @ correction @ A_inv
341
+
342
+ # Ensure symmetry
343
+ vcov_corrected = (vcov_corrected + vcov_corrected.T) / 2
344
+
345
+ return vcov_corrected
346
+
347
+ def _compute_windmeijer_correction_term(self,
348
+ X: np.ndarray,
349
+ Z: np.ndarray,
350
+ residuals: np.ndarray,
351
+ W: np.ndarray,
352
+ D: np.ndarray,
353
+ Sigma: np.ndarray) -> np.ndarray:
354
+ """
355
+ Compute the correction term for Windmeijer's variance.
356
+
357
+ This is the most complex part of the Windmeijer correction,
358
+ accounting for the effect of estimating W.
359
+ """
360
+ n = X.shape[0]
361
+ n_instruments = Z.shape[1]
362
+
363
+ # Compute H matrices (derivatives of weight matrix)
364
+ # H_jl = ∂W/∂σ_{jl} where σ_{jl} = E[Z_j ε Z_l ε]
365
+ #
366
+ # For computational efficiency, we use:
367
+ # ∂W/∂σ_{jl} = -W * (e_j e_l' + e_l e_j') * W
368
+ # where e_j is unit vector
369
+
370
+ # Compute B = Σ_{i=1}^n (∂g_i/∂β') W (∂²Σ/∂β∂σ) W (∂g_i/∂β)
371
+ # This is approximated by a simpler form in practice
372
+
373
+ # Simplified Windmeijer correction (commonly used)
374
+ # Based on equation (12) in Windmeijer (2005)
375
+
376
+ # Compute moment Jacobian weighted by W
377
+ DWD = D.T @ W @ D # k x k
378
+
379
+ # Compute correction for estimation of Σ
380
+ # This captures the variability in the weight matrix estimation
381
+ g = Z * residuals # n x n_instruments
382
+
383
+ # For each observation, compute contribution to correction
384
+ correction = np.zeros((X.shape[1], X.shape[1]))
385
+
386
+ for i in range(n):
387
+ # g_i = Z_i * ε_i
388
+ g_i = g[i:i+1, :].T # n_instruments x 1
389
+
390
+ # X_i weighted by instruments
391
+ ZiXi = Z[i:i+1, :].T @ X[i:i+1, :] # n_instruments x k
392
+
393
+ # Contribution to correction
394
+ # This is a simplified version that captures the main effect
395
+ H_i = W @ (g_i @ g_i.T) @ W # Effect of observation i on W
396
+ contrib = ZiXi.T @ H_i @ ZiXi
397
+
398
+ correction += contrib
399
+
400
+ correction = correction / n
401
+
402
+ return correction
403
+
404
+ def iterative(self,
405
+ y: np.ndarray,
406
+ X: np.ndarray,
407
+ Z: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, bool]:
408
+ """
409
+ Iterated GMM (CUE - Continuously Updated Estimator).
410
+
411
+ Iteratively updates both β and W until convergence.
412
+ Hansen et al. (1996) show this can have better finite-sample
413
+ properties than two-step in some cases.
414
+
415
+ Parameters
416
+ ----------
417
+ y : np.ndarray
418
+ Dependent variable (n x 1)
419
+ X : np.ndarray
420
+ Regressors (n x k)
421
+ Z : np.ndarray
422
+ Instruments (n x n_instruments)
423
+
424
+ Returns
425
+ -------
426
+ beta : np.ndarray
427
+ Estimated coefficients (k x 1)
428
+ vcov : np.ndarray
429
+ Variance-covariance matrix (k x k)
430
+ W : np.ndarray
431
+ Weight matrix at convergence (n_instruments x n_instruments)
432
+ converged : bool
433
+ Whether iteration converged
434
+
435
+ Notes
436
+ -----
437
+ Algorithm:
438
+ 1. Start with one-step β
439
+ 2. Compute W(β)
440
+ 3. Update β using W
441
+ 4. Repeat 2-3 until ||β_new - β_old|| < tol
442
+ """
443
+ # Remove observations with missing values
444
+ valid_mask = self._get_valid_mask(y, X, Z)
445
+ y_clean = y[valid_mask]
446
+ X_clean = X[valid_mask]
447
+ Z_clean = Z[valid_mask]
448
+
449
+ # Initialize with one-step
450
+ beta_old, _, resid_full = self.one_step(y, X, Z)
451
+ resid_old = resid_full[valid_mask]
452
+
453
+ converged = False
454
+ for iteration in range(self.max_iter):
455
+ # Update weight matrix using current residuals
456
+ W = self._compute_optimal_weight(Z_clean, resid_old, robust=True)
457
+
458
+ # Update β using new W
459
+ XtZ = X_clean.T @ Z_clean
460
+ ZtX = Z_clean.T @ X_clean
461
+ Zty = Z_clean.T @ y_clean
462
+
463
+ A = XtZ @ W @ ZtX
464
+ try:
465
+ A_inv = linalg.inv(A)
466
+ except linalg.LinAlgError:
467
+ A_inv = linalg.pinv(A)
468
+
469
+ b = XtZ @ W @ Zty
470
+ beta_new = A_inv @ b
471
+
472
+ # Check convergence
473
+ if self._check_convergence(beta_old, beta_new):
474
+ converged = True
475
+ break
476
+
477
+ # Update for next iteration
478
+ beta_old = beta_new
479
+ resid_old = y_clean - X_clean @ beta_new
480
+
481
+ if not converged:
482
+ warnings.warn(f"Iterative GMM did not converge in {self.max_iter} iterations")
483
+
484
+ # Final residuals
485
+ residuals = np.full_like(y, np.nan)
486
+ residuals[valid_mask] = y_clean - X_clean @ beta_new
487
+
488
+ # Variance matrix (with Windmeijer-style correction)
489
+ vcov = self.windmeijer_correction(
490
+ X_clean, Z_clean, residuals[valid_mask], W, A_inv
491
+ )
492
+
493
+ return beta_new, vcov, W, converged
494
+
495
+ def _check_convergence(self,
496
+ beta_old: np.ndarray,
497
+ beta_new: np.ndarray) -> bool:
498
+ """
499
+ Check convergence of iterative methods.
500
+
501
+ Parameters
502
+ ----------
503
+ beta_old : np.ndarray
504
+ Previous parameter vector
505
+ beta_new : np.ndarray
506
+ New parameter vector
507
+
508
+ Returns
509
+ -------
510
+ bool
511
+ True if converged
512
+ """
513
+ diff = np.max(np.abs(beta_new - beta_old))
514
+ return diff < self.tol
515
+
516
+ def _get_valid_mask(self,
517
+ y: np.ndarray,
518
+ X: np.ndarray,
519
+ Z: np.ndarray,
520
+ min_instruments: Optional[int] = None) -> np.ndarray:
521
+ """
522
+ Get mask of observations with sufficient valid data.
523
+
524
+ For unbalanced panels, allows observations where some instruments are
525
+ missing, as long as enough instruments remain for overidentification.
526
+
527
+ Parameters
528
+ ----------
529
+ y : np.ndarray
530
+ Dependent variable
531
+ X : np.ndarray
532
+ Regressors
533
+ Z : np.ndarray
534
+ Instruments
535
+ min_instruments : int, optional
536
+ Minimum number of valid instruments required per observation.
537
+ If None, uses max(k+1, n_instruments//2) where k = number of regressors.
538
+
539
+ Returns
540
+ -------
541
+ np.ndarray
542
+ Boolean mask of valid observations
543
+ """
544
+ y_valid = ~np.isnan(y).any(axis=1) if y.ndim > 1 else ~np.isnan(y)
545
+ X_valid = ~np.isnan(X).any(axis=1)
546
+
547
+ # For instruments, count how many are valid per observation
548
+ Z_notnan = ~np.isnan(Z) # Boolean array: True where not NaN
549
+ n_valid_instruments = Z_notnan.sum(axis=1) # Count per row
550
+
551
+ # Determine minimum required instruments
552
+ if min_instruments is None:
553
+ k = X.shape[1] if X.ndim > 1 else 1
554
+ n_instruments_total = Z.shape[1] if Z.ndim > 1 else 1
555
+ # For unbalanced panels, require at least k+1 for overidentification
556
+ # but don't require half of total instruments (too restrictive)
557
+ min_instruments = k + 1
558
+
559
+ # Observation is valid if y, X are valid AND has enough instruments
560
+ Z_valid = n_valid_instruments >= min_instruments
561
+
562
+ return y_valid & X_valid & Z_valid