panelbox 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. panelbox/__init__.py +41 -0
  2. panelbox/__version__.py +13 -1
  3. panelbox/core/formula_parser.py +9 -2
  4. panelbox/core/panel_data.py +1 -1
  5. panelbox/datasets/__init__.py +39 -0
  6. panelbox/datasets/load.py +334 -0
  7. panelbox/gmm/difference_gmm.py +63 -15
  8. panelbox/gmm/estimator.py +46 -5
  9. panelbox/gmm/system_gmm.py +136 -21
  10. panelbox/models/static/__init__.py +4 -0
  11. panelbox/models/static/between.py +434 -0
  12. panelbox/models/static/first_difference.py +494 -0
  13. panelbox/models/static/fixed_effects.py +80 -11
  14. panelbox/models/static/pooled_ols.py +80 -11
  15. panelbox/models/static/random_effects.py +52 -10
  16. panelbox/standard_errors/__init__.py +119 -0
  17. panelbox/standard_errors/clustered.py +386 -0
  18. panelbox/standard_errors/comparison.py +528 -0
  19. panelbox/standard_errors/driscoll_kraay.py +386 -0
  20. panelbox/standard_errors/newey_west.py +324 -0
  21. panelbox/standard_errors/pcse.py +358 -0
  22. panelbox/standard_errors/robust.py +324 -0
  23. panelbox/standard_errors/utils.py +390 -0
  24. panelbox/validation/__init__.py +6 -0
  25. panelbox/validation/robustness/__init__.py +51 -0
  26. panelbox/validation/robustness/bootstrap.py +933 -0
  27. panelbox/validation/robustness/checks.py +143 -0
  28. panelbox/validation/robustness/cross_validation.py +538 -0
  29. panelbox/validation/robustness/influence.py +364 -0
  30. panelbox/validation/robustness/jackknife.py +457 -0
  31. panelbox/validation/robustness/outliers.py +529 -0
  32. panelbox/validation/robustness/sensitivity.py +809 -0
  33. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/METADATA +32 -3
  34. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/RECORD +38 -21
  35. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/WHEEL +1 -1
  36. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/entry_points.txt +0 -0
  37. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/licenses/LICENSE +0 -0
  38. {panelbox-0.2.0.dist-info → panelbox-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,494 @@
1
+ """
2
+ First Difference estimator for panel data.
3
+
4
+ This module provides the First Difference estimator which eliminates
5
+ entity fixed effects through first-differencing.
6
+ """
7
+
8
+ from typing import Optional
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from panelbox.core.base_model import PanelModel
13
+ from panelbox.core.results import PanelResults
14
+ from panelbox.utils.matrix_ops import (
15
+ compute_ols,
16
+ compute_vcov_nonrobust,
17
+ compute_panel_rsquared
18
+ )
19
+ from panelbox.standard_errors import (
20
+ robust_covariance,
21
+ cluster_by_entity,
22
+ twoway_cluster,
23
+ driscoll_kraay,
24
+ newey_west,
25
+ pcse
26
+ )
27
+
28
+
29
+ class FirstDifferenceEstimator(PanelModel):
30
+ """
31
+ First Difference estimator for panel data.
32
+
33
+ This estimator eliminates unobserved entity-specific fixed effects
34
+ through first-differencing. Instead of demeaning (like Fixed Effects),
35
+ it takes differences:
36
+ Δy_it = y_it - y_{i,t-1} = β Δx_it + Δε_it
37
+
38
+ where Δ denotes the first difference operator.
39
+
40
+ The entity fixed effect (α_i) cancels out because it's time-invariant:
41
+ Δα_i = α_i - α_i = 0
42
+
43
+ Advantages over Fixed Effects (within estimator):
44
+ - More robust when T is small (few time periods)
45
+ - Better suited for models with serially correlated errors
46
+ - Handles unbalanced panels naturally
47
+ - No dummy variable trap issues
48
+
49
+ Disadvantages:
50
+ - Loses one time period per entity (first period dropped)
51
+ - Amplifies measurement error (differences magnify noise)
52
+ - Less efficient than FE under homoskedastic errors
53
+ - Loses time-invariant variables (like FE)
54
+
55
+ Parameters
56
+ ----------
57
+ formula : str
58
+ Model formula in R-style syntax (e.g., "y ~ x1 + x2")
59
+ data : pd.DataFrame
60
+ Panel data in long format (must be sorted by entity and time)
61
+ entity_col : str
62
+ Name of the column identifying entities
63
+ time_col : str
64
+ Name of the column identifying time periods
65
+ weights : np.ndarray, optional
66
+ Observation weights (applied to differenced data)
67
+
68
+ Attributes
69
+ ----------
70
+ n_obs_original : int
71
+ Number of observations before differencing
72
+ n_obs_differenced : int
73
+ Number of observations after differencing (loses first period per entity)
74
+
75
+ Examples
76
+ --------
77
+ >>> import panelbox as pb
78
+ >>> import pandas as pd
79
+ >>>
80
+ >>> # Load data
81
+ >>> data = pb.load_grunfeld()
82
+ >>>
83
+ >>> # First Difference estimator
84
+ >>> fd = pb.FirstDifferenceEstimator("invest ~ value + capital", data, "firm", "year")
85
+ >>> results = fd.fit(cov_type='robust')
86
+ >>> print(results.summary())
87
+ >>>
88
+ >>> # Compare with Fixed Effects
89
+ >>> fe = pb.FixedEffects("invest ~ value + capital", data, "firm", "year")
90
+ >>> results_fe = fe.fit(cov_type='robust')
91
+ >>>
92
+ >>> print(f"First Diff coefs: {results.params.values}")
93
+ >>> print(f"Fixed Effects coefs: {results_fe.params.values}")
94
+ >>>
95
+ >>> # Under homoskedasticity, should be similar
96
+ >>> # Under strong serial correlation (errors close to a random walk), FD may be more efficient
97
+
98
+ Notes
99
+ -----
100
+ **Data Requirements:**
101
+ - Data must be sorted by entity and time before estimation
102
+ - Missing periods are not detected: each observation is differenced against the entity's previous available observation, so a difference can span a gap
103
+ - At least 2 time periods per entity required
104
+
105
+ **First Differencing:**
106
+ - For each entity i, compute: Δy_it = y_it - y_{i,t-1}
107
+ - Drops the first observation for each entity
108
+ - If N entities and T periods (balanced): N*T → N*(T-1) observations
109
+
110
+ **Inference:**
111
+ - Standard errors account for structure of differenced data
112
+ - Cluster-robust SE recommended (clustering by entity)
113
+ - Driscoll-Kraay useful for serial correlation and heteroskedasticity
114
+
115
+ **Comparison with Fixed Effects:**
116
+ - FE uses within transformation (demeaning): y_it - ȳ_i
117
+ - FD uses first difference: y_it - y_{i,t-1}
118
+ - Under random walk: y_it = y_{i,t-1} + ε_it → FD removes unit root
119
+ - Under classical RE/FE assumptions: FE is more efficient
120
+
121
+ References
122
+ ----------
123
+ .. [1] Wooldridge, J. M. (2010). Econometric Analysis of Cross Section
124
+ and Panel Data. MIT Press. Section 10.5.
125
+ .. [2] Baltagi, B. H. (2013). Econometric Analysis of Panel Data.
126
+ Wiley. Chapter 3.
127
+ .. [3] Hsiao, C. (2014). Analysis of Panel Data. Cambridge University Press.
128
+ """
129
+
130
def __init__(
    self,
    formula: str,
    data: pd.DataFrame,
    entity_col: str,
    time_col: str,
    weights: Optional[np.ndarray] = None
):
    """
    Set up the estimator and record the size of the input sample.

    All arguments are forwarded unchanged to the parent initializer.

    Parameters
    ----------
    formula : str
        Model formula.
    data : pd.DataFrame
        Panel data in long format.
    entity_col : str
        Name of the entity identifier column.
    time_col : str
        Name of the time identifier column.
    weights : np.ndarray, optional
        Observation weights.
    """
    super().__init__(formula, data, entity_col, time_col, weights)

    # Filled in by fit() once the differenced sample has been built.
    self.n_obs_differenced: Optional[int] = None

    # Row count of the data as passed in, i.e. before any differencing.
    self.n_obs_original = len(data)
143
+
144
def fit(
    self,
    cov_type: str = 'nonrobust',
    **cov_kwds
) -> PanelResults:
    """
    Fit the First Difference estimator.

    The design matrices are built from the stored panel, the intercept
    column (if any) is dropped, the data are first-differenced within
    each entity and OLS is run on the differenced sample. The covariance
    matrix is computed from the differenced regressors and residuals.

    Parameters
    ----------
    cov_type : str, default='nonrobust'
        Type of covariance estimator (matched case-insensitively):
        - 'nonrobust': Classical standard errors
        - 'robust' or 'hc1': Heteroskedasticity-robust (HC1)
        - 'hc0', 'hc2', 'hc3': Other HC variants
        - 'clustered': Cluster-robust, clustered by entity
        - 'twoway': Two-way clustered (entity and time)
        - 'driscoll_kraay': Driscoll-Kraay
        - 'newey_west': Newey-West HAC
        - 'pcse': Panel-Corrected Standard Errors
    **cov_kwds
        Additional arguments for covariance estimation. Only the
        following keys are read, and only for 'driscoll_kraay' and
        'newey_west':
        - max_lags: forwarded to the estimator (default None)
        - kernel: forwarded to the estimator (default 'bartlett')
        Any other key, including ``cluster_col``, is recorded in the
        results' model information but does not influence the estimate;
        'clustered' always clusters by entity.

    Returns
    -------
    PanelResults
        Fitted model results. ``resid`` and ``fittedvalues`` have the
        length of the original sample: positions that produced a
        differenced observation hold the residual / fitted value of the
        differenced regression (not of the levels), all other positions
        hold NaN.

    Raises
    ------
    ValueError
        If fewer differenced observations than regressors remain, if the
        residual degrees of freedom are not positive, or if ``cov_type``
        is not one of the supported values.

    Examples
    --------
    >>> results = model.fit(cov_type='nonrobust')
    >>> results = model.fit(cov_type='clustered')
    >>> results = model.fit(cov_type='driscoll_kraay', max_lags=2)
    """
    # Build design matrices from original (level) data
    y_orig, X_orig = self.formula_parser.build_design_matrices(
        self.data.data,
        return_type='array'
    )

    # Variable names label the columns of X (they index params below)
    var_names = list(self.formula_parser.get_variable_names(self.data.data))

    # Differencing eliminates any constant term, so drop the intercept.
    # The column is located by name instead of assuming it is column 0.
    if 'Intercept' in var_names:
        intercept_pos = var_names.index('Intercept')
        X_orig = np.delete(X_orig, intercept_pos, axis=1)
        var_names = [v for v in var_names if v != 'Intercept']

    # Entity and time identifiers, row-aligned with the design matrices
    entities = self.data.data[self.data.entity_col].values
    times = self.data.data[self.data.time_col].values

    # Apply first difference transformation
    y_diff, X_diff, entities_diff, times_diff, valid_idx = self._first_difference(
        y_orig, X_orig, entities, times
    )

    # Store differenced observation count
    self.n_obs_differenced = len(y_diff)

    # Check that we have enough observations
    if self.n_obs_differenced < X_diff.shape[1]:
        raise ValueError(
            f"Insufficient observations after differencing: {self.n_obs_differenced} obs, "
            f"{X_diff.shape[1]} parameters. Need at least 2 time periods per entity."
        )

    # Estimate coefficients on differenced data (no intercept).
    # NOTE(review): self.weights is forwarded unchanged although the
    # differenced sample is shorter than the original one - confirm that
    # compute_ols expects weights aligned with the differenced rows.
    beta, resid_diff, fitted_diff = compute_ols(y_diff, X_diff, self.weights)

    # Degrees of freedom
    n = len(y_diff)
    k = X_diff.shape[1]
    df_model = k  # No intercept in first difference
    df_resid = n - k

    # Ensure df_resid is positive (also catches the n == k case)
    if df_resid <= 0:
        raise ValueError(
            f"Insufficient degrees of freedom: df_resid = {df_resid}. "
            f"n={n}, k={k}"
        )

    # Scatter the differenced fitted values / residuals back to the
    # original row positions; rows without a difference stay NaN.
    fitted_orig = np.full(len(y_orig), np.nan)
    resid_orig = np.full(len(y_orig), np.nan)
    fitted_orig[valid_idx] = fitted_diff
    resid_orig[valid_idx] = resid_diff

    # Compute covariance matrix (on differenced data)
    cov_type_lower = cov_type.lower()

    if cov_type_lower == 'nonrobust':
        vcov = compute_vcov_nonrobust(X_diff, resid_diff, df_resid)

    elif cov_type_lower in ['robust', 'hc0', 'hc1', 'hc2', 'hc3']:
        # 'robust' is an alias for HC1; the others map to their upper-case name
        method = 'HC1' if cov_type_lower == 'robust' else cov_type_lower.upper()
        result = robust_covariance(X_diff, resid_diff, method=method)
        vcov = result.cov_matrix

    elif cov_type_lower == 'clustered':
        # Cluster by entity
        result = cluster_by_entity(X_diff, resid_diff, entities_diff, df_correction=True)
        vcov = result.cov_matrix

    elif cov_type_lower == 'twoway':
        # Two-way clustering: entity and time
        result = twoway_cluster(X_diff, resid_diff, entities_diff, times_diff, df_correction=True)
        vcov = result.cov_matrix

    elif cov_type_lower == 'driscoll_kraay':
        max_lags = cov_kwds.get('max_lags', None)
        kernel = cov_kwds.get('kernel', 'bartlett')
        result = driscoll_kraay(X_diff, resid_diff, times_diff, max_lags=max_lags, kernel=kernel)
        vcov = result.cov_matrix

    elif cov_type_lower == 'newey_west':
        max_lags = cov_kwds.get('max_lags', None)
        kernel = cov_kwds.get('kernel', 'bartlett')
        result = newey_west(X_diff, resid_diff, max_lags=max_lags, kernel=kernel)
        vcov = result.cov_matrix

    elif cov_type_lower == 'pcse':
        # Panel-Corrected Standard Errors
        result = pcse(X_diff, resid_diff, entities_diff, times_diff)
        vcov = result.cov_matrix

    else:
        raise ValueError(
            f"cov_type must be one of: 'nonrobust', 'robust', 'hc0', 'hc1', 'hc2', 'hc3', "
            f"'clustered', 'twoway', 'driscoll_kraay', 'newey_west', 'pcse', got '{cov_type}'"
        )

    # Standard errors
    std_errors = np.sqrt(np.diag(vcov))

    # R-squared of the differenced regression (centered total sum of squares)
    tss_diff = np.sum((y_diff - y_diff.mean()) ** 2)
    ess_diff = np.sum(resid_diff ** 2)
    rsquared = 1 - ess_diff / tss_diff if tss_diff > 0 else 0.0

    # Adjusted R-squared
    rsquared_adj = 1 - (1 - rsquared) * (n - 1) / df_resid

    # The differenced-model R² is reported as the "within" measure;
    # between and overall R² are not computed for this estimator.
    rsquared_within = rsquared
    rsquared_between = np.nan
    rsquared_overall = np.nan

    # Create Series/DataFrame with variable names
    params = pd.Series(beta.ravel(), index=var_names)
    std_errors_series = pd.Series(std_errors, index=var_names)
    cov_params = pd.DataFrame(vcov, index=var_names, columns=var_names)

    # Model information (cov_type is stored exactly as passed by the caller)
    model_info = {
        'model_type': 'First Difference',
        'formula': self.formula,
        'cov_type': cov_type,
        'cov_kwds': cov_kwds,
        'entity_effects': True,  # FD eliminates entity FE
        'time_effects': False,
    }

    # Data information
    data_info = {
        'nobs': n,  # Number of differenced observations
        'n_entities': self.data.n_entities,
        'n_periods': self.data.n_periods,
        'n_obs_original': self.n_obs_original,
        'n_obs_dropped': self.n_obs_original - n,
        'df_model': df_model,
        'df_resid': df_resid,
        'entity_index': entities_diff,
        'time_index': times_diff,
    }

    # R-squared dictionary
    rsquared_dict = {
        'rsquared': rsquared,  # R² of differenced model
        'rsquared_adj': rsquared_adj,
        'rsquared_within': rsquared_within,
        'rsquared_between': rsquared_between,
        'rsquared_overall': rsquared_overall
    }

    # Create results object
    results = PanelResults(
        params=params,
        std_errors=std_errors_series,
        cov_params=cov_params,
        resid=resid_orig,  # Residuals in original indexing
        fittedvalues=fitted_orig,  # Fitted values in original indexing
        model_info=model_info,
        data_info=data_info,
        rsquared_dict=rsquared_dict,
        model=self
    )

    # Store results and update state
    self._results = results
    self._fitted = True

    return results
376
+
377
def _first_difference(
    self,
    y: np.ndarray,
    X: np.ndarray,
    entities: np.ndarray,
    times: np.ndarray
) -> tuple:
    """
    Apply the first-difference transformation within each entity.

    For every entity the rows are ordered by time and each row is
    differenced against the row immediately before it:
    Δy_it = y_it - y_{i,t-1} and Δx_it = x_it - x_{i,t-1}.
    The earliest row of each entity has no predecessor and is dropped;
    entities with fewer than two rows contribute nothing.

    Gaps in the time index are not detected: the "previous" row is the
    previous available observation of the entity, whatever its time value.

    Parameters
    ----------
    y : np.ndarray
        Dependent variable
    X : np.ndarray
        Independent variables (rows aligned with ``y``)
    entities : np.ndarray
        Entity identifiers (rows aligned with ``y``)
    times : np.ndarray
        Time identifiers (rows aligned with ``y``)

    Returns
    -------
    y_diff : np.ndarray
        Differenced dependent variable
    X_diff : np.ndarray
        Differenced independent variables
    entities_diff : np.ndarray
        Entity identifiers for differenced observations
    times_diff : np.ndarray
        Time identifiers for differenced observations (time of the
        later of the two rows entering each difference)
    valid_idx : np.ndarray
        Positions, in the input arrays, of the later row of each difference

    Raises
    ------
    ValueError
        If no entity has at least two observations, so that no
        difference can be formed.
    """
    y_diff_list = []
    X_diff_list = []
    entities_diff_list = []
    times_diff_list = []
    valid_idx_list = []

    # Entities are visited in the sorted order returned by np.unique
    for entity in np.unique(entities):
        indices = np.where(entities == entity)[0]

        # A single observation cannot be differenced
        if len(indices) < 2:
            continue

        # Order this entity's rows by time; the stable sort keeps rows
        # with equal time values in their original relative order.
        indices_sorted = indices[np.argsort(times[indices], kind='stable')]

        y_entity = y[indices_sorted]
        X_entity = X[indices_sorted]
        times_entity = times[indices_sorted]

        # Difference each row against its predecessor (drops the first row)
        y_diff_entity = y_entity[1:] - y_entity[:-1]

        y_diff_list.append(y_diff_entity)
        X_diff_list.append(X_entity[1:] - X_entity[:-1])
        entities_diff_list.append(np.full(len(y_diff_entity), entity))
        times_diff_list.append(times_entity[1:])  # Use time of current period
        valid_idx_list.append(indices_sorted[1:])  # Indices in original data

    # np.concatenate / np.vstack fail with an unrelated-looking message on
    # an empty list, so report the real cause explicitly.
    if not y_diff_list:
        raise ValueError(
            "Cannot compute first differences: no entity has at least 2 observations."
        )

    # Concatenate all entities
    y_diff = np.concatenate(y_diff_list)
    X_diff = np.vstack(X_diff_list)
    entities_diff = np.concatenate(entities_diff_list)
    times_diff = np.concatenate(times_diff_list)
    valid_idx = np.concatenate(valid_idx_list)

    return y_diff, X_diff, entities_diff, times_diff, valid_idx
465
+
466
def _estimate_coefficients(self) -> np.ndarray:
    """
    Estimate coefficients (implementation of abstract method).

    Builds the design matrices, drops the leading column when the
    formula has an intercept, first-differences the data within entity
    and runs OLS on the differenced sample.

    Returns
    -------
    np.ndarray
        Estimated coefficients
    """
    frame = self.data.data

    response, regressors = self.formula_parser.build_design_matrices(
        frame,
        return_type='array'
    )

    # Differencing removes constants, so the intercept column is discarded
    if self.formula_parser.has_intercept:
        regressors = regressors[:, 1:]

    entity_ids = frame[self.data.entity_col].values
    time_ids = frame[self.data.time_col].values

    # Only the differenced y and X are needed here
    d_response, d_regressors, _ents, _times, _rows = self._first_difference(
        response, regressors, entity_ids, time_ids
    )

    # OLS on differenced data; residuals and fitted values are not used
    coefficients, _resid, _fitted = compute_ols(d_response, d_regressors, self.weights)
    return coefficients
@@ -17,6 +17,14 @@ from panelbox.utils.matrix_ops import (
17
17
  compute_panel_rsquared,
18
18
  demean_matrix
19
19
  )
20
+ from panelbox.standard_errors import (
21
+ robust_covariance,
22
+ cluster_by_entity,
23
+ twoway_cluster,
24
+ driscoll_kraay,
25
+ newey_west,
26
+ pcse
27
+ )
20
28
 
21
29
 
22
30
  class FixedEffects(PanelModel):
@@ -121,10 +129,18 @@ class FixedEffects(PanelModel):
121
129
  cov_type : str, default='nonrobust'
122
130
  Type of covariance estimator:
123
131
  - 'nonrobust': Classical standard errors
124
- - 'robust': Heteroskedasticity-robust (HC1)
125
- - 'clustered': Cluster-robust (clustered by entity by default)
132
+ - 'robust' or 'hc1': Heteroskedasticity-robust (HC1)
133
+ - 'hc0', 'hc2', 'hc3': Other HC variants
134
+ - 'clustered': Cluster-robust (by entity by default)
135
+ - 'twoway': Two-way clustered (entity and time)
136
+ - 'driscoll_kraay': Driscoll-Kraay (spatial/temporal dependence)
137
+ - 'newey_west': Newey-West HAC
138
+ - 'pcse': Panel-Corrected Standard Errors (requires T > N)
126
139
  **cov_kwds
127
- Additional arguments for covariance estimation
140
+ Additional arguments for covariance estimation:
141
+ - cluster_col: For custom clustering (default: entity)
142
+ - max_lags: For Driscoll-Kraay and Newey-West
143
+ - kernel: For HAC estimators ('bartlett', 'parzen', 'quadratic_spectral')
128
144
 
129
145
  Returns
130
146
  -------
@@ -133,8 +149,27 @@ class FixedEffects(PanelModel):
133
149
 
134
150
  Examples
135
151
  --------
152
+ >>> # Classical standard errors
153
+ >>> results = model.fit(cov_type='nonrobust')
154
+
155
+ >>> # Heteroskedasticity-robust
136
156
  >>> results = model.fit(cov_type='robust')
137
- >>> results_cluster = model.fit(cov_type='clustered')
157
+ >>> results = model.fit(cov_type='hc3')
158
+
159
+ >>> # Cluster-robust by entity
160
+ >>> results = model.fit(cov_type='clustered')
161
+
162
+ >>> # Two-way clustering
163
+ >>> results = model.fit(cov_type='twoway')
164
+
165
+ >>> # Driscoll-Kraay (for spatial/temporal dependence)
166
+ >>> results = model.fit(cov_type='driscoll_kraay', max_lags=3)
167
+
168
+ >>> # Newey-West HAC
169
+ >>> results = model.fit(cov_type='newey_west', max_lags=4)
170
+
171
+ >>> # Panel-Corrected SE (requires T > N)
172
+ >>> results = model.fit(cov_type='pcse')
138
173
  """
139
174
  # Build design matrices
140
175
  y_orig, X_orig = self.formula_parser.build_design_matrices(
@@ -211,16 +246,50 @@ class FixedEffects(PanelModel):
211
246
  )
212
247
 
213
248
  # Compute covariance matrix (on demeaned data)
214
- if cov_type == 'nonrobust':
249
+ cov_type_lower = cov_type.lower()
250
+
251
+ if cov_type_lower == 'nonrobust':
215
252
  vcov = compute_vcov_nonrobust(X, resid_demeaned, df_resid)
216
- elif cov_type == 'robust':
217
- vcov = self._compute_vcov_robust(X, resid_demeaned, df_resid)
218
- elif cov_type == 'clustered':
219
- vcov = self._compute_vcov_clustered(X, resid_demeaned, entities, df_resid)
253
+
254
+ elif cov_type_lower in ['robust', 'hc0', 'hc1', 'hc2', 'hc3']:
255
+ # Map 'robust' to 'hc1' (default robust method)
256
+ method = 'HC1' if cov_type_lower == 'robust' else cov_type_lower.upper()
257
+ result = robust_covariance(X, resid_demeaned, method=method)
258
+ vcov = result.cov_matrix
259
+
260
+ elif cov_type_lower == 'clustered':
261
+ # Default: cluster by entity
262
+ result = cluster_by_entity(X, resid_demeaned, entities, df_correction=True)
263
+ vcov = result.cov_matrix
264
+
265
+ elif cov_type_lower == 'twoway':
266
+ # Two-way clustering: entity and time
267
+ result = twoway_cluster(X, resid_demeaned, entities, times, df_correction=True)
268
+ vcov = result.cov_matrix
269
+
270
+ elif cov_type_lower == 'driscoll_kraay':
271
+ # Driscoll-Kraay for spatial/temporal dependence
272
+ max_lags = cov_kwds.get('max_lags', None)
273
+ kernel = cov_kwds.get('kernel', 'bartlett')
274
+ result = driscoll_kraay(X, resid_demeaned, times, max_lags=max_lags, kernel=kernel)
275
+ vcov = result.cov_matrix
276
+
277
+ elif cov_type_lower == 'newey_west':
278
+ # Newey-West HAC
279
+ max_lags = cov_kwds.get('max_lags', None)
280
+ kernel = cov_kwds.get('kernel', 'bartlett')
281
+ result = newey_west(X, resid_demeaned, max_lags=max_lags, kernel=kernel)
282
+ vcov = result.cov_matrix
283
+
284
+ elif cov_type_lower == 'pcse':
285
+ # Panel-Corrected Standard Errors
286
+ result = pcse(X, resid_demeaned, entities, times)
287
+ vcov = result.cov_matrix
288
+
220
289
  else:
221
290
  raise ValueError(
222
- f"cov_type must be 'nonrobust', 'robust', or 'clustered', "
223
- f"got '{cov_type}'"
291
+ f"cov_type must be one of: 'nonrobust', 'robust', 'hc0', 'hc1', 'hc2', 'hc3', "
292
+ f"'clustered', 'twoway', 'driscoll_kraay', 'newey_west', 'pcse', got '{cov_type}'"
224
293
  )
225
294
 
226
295
  # Standard errors