panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,516 @@
1
+ """
2
+ Fixed Effects (Within) estimator for panel data.
3
+
4
+ This module provides the Fixed Effects estimator which removes entity-specific
5
+ (and optionally time-specific) fixed effects through demeaning.
6
+ """
7
+
8
+ from typing import Optional, Dict
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from panelbox.core.base_model import PanelModel
13
+ from panelbox.core.results import PanelResults
14
+ from panelbox.utils.matrix_ops import (
15
+ compute_ols,
16
+ compute_vcov_nonrobust,
17
+ compute_panel_rsquared,
18
+ demean_matrix
19
+ )
20
+
21
+
22
+ class FixedEffects(PanelModel):
23
+ """
24
+ Fixed Effects (Within) estimator for panel data.
25
+
26
+ This estimator removes unobserved entity-specific (and optionally time-specific)
27
+ fixed effects through demeaning (within transformation). This is equivalent to
28
+ including entity (and time) dummy variables, but more efficient computationally.
29
+
30
+ The within transformation removes time-invariant variables from the model.
31
+
32
+ Parameters
33
+ ----------
34
+ formula : str
35
+ Model formula in R-style syntax (e.g., "y ~ x1 + x2")
36
+ data : pd.DataFrame
37
+ Panel data in long format
38
+ entity_col : str
39
+ Name of the column identifying entities
40
+ time_col : str
41
+ Name of the column identifying time periods
42
+ entity_effects : bool, default=True
43
+ Include entity fixed effects
44
+ time_effects : bool, default=False
45
+ Include time fixed effects
46
+ weights : np.ndarray, optional
47
+ Observation weights
48
+
49
+ Attributes
50
+ ----------
51
+ entity_effects : bool
52
+ Whether entity fixed effects are included
53
+ time_effects : bool
54
+ Whether time fixed effects are included
55
+ entity_fe : pd.Series, optional
56
+ Estimated entity fixed effects (after fitting)
57
+ time_fe : pd.Series, optional
58
+ Estimated time fixed effects (after fitting)
59
+
60
+ Examples
61
+ --------
62
+ >>> import panelbox as pb
63
+ >>> import pandas as pd
64
+ >>>
65
+ >>> # Load data
66
+ >>> data = pd.read_csv('panel_data.csv')
67
+ >>>
68
+ >>> # Entity fixed effects only
69
+ >>> model = pb.FixedEffects("y ~ x1 + x2", data, "firm", "year")
70
+ >>> results = model.fit(cov_type='clustered')
71
+ >>> print(results.summary())
72
+ >>>
73
+ >>> # Two-way fixed effects (entity + time)
74
+ >>> model_twoway = pb.FixedEffects(
75
+ ... "y ~ x1 + x2", data, "firm", "year",
76
+ ... entity_effects=True,
77
+ ... time_effects=True
78
+ ... )
79
+ >>> results_twoway = model_twoway.fit()
80
+ >>>
81
+ >>> # Access estimated fixed effects
82
+ >>> entity_fe = model.entity_fe
83
+ >>> time_fe = model_twoway.time_fe
84
+ """
85
+
86
+ def __init__(
87
+ self,
88
+ formula: str,
89
+ data: pd.DataFrame,
90
+ entity_col: str,
91
+ time_col: str,
92
+ entity_effects: bool = True,
93
+ time_effects: bool = False,
94
+ weights: Optional[np.ndarray] = None
95
+ ):
96
+ super().__init__(formula, data, entity_col, time_col, weights)
97
+
98
+ self.entity_effects = entity_effects
99
+ self.time_effects = time_effects
100
+
101
+ if not entity_effects and not time_effects:
102
+ raise ValueError(
103
+ "At least one of entity_effects or time_effects must be True. "
104
+ "Use PooledOLS if you don't want fixed effects."
105
+ )
106
+
107
+ # Fixed effects (computed after fitting)
108
+ self.entity_fe: Optional[pd.Series] = None
109
+ self.time_fe: Optional[pd.Series] = None
110
+
111
+ def fit(
112
+ self,
113
+ cov_type: str = 'nonrobust',
114
+ **cov_kwds
115
+ ) -> PanelResults:
116
+ """
117
+ Fit the Fixed Effects model.
118
+
119
+ Parameters
120
+ ----------
121
+ cov_type : str, default='nonrobust'
122
+ Type of covariance estimator:
123
+ - 'nonrobust': Classical standard errors
124
+ - 'robust': Heteroskedasticity-robust (HC1)
125
+ - 'clustered': Cluster-robust (clustered by entity by default)
126
+ **cov_kwds
127
+ Additional arguments for covariance estimation
128
+
129
+ Returns
130
+ -------
131
+ PanelResults
132
+ Fitted model results
133
+
134
+ Examples
135
+ --------
136
+ >>> results = model.fit(cov_type='robust')
137
+ >>> results_cluster = model.fit(cov_type='clustered')
138
+ """
139
+ # Build design matrices
140
+ y_orig, X_orig = self.formula_parser.build_design_matrices(
141
+ self.data.data,
142
+ return_type='array'
143
+ )
144
+
145
+ # Get variable names before demeaning
146
+ var_names = self.formula_parser.get_variable_names(self.data.data)
147
+
148
+ # Remove intercept from variable names (FE absorbs it)
149
+ if 'Intercept' in var_names:
150
+ var_names = [v for v in var_names if v != 'Intercept']
151
+ # Remove intercept column from X
152
+ X_orig = X_orig[:, 1:]
153
+
154
+ # Get entity and time identifiers as arrays
155
+ entities = self.data.data[self.data.entity_col].values
156
+ times = self.data.data[self.data.time_col].values
157
+
158
+ # Store original data for fixed effects computation
159
+ self._y_orig = y_orig
160
+ self._X_orig = X_orig
161
+ self._entities = entities
162
+ self._times = times
163
+
164
+ # Apply within transformation (demeaning)
165
+ if self.entity_effects and self.time_effects:
166
+ # Two-way demeaning
167
+ y = self._demean_both(y_orig.reshape(-1, 1), entities, times).ravel()
168
+ X = self._demean_both(X_orig, entities, times)
169
+ elif self.entity_effects:
170
+ # Entity demeaning only
171
+ y = demean_matrix(y_orig.reshape(-1, 1), entities).ravel()
172
+ X = demean_matrix(X_orig, entities)
173
+ else: # time_effects only
174
+ # Time demeaning only
175
+ y = demean_matrix(y_orig.reshape(-1, 1), times).ravel()
176
+ X = demean_matrix(X_orig, times)
177
+
178
+ # Estimate coefficients on demeaned data
179
+ beta, resid_demeaned, fitted_demeaned = compute_ols(y, X, self.weights)
180
+
181
+ # Compute residuals and fitted values in original scale
182
+ fitted = (X_orig @ beta).ravel()
183
+ resid = (y_orig - fitted).ravel()
184
+
185
+ # Degrees of freedom
186
+ n = len(y_orig)
187
+ k = X.shape[1]
188
+
189
+ # Account for absorbed fixed effects
190
+ if self.entity_effects:
191
+ n_fe_entity = self.data.n_entities
192
+ else:
193
+ n_fe_entity = 0
194
+
195
+ if self.time_effects:
196
+ n_fe_time = len(np.unique(times))
197
+ else:
198
+ n_fe_time = 0
199
+
200
+ # df_model: number of slopes (excludes fixed effects and intercept)
201
+ df_model = k
202
+
203
+ # df_resid: n - k - n_fixed_effects
204
+ df_resid = n - k - n_fe_entity - n_fe_time
205
+
206
+ # Ensure df_resid is positive
207
+ if df_resid <= 0:
208
+ raise ValueError(
209
+ f"Insufficient degrees of freedom: df_resid = {df_resid}. "
210
+ f"n={n}, k={k}, entity FE={n_fe_entity}, time FE={n_fe_time}"
211
+ )
212
+
213
+ # Compute covariance matrix (on demeaned data)
214
+ if cov_type == 'nonrobust':
215
+ vcov = compute_vcov_nonrobust(X, resid_demeaned, df_resid)
216
+ elif cov_type == 'robust':
217
+ vcov = self._compute_vcov_robust(X, resid_demeaned, df_resid)
218
+ elif cov_type == 'clustered':
219
+ vcov = self._compute_vcov_clustered(X, resid_demeaned, entities, df_resid)
220
+ else:
221
+ raise ValueError(
222
+ f"cov_type must be 'nonrobust', 'robust', or 'clustered', "
223
+ f"got '{cov_type}'"
224
+ )
225
+
226
+ # Standard errors
227
+ std_errors = np.sqrt(np.diag(vcov))
228
+
229
+ # Compute panel R-squared measures
230
+ rsquared_within, rsquared_between, rsquared_overall = compute_panel_rsquared(
231
+ y_orig, fitted, resid, entities
232
+ )
233
+
234
+ # Adjusted R-squared (within)
235
+ rsquared_adj = 1 - (1 - rsquared_within) * (n - 1) / df_resid
236
+
237
+ # Create Series/DataFrame with variable names
238
+ params = pd.Series(beta.ravel(), index=var_names)
239
+ std_errors_series = pd.Series(std_errors, index=var_names)
240
+ cov_params = pd.DataFrame(vcov, index=var_names, columns=var_names)
241
+
242
+ # Compute fixed effects
243
+ self._compute_fixed_effects(beta)
244
+
245
+ # Model information
246
+ model_type = "Fixed Effects"
247
+ if self.entity_effects and self.time_effects:
248
+ model_type = "Fixed Effects (Two-Way)"
249
+ elif self.time_effects:
250
+ model_type = "Fixed Effects (Time)"
251
+
252
+ model_info = {
253
+ 'model_type': model_type,
254
+ 'formula': self.formula,
255
+ 'cov_type': cov_type,
256
+ 'cov_kwds': cov_kwds,
257
+ 'entity_effects': self.entity_effects,
258
+ 'time_effects': self.time_effects,
259
+ }
260
+
261
+ # Data information
262
+ data_info = {
263
+ 'nobs': n,
264
+ 'n_entities': self.data.n_entities,
265
+ 'n_periods': self.data.n_periods,
266
+ 'df_model': df_model,
267
+ 'df_resid': df_resid,
268
+ 'n_fe_entity': n_fe_entity if self.entity_effects else 0,
269
+ 'n_fe_time': n_fe_time if self.time_effects else 0,
270
+ 'entity_index': entities.ravel() if hasattr(entities, 'ravel') else entities,
271
+ 'time_index': times.ravel() if hasattr(times, 'ravel') else times,
272
+ }
273
+
274
+ # R-squared dictionary
275
+ rsquared_dict = {
276
+ 'rsquared': rsquared_within, # For FE, R² = within R²
277
+ 'rsquared_adj': rsquared_adj,
278
+ 'rsquared_within': rsquared_within,
279
+ 'rsquared_between': rsquared_between,
280
+ 'rsquared_overall': rsquared_overall
281
+ }
282
+
283
+ # Create results object
284
+ results = PanelResults(
285
+ params=params,
286
+ std_errors=std_errors_series,
287
+ cov_params=cov_params,
288
+ resid=resid,
289
+ fittedvalues=fitted,
290
+ model_info=model_info,
291
+ data_info=data_info,
292
+ rsquared_dict=rsquared_dict,
293
+ model=self
294
+ )
295
+
296
+ # Store results and update state
297
+ self._results = results
298
+ self._fitted = True
299
+
300
+ return results
301
+
302
+ def _demean_both(
303
+ self,
304
+ X: np.ndarray,
305
+ entities: np.ndarray,
306
+ times: np.ndarray
307
+ ) -> np.ndarray:
308
+ """
309
+ Apply two-way demeaning (entity and time).
310
+
311
+ Parameters
312
+ ----------
313
+ X : np.ndarray
314
+ Data to demean
315
+ entities : np.ndarray
316
+ Entity identifiers
317
+ times : np.ndarray
318
+ Time identifiers
319
+
320
+ Returns
321
+ -------
322
+ np.ndarray
323
+ Two-way demeaned data
324
+ """
325
+ # First demean by entity
326
+ X_entity_demeaned = demean_matrix(X, entities)
327
+
328
+ # Then demean by time
329
+ X_both_demeaned = demean_matrix(X_entity_demeaned, times)
330
+
331
+ return X_both_demeaned
332
+
333
+ def _compute_fixed_effects(self, beta: np.ndarray) -> None:
334
+ """
335
+ Compute estimated fixed effects.
336
+
337
+ Parameters
338
+ ----------
339
+ beta : np.ndarray
340
+ Estimated coefficients
341
+ """
342
+ # Fitted values from slope coefficients
343
+ fitted_from_slopes = self._X_orig @ beta
344
+
345
+ # Overall residual: y - X*beta
346
+ overall_resid = self._y_orig - fitted_from_slopes
347
+
348
+ if self.entity_effects:
349
+ # Entity fixed effects: mean residual by entity
350
+ unique_entities = np.unique(self._entities)
351
+ entity_fe_values = []
352
+
353
+ for entity in unique_entities:
354
+ mask = self._entities == entity
355
+ entity_mean_resid = overall_resid[mask].mean()
356
+ entity_fe_values.append(entity_mean_resid)
357
+
358
+ self.entity_fe = pd.Series(
359
+ entity_fe_values,
360
+ index=unique_entities,
361
+ name='entity_fe'
362
+ )
363
+
364
+ if self.time_effects:
365
+ # Time fixed effects: mean residual by time (after removing entity FE if present)
366
+ if self.entity_effects:
367
+ # Remove entity FE first
368
+ resid_after_entity = overall_resid.copy()
369
+ for i, entity in enumerate(self._entities):
370
+ resid_after_entity[i] -= self.entity_fe[entity]
371
+ base_resid = resid_after_entity
372
+ else:
373
+ base_resid = overall_resid
374
+
375
+ unique_times = np.unique(self._times)
376
+ time_fe_values = []
377
+
378
+ for time in unique_times:
379
+ mask = self._times == time
380
+ time_mean_resid = base_resid[mask].mean()
381
+ time_fe_values.append(time_mean_resid)
382
+
383
+ self.time_fe = pd.Series(
384
+ time_fe_values,
385
+ index=unique_times,
386
+ name='time_fe'
387
+ )
388
+
389
+ def _estimate_coefficients(self) -> np.ndarray:
390
+ """
391
+ Estimate coefficients (implementation of abstract method).
392
+
393
+ Returns
394
+ -------
395
+ np.ndarray
396
+ Estimated coefficients
397
+ """
398
+ # Build design matrices
399
+ y, X = self.formula_parser.build_design_matrices(
400
+ self.data.data,
401
+ return_type='array'
402
+ )
403
+
404
+ # Remove intercept
405
+ if self.formula_parser.has_intercept:
406
+ X = X[:, 1:]
407
+
408
+ # Get identifiers
409
+ entities = self.data.data[self.data.entity_col].values
410
+ times = self.data.data[self.data.time_col].values
411
+
412
+ # Demean
413
+ if self.entity_effects and self.time_effects:
414
+ y_dm = self._demean_both(y.reshape(-1, 1), entities, times).ravel()
415
+ X_dm = self._demean_both(X, entities, times)
416
+ elif self.entity_effects:
417
+ y_dm = demean_matrix(y.reshape(-1, 1), entities).ravel()
418
+ X_dm = demean_matrix(X, entities)
419
+ else:
420
+ y_dm = demean_matrix(y.reshape(-1, 1), times).ravel()
421
+ X_dm = demean_matrix(X, times)
422
+
423
+ beta, _, _ = compute_ols(y_dm, X_dm, self.weights)
424
+ return beta
425
+
426
+ def _compute_vcov_robust(
427
+ self,
428
+ X: np.ndarray,
429
+ resid: np.ndarray,
430
+ df_resid: int
431
+ ) -> np.ndarray:
432
+ """
433
+ Compute heteroskedasticity-robust covariance matrix (HC1).
434
+
435
+ Parameters
436
+ ----------
437
+ X : np.ndarray
438
+ Design matrix (demeaned)
439
+ resid : np.ndarray
440
+ Residuals (demeaned)
441
+ df_resid : int
442
+ Degrees of freedom
443
+
444
+ Returns
445
+ -------
446
+ np.ndarray
447
+ Robust covariance matrix
448
+ """
449
+ n = len(resid)
450
+ k = X.shape[1]
451
+
452
+ # HC1: adjustment factor n/(n-k)
453
+ adjustment = n / df_resid
454
+
455
+ # Bread: (X'X)^{-1}
456
+ XtX_inv = np.linalg.inv(X.T @ X)
457
+
458
+ # Meat: X' diag(resid^2) X
459
+ meat = X.T @ (resid[:, np.newaxis]**2 * X)
460
+
461
+ # Sandwich
462
+ vcov = adjustment * (XtX_inv @ meat @ XtX_inv)
463
+
464
+ return vcov
465
+
466
+ def _compute_vcov_clustered(
467
+ self,
468
+ X: np.ndarray,
469
+ resid: np.ndarray,
470
+ entities: np.ndarray,
471
+ df_resid: int
472
+ ) -> np.ndarray:
473
+ """
474
+ Compute cluster-robust covariance matrix.
475
+
476
+ Parameters
477
+ ----------
478
+ X : np.ndarray
479
+ Design matrix (demeaned)
480
+ resid : np.ndarray
481
+ Residuals (demeaned)
482
+ entities : np.ndarray
483
+ Entity identifiers
484
+ df_resid : int
485
+ Degrees of freedom
486
+
487
+ Returns
488
+ -------
489
+ np.ndarray
490
+ Cluster-robust covariance matrix
491
+ """
492
+ n = len(resid)
493
+ k = X.shape[1]
494
+
495
+ unique_entities = np.unique(entities)
496
+ n_clusters = len(unique_entities)
497
+
498
+ # Bread: (X'X)^{-1}
499
+ XtX_inv = np.linalg.inv(X.T @ X)
500
+
501
+ # Meat: sum over clusters
502
+ meat = np.zeros((k, k))
503
+ for entity in unique_entities:
504
+ mask = entities == entity
505
+ X_c = X[mask]
506
+ resid_c = resid[mask]
507
+ score = X_c.T @ resid_c
508
+ meat += np.outer(score, score)
509
+
510
+ # Small sample adjustment
511
+ adjustment = (n_clusters / (n_clusters - 1)) * (df_resid / (df_resid - k))
512
+
513
+ # Sandwich
514
+ vcov = adjustment * (XtX_inv @ meat @ XtX_inv)
515
+
516
+ return vcov