panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,298 @@
1
+ """
2
+ Pooled OLS estimator for panel data.
3
+
4
+ This module provides the Pooled OLS estimator which ignores the panel structure
5
+ and estimates a standard OLS regression.
6
+ """
7
+
8
+ from typing import Optional
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from panelbox.core.base_model import PanelModel
13
+ from panelbox.core.results import PanelResults
14
+ from panelbox.utils.matrix_ops import (
15
+ compute_ols,
16
+ compute_vcov_nonrobust,
17
+ compute_rsquared
18
+ )
19
+
20
+
21
class PooledOLS(PanelModel):
    """
    Pooled OLS estimator for panel data.

    This estimator ignores the panel structure and pools all observations
    together, estimating a standard OLS regression. This is often used as
    a baseline comparison for panel-specific estimators.

    Parameters
    ----------
    formula : str
        Model formula in R-style syntax (e.g., "y ~ x1 + x2")
    data : pd.DataFrame
        Panel data in long format
    entity_col : str
        Name of the column identifying entities
    time_col : str
        Name of the column identifying time periods
    weights : np.ndarray, optional
        Observation weights; forwarded to ``compute_ols`` when fitting

    Attributes
    ----------
    All attributes from PanelModel plus model-specific attributes
    after fitting.

    Notes
    -----
    The robust and clustered covariance estimators build their "bread"
    from the unweighted ``X'X``.
    NOTE(review): if ``compute_ols`` performs a weighted fit when
    ``weights`` is given, those two estimators are presumably not
    consistent with the weighted coefficients -- confirm against
    ``compute_ols`` before combining ``weights`` with
    ``cov_type='robust'`` or ``cov_type='clustered'``.

    Examples
    --------
    >>> import panelbox as pb
    >>> import pandas as pd
    >>>
    >>> # Load data
    >>> data = pd.read_csv('panel_data.csv')
    >>>
    >>> # Estimate Pooled OLS
    >>> model = pb.PooledOLS("y ~ x1 + x2", data, "firm", "year")
    >>> results = model.fit(cov_type='robust')
    >>> print(results.summary())
    >>>
    >>> # With clustered standard errors
    >>> results_cluster = model.fit(cov_type='clustered')
    """

    def __init__(
        self,
        formula: str,
        data: pd.DataFrame,
        entity_col: str,
        time_col: str,
        weights: Optional[np.ndarray] = None
    ):
        # All set-up is delegated to PanelModel; this class adds no state
        # of its own at construction time.
        super().__init__(formula, data, entity_col, time_col, weights)

    def fit(
        self,
        cov_type: str = 'nonrobust',
        **cov_kwds
    ) -> PanelResults:
        """
        Fit the Pooled OLS model.

        Parameters
        ----------
        cov_type : str, default='nonrobust'
            Type of covariance estimator:
            - 'nonrobust': Classical OLS standard errors
            - 'robust': Heteroskedasticity-robust (HC1)
            - 'clustered': Cluster-robust, clustered by entity
        **cov_kwds
            Recorded verbatim in ``model_info['cov_kwds']``. They are not
            passed to any covariance routine in this method.

        Returns
        -------
        PanelResults
            Fitted model results

        Raises
        ------
        ValueError
            If ``cov_type`` is not one of the supported values, or if the
            robust/clustered estimators cannot be computed (no residual
            degrees of freedom, fewer than two clusters).

        Examples
        --------
        >>> results = model.fit(cov_type='robust')
        >>> results_cluster = model.fit(cov_type='clustered')
        """
        # Fail fast on a bad option before doing any estimation work.
        if cov_type not in ('nonrobust', 'robust', 'clustered'):
            raise ValueError(
                f"cov_type must be 'nonrobust', 'robust', or 'clustered', "
                f"got '{cov_type}'"
            )

        # Build design matrices
        y, X = self.formula_parser.build_design_matrices(
            self.data.data,
            return_type='array'
        )

        # Get variable names
        var_names = self.formula_parser.get_variable_names(self.data.data)

        # Estimate coefficients
        beta, resid, fitted = compute_ols(y, X, self.weights)

        # Degrees of freedom
        has_intercept = self.formula_parser.has_intercept
        n = len(y)
        k = X.shape[1]
        k_constant = 1 if has_intercept else 0
        df_model = k - k_constant
        df_resid = n - k

        # Compute covariance matrix (cov_type was validated above, so the
        # final branch can only be 'clustered').
        if cov_type == 'nonrobust':
            vcov = compute_vcov_nonrobust(X, resid, df_resid)
        elif cov_type == 'robust':
            vcov = self._compute_vcov_robust(X, resid)
        else:
            vcov = self._compute_vcov_clustered(X, resid)

        # Standard errors
        std_errors = np.sqrt(np.diag(vcov))

        # Compute R-squared
        rsquared = compute_rsquared(
            y, fitted, resid,
            has_intercept=has_intercept
        )

        # Adjusted R-squared. The total sum of squares has n - 1 degrees of
        # freedom only when an intercept is estimated; without one it has n
        # (assumes compute_rsquared switches to the uncentered total sum of
        # squares when has_intercept is False -- TODO confirm).
        # A saturated model (df_resid <= 0) has no defined adjusted R^2.
        if df_resid > 0:
            rsquared_adj = 1 - (1 - rsquared) * (n - k_constant) / df_resid
        else:
            rsquared_adj = np.nan

        # Create Series/DataFrame with variable names
        params = pd.Series(beta.ravel(), index=var_names)
        std_errors = pd.Series(std_errors, index=var_names)
        cov_params = pd.DataFrame(vcov, index=var_names, columns=var_names)

        # Model information
        model_info = {
            'model_type': 'Pooled OLS',
            'formula': self.formula,
            'cov_type': cov_type,
            'cov_kwds': cov_kwds
        }

        # Data information
        data_info = {
            'nobs': n,
            'n_entities': self.data.n_entities,
            'n_periods': self.data.n_periods,
            'df_model': df_model,
            'df_resid': df_resid,
            'entity_index': self.data.data[self.data.entity_col].values.ravel(),
            'time_index': self.data.data[self.data.time_col].values.ravel()
        }

        # R-squared dictionary. Pooled OLS has no within/between
        # decomposition, so those entries are NaN and "overall" is the
        # plain R-squared.
        rsquared_dict = {
            'rsquared': rsquared,
            'rsquared_adj': rsquared_adj,
            'rsquared_within': np.nan,
            'rsquared_between': np.nan,
            'rsquared_overall': rsquared
        }

        # Create results object
        results = PanelResults(
            params=params,
            std_errors=std_errors,
            cov_params=cov_params,
            resid=resid,
            fittedvalues=fitted,
            model_info=model_info,
            data_info=data_info,
            rsquared_dict=rsquared_dict,
            model=self
        )

        # Store results and update state
        self._results = results
        self._fitted = True

        return results

    def _estimate_coefficients(self) -> np.ndarray:
        """
        Estimate coefficients (implementation of abstract method).

        Rebuilds the design matrices from the stored data and returns only
        the coefficient vector produced by ``compute_ols``.

        Returns
        -------
        np.ndarray
            Estimated coefficients
        """
        y, X = self.formula_parser.build_design_matrices(
            self.data.data,
            return_type='array'
        )
        beta, _, _ = compute_ols(y, X, self.weights)
        return beta

    def _compute_vcov_robust(
        self,
        X: np.ndarray,
        resid: np.ndarray
    ) -> np.ndarray:
        """
        Compute heteroskedasticity-robust covariance matrix (HC1).

        Parameters
        ----------
        X : np.ndarray
            Design matrix of shape (n, k)
        resid : np.ndarray
            Residuals; a flat vector or an (n, 1) column are both accepted

        Returns
        -------
        np.ndarray
            Robust covariance matrix of shape (k, k)

        Raises
        ------
        ValueError
            If there are no residual degrees of freedom (n <= k), in which
            case the HC1 correction n / (n - k) is undefined.
        """
        # Flatten so that a column-vector residual does not gain a third
        # axis below and break the row-wise scaling of X.
        resid = np.asarray(resid).ravel()

        n, k = X.shape
        df_resid = n - k
        if df_resid <= 0:
            raise ValueError(
                "HC1 robust covariance requires more observations than "
                f"parameters, got n={n}, k={k}"
            )

        # HC1: adjustment factor n/(n-k)
        adjustment = n / df_resid

        # Bread: (X'X)^{-1}
        XtX_inv = np.linalg.inv(X.T @ X)

        # Meat: X' diag(resid^2) X
        meat = X.T @ (resid[:, np.newaxis] ** 2 * X)

        # Sandwich: (X'X)^{-1} * X'ΩX * (X'X)^{-1}
        return adjustment * (XtX_inv @ meat @ XtX_inv)

    def _compute_vcov_clustered(
        self,
        X: np.ndarray,
        resid: np.ndarray
    ) -> np.ndarray:
        """
        Compute cluster-robust covariance matrix.

        Clusters are the distinct values of the entity column of the
        model's data.

        Parameters
        ----------
        X : np.ndarray
            Design matrix of shape (n, k)
        resid : np.ndarray
            Residuals; a flat vector or an (n, 1) column are both accepted

        Returns
        -------
        np.ndarray
            Cluster-robust covariance matrix of shape (k, k)

        Raises
        ------
        ValueError
            If the entity column does not have one label per row of ``X``,
            if there are fewer than two clusters, or if n <= k; the
            small-sample correction is undefined in the last two cases.
        """
        resid = np.asarray(resid).ravel()
        n, k = X.shape

        # Get entity identifiers
        entities = np.asarray(
            self.data.data[self.data.entity_col].values
        ).ravel()
        if entities.shape[0] != n:
            raise ValueError(
                "Entity identifiers are not aligned with the design matrix: "
                f"{entities.shape[0]} labels for {n} observations"
            )

        # cluster_codes[i] is the position of observation i's entity in
        # unique_entities, i.e. an integer cluster label in [0, G).
        unique_entities, cluster_codes = np.unique(
            entities, return_inverse=True
        )
        cluster_codes = np.ravel(cluster_codes)
        n_clusters = len(unique_entities)

        if n_clusters < 2:
            raise ValueError(
                "Clustered covariance requires at least 2 clusters, "
                f"got {n_clusters}"
            )
        if n <= k:
            raise ValueError(
                "Clustered covariance requires more observations than "
                f"parameters, got n={n}, k={k}"
            )

        # Bread: (X'X)^{-1}
        XtX_inv = np.linalg.inv(X.T @ X)

        # Meat: sum over clusters of s_g s_g', where s_g = X_g' resid_g.
        # Accumulate every observation's score row into its cluster in one
        # pass instead of building a boolean mask per entity.
        cluster_scores = np.zeros((n_clusters, k))
        np.add.at(cluster_scores, cluster_codes, X * resid[:, np.newaxis])
        meat = cluster_scores.T @ cluster_scores

        # Small sample adjustment: G/(G-1) * (N-1)/(N-K)
        adjustment = (n_clusters / (n_clusters - 1)) * ((n - 1) / (n - k))

        # Sandwich
        return adjustment * (XtX_inv @ meat @ XtX_inv)