panelbox 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- panelbox/__init__.py +67 -0
- panelbox/__version__.py +14 -0
- panelbox/cli/__init__.py +0 -0
- panelbox/cli/{commands}/__init__.py +0 -0
- panelbox/core/__init__.py +0 -0
- panelbox/core/base_model.py +164 -0
- panelbox/core/formula_parser.py +318 -0
- panelbox/core/panel_data.py +387 -0
- panelbox/core/results.py +366 -0
- panelbox/datasets/__init__.py +0 -0
- panelbox/datasets/{data}/__init__.py +0 -0
- panelbox/gmm/__init__.py +65 -0
- panelbox/gmm/difference_gmm.py +645 -0
- panelbox/gmm/estimator.py +562 -0
- panelbox/gmm/instruments.py +580 -0
- panelbox/gmm/results.py +550 -0
- panelbox/gmm/system_gmm.py +621 -0
- panelbox/gmm/tests.py +535 -0
- panelbox/models/__init__.py +11 -0
- panelbox/models/dynamic/__init__.py +0 -0
- panelbox/models/iv/__init__.py +0 -0
- panelbox/models/static/__init__.py +13 -0
- panelbox/models/static/fixed_effects.py +516 -0
- panelbox/models/static/pooled_ols.py +298 -0
- panelbox/models/static/random_effects.py +512 -0
- panelbox/report/__init__.py +61 -0
- panelbox/report/asset_manager.py +410 -0
- panelbox/report/css_manager.py +472 -0
- panelbox/report/exporters/__init__.py +15 -0
- panelbox/report/exporters/html_exporter.py +440 -0
- panelbox/report/exporters/latex_exporter.py +510 -0
- panelbox/report/exporters/markdown_exporter.py +446 -0
- panelbox/report/renderers/__init__.py +11 -0
- panelbox/report/renderers/static/__init__.py +0 -0
- panelbox/report/renderers/static_validation_renderer.py +341 -0
- panelbox/report/report_manager.py +502 -0
- panelbox/report/template_manager.py +337 -0
- panelbox/report/transformers/__init__.py +0 -0
- panelbox/report/transformers/static/__init__.py +0 -0
- panelbox/report/validation_transformer.py +449 -0
- panelbox/standard_errors/__init__.py +0 -0
- panelbox/templates/__init__.py +0 -0
- panelbox/templates/assets/css/base_styles.css +382 -0
- panelbox/templates/assets/css/report_components.css +747 -0
- panelbox/templates/assets/js/tab-navigation.js +161 -0
- panelbox/templates/assets/js/utils.js +276 -0
- panelbox/templates/common/footer.html +24 -0
- panelbox/templates/common/header.html +44 -0
- panelbox/templates/common/meta.html +5 -0
- panelbox/templates/validation/interactive/index.html +272 -0
- panelbox/templates/validation/interactive/partials/charts.html +58 -0
- panelbox/templates/validation/interactive/partials/methodology.html +201 -0
- panelbox/templates/validation/interactive/partials/overview.html +146 -0
- panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
- panelbox/templates/validation/interactive/partials/test_results.html +231 -0
- panelbox/utils/__init__.py +0 -0
- panelbox/utils/formatting.py +172 -0
- panelbox/utils/matrix_ops.py +233 -0
- panelbox/utils/statistical.py +173 -0
- panelbox/validation/__init__.py +58 -0
- panelbox/validation/base.py +175 -0
- panelbox/validation/cointegration/__init__.py +0 -0
- panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
- panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
- panelbox/validation/cross_sectional_dependence/frees.py +297 -0
- panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
- panelbox/validation/heteroskedasticity/__init__.py +13 -0
- panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
- panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
- panelbox/validation/heteroskedasticity/white.py +208 -0
- panelbox/validation/instruments/__init__.py +0 -0
- panelbox/validation/robustness/__init__.py +0 -0
- panelbox/validation/serial_correlation/__init__.py +13 -0
- panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
- panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
- panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
- panelbox/validation/specification/__init__.py +16 -0
- panelbox/validation/specification/chow.py +273 -0
- panelbox/validation/specification/hausman.py +264 -0
- panelbox/validation/specification/mundlak.py +331 -0
- panelbox/validation/specification/reset.py +273 -0
- panelbox/validation/unit_root/__init__.py +0 -0
- panelbox/validation/validation_report.py +257 -0
- panelbox/validation/validation_suite.py +401 -0
- panelbox-0.2.0.dist-info/METADATA +337 -0
- panelbox-0.2.0.dist-info/RECORD +90 -0
- panelbox-0.2.0.dist-info/WHEEL +5 -0
- panelbox-0.2.0.dist-info/entry_points.txt +2 -0
- panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
- panelbox-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pooled OLS estimator for panel data.
|
|
3
|
+
|
|
4
|
+
This module provides the Pooled OLS estimator which ignores the panel structure
|
|
5
|
+
and estimates a standard OLS regression.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from panelbox.core.base_model import PanelModel
|
|
13
|
+
from panelbox.core.results import PanelResults
|
|
14
|
+
from panelbox.utils.matrix_ops import (
|
|
15
|
+
compute_ols,
|
|
16
|
+
compute_vcov_nonrobust,
|
|
17
|
+
compute_rsquared
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PooledOLS(PanelModel):
    """
    Pooled OLS estimator for panel data.

    This estimator ignores the panel structure and pools all observations
    together, estimating a standard OLS regression. This is often used as
    a baseline comparison for panel-specific estimators.

    Parameters
    ----------
    formula : str
        Model formula in R-style syntax (e.g., "y ~ x1 + x2")
    data : pd.DataFrame
        Panel data in long format
    entity_col : str
        Name of the column identifying entities
    time_col : str
        Name of the column identifying time periods
    weights : np.ndarray, optional
        Observation weights. They are forwarded unchanged to ``compute_ols``
        when the coefficients are estimated.

    Attributes
    ----------
    All attributes from PanelModel plus model-specific attributes
    after fitting (``fit`` stores its return value in ``_results`` and sets
    ``_fitted`` to True).

    Notes
    -----
    NOTE(review): the 'robust' and 'clustered' covariance helpers of this
    class do not reference ``self.weights``; confirm that this is the
    intended behaviour when weights are supplied.

    Examples
    --------
    >>> import panelbox as pb
    >>> import pandas as pd
    >>>
    >>> # Load data
    >>> data = pd.read_csv('panel_data.csv')
    >>>
    >>> # Estimate Pooled OLS
    >>> model = pb.PooledOLS("y ~ x1 + x2", data, "firm", "year")
    >>> results = model.fit(cov_type='robust')
    >>> print(results.summary())
    >>>
    >>> # With clustered standard errors
    >>> results_cluster = model.fit(cov_type='clustered')
    """

    # Covariance estimators understood by ``fit``.
    _COV_TYPES = ('nonrobust', 'robust', 'clustered')

    def __init__(
        self,
        formula: str,
        data: pd.DataFrame,
        entity_col: str,
        time_col: str,
        weights: Optional[np.ndarray] = None
    ):
        """Forward all arguments, unchanged, to ``PanelModel.__init__``."""
        super().__init__(formula, data, entity_col, time_col, weights)

    def fit(
        self,
        cov_type: str = 'nonrobust',
        **cov_kwds
    ) -> PanelResults:
        """
        Fit the Pooled OLS model.

        Parameters
        ----------
        cov_type : str, default='nonrobust'
            Type of covariance estimator:
            - 'nonrobust': Classical OLS standard errors
            - 'robust': Heteroskedasticity-robust (HC1)
            - 'clustered': Cluster-robust (clustered by entity)
        **cov_kwds
            Extra keyword arguments. They are currently only recorded under
            ``model_info['cov_kwds']``; none of the covariance estimators
            in this class consumes them.

        Returns
        -------
        PanelResults
            Fitted model results. The same object is stored on the model
            as ``_results`` and ``_fitted`` is set to True.

        Raises
        ------
        ValueError
            If ``cov_type`` is not one of the supported values, or if the
            selected sandwich estimator cannot be computed (see
            ``_compute_vcov_robust`` and ``_compute_vcov_clustered``).

        Examples
        --------
        >>> results = model.fit(cov_type='robust')
        >>> results_cluster = model.fit(cov_type='clustered')
        """
        # Reject an unknown estimator before doing any numerical work.
        if cov_type not in self._COV_TYPES:
            raise ValueError(
                "cov_type must be 'nonrobust', 'robust', or 'clustered', "
                f"got '{cov_type}'"
            )

        # Build design matrices
        y, X = self.formula_parser.build_design_matrices(
            self.data.data,
            return_type='array'
        )

        # Get variable names
        var_names = self.formula_parser.get_variable_names(self.data.data)

        # Estimate coefficients
        beta, resid, fitted = compute_ols(y, X, self.weights)

        # Degrees of freedom
        n = len(y)
        k = X.shape[1]
        has_intercept = self.formula_parser.has_intercept
        k_constant = 1 if has_intercept else 0
        df_model = k - k_constant
        df_resid = n - k

        # Compute covariance matrix (cov_type was validated above)
        if cov_type == 'nonrobust':
            vcov = compute_vcov_nonrobust(X, resid, df_resid)
        elif cov_type == 'robust':
            vcov = self._compute_vcov_robust(X, resid)
        else:
            vcov = self._compute_vcov_clustered(X, resid)

        # Standard errors
        std_errors = np.sqrt(np.diag(vcov))

        # Compute R-squared
        rsquared = compute_rsquared(
            y, fitted, resid,
            has_intercept=has_intercept
        )

        # Adjusted R-squared. The numerator is (n - 1) only when an
        # intercept is estimated; without one no degree of freedom is spent
        # on the mean, so the correction uses n. It is undefined when no
        # residual degrees of freedom remain.
        if df_resid > 0:
            rsquared_adj = 1 - (1 - rsquared) * (n - k_constant) / df_resid
        else:
            rsquared_adj = np.nan

        # Create Series/DataFrame with variable names
        params = pd.Series(beta.ravel(), index=var_names)
        std_errors = pd.Series(std_errors, index=var_names)
        cov_params = pd.DataFrame(vcov, index=var_names, columns=var_names)

        # Model information
        model_info = {
            'model_type': 'Pooled OLS',
            'formula': self.formula,
            'cov_type': cov_type,
            'cov_kwds': cov_kwds
        }

        # Data information
        data_info = {
            'nobs': n,
            'n_entities': self.data.n_entities,
            'n_periods': self.data.n_periods,
            'df_model': df_model,
            'df_resid': df_resid,
            'entity_index': self.data.data[self.data.entity_col].values.ravel(),
            'time_index': self.data.data[self.data.time_col].values.ravel()
        }

        # R-squared dictionary. Within/between measures are not computed by
        # this estimator, so they are reported as NaN; the overall value is
        # the pooled R-squared itself.
        rsquared_dict = {
            'rsquared': rsquared,
            'rsquared_adj': rsquared_adj,
            'rsquared_within': np.nan,
            'rsquared_between': np.nan,
            'rsquared_overall': rsquared
        }

        # Create results object
        results = PanelResults(
            params=params,
            std_errors=std_errors,
            cov_params=cov_params,
            resid=resid,
            fittedvalues=fitted,
            model_info=model_info,
            data_info=data_info,
            rsquared_dict=rsquared_dict,
            model=self
        )

        # Store results and update state
        self._results = results
        self._fitted = True

        return results

    def _estimate_coefficients(self) -> np.ndarray:
        """
        Estimate coefficients (implementation of abstract method).

        Rebuilds the design matrices from the model's data and returns only
        the coefficient estimate produced by ``compute_ols``.

        Returns
        -------
        np.ndarray
            Estimated coefficients
        """
        y, X = self.formula_parser.build_design_matrices(
            self.data.data,
            return_type='array'
        )
        beta, _, _ = compute_ols(y, X, self.weights)
        return beta

    def _compute_vcov_robust(
        self,
        X: np.ndarray,
        resid: np.ndarray
    ) -> np.ndarray:
        """
        Compute heteroskedasticity-robust covariance matrix (HC1).

        The estimator is ``n/(n-k) * (X'X)^{-1} X' diag(e^2) X (X'X)^{-1}``.

        Parameters
        ----------
        X : np.ndarray
            Design matrix of shape (n, k)
        resid : np.ndarray
            Residuals with n elements; flattened to 1-D before use

        Returns
        -------
        np.ndarray
            Robust covariance matrix of shape (k, k)

        Raises
        ------
        ValueError
            If ``n <= k``, in which case the HC1 factor ``n/(n-k)`` is
            undefined or negative.
        """
        n, k = X.shape
        df_resid = n - k
        if df_resid <= 0:
            raise ValueError(
                "HC1 robust covariance requires more observations than "
                f"regressors, got n={n} and k={k}"
            )

        # Accept column-shaped residuals as well as flat ones.
        resid = np.asarray(resid).ravel()

        # HC1: adjustment factor n/(n-k)
        adjustment = n / df_resid

        # Bread: (X'X)^{-1}
        XtX_inv = np.linalg.inv(X.T @ X)

        # Meat: X' diag(resid^2) X, formed without building the n x n diagonal
        meat = X.T @ (resid[:, np.newaxis] ** 2 * X)

        # Sandwich: (X'X)^{-1} * X'ΩX * (X'X)^{-1}
        return adjustment * (XtX_inv @ meat @ XtX_inv)

    def _compute_vcov_clustered(
        self,
        X: np.ndarray,
        resid: np.ndarray
    ) -> np.ndarray:
        """
        Compute cluster-robust covariance matrix.

        Clusters are the distinct values of the entity column of the
        model's data. Rows of that column are matched to rows of ``X`` by
        position.

        Parameters
        ----------
        X : np.ndarray
            Design matrix of shape (n, k)
        resid : np.ndarray
            Residuals with n elements; flattened to 1-D before use

        Returns
        -------
        np.ndarray
            Cluster-robust covariance matrix of shape (k, k)

        Raises
        ------
        ValueError
            If the entity column does not have one value per row of ``X``,
            if there are fewer than two clusters, or if ``n <= k``; in the
            last two cases the small-sample factor is undefined.
        """
        n, k = X.shape
        resid = np.asarray(resid).ravel()

        # Get entity identifiers as a flat vector, as ``fit`` does for
        # ``entity_index``.
        entities = np.asarray(
            self.data.data[self.data.entity_col].values
        ).ravel()
        if entities.shape[0] != n:
            raise ValueError(
                f"entity column has {entities.shape[0]} values but the "
                f"design matrix has {n} rows"
            )

        # ``codes[i]`` is the position of row i's entity among the sorted
        # unique entities, i.e. an integer cluster label in [0, G).
        unique_entities, codes = np.unique(entities, return_inverse=True)
        codes = np.ravel(codes)
        n_clusters = len(unique_entities)

        if n_clusters < 2:
            raise ValueError(
                "clustered covariance requires at least 2 clusters, "
                f"got {n_clusters}"
            )
        if n <= k:
            raise ValueError(
                "clustered covariance requires more observations than "
                f"regressors, got n={n} and k={k}"
            )

        # Bread: (X'X)^{-1}
        XtX_inv = np.linalg.inv(X.T @ X)

        # Meat: sum over clusters of s_g s_g', where s_g = X_g' e_g.
        # All cluster scores are accumulated in one pass (row g of
        # ``scores`` is s_g) instead of masking the data once per entity.
        scores = np.zeros((n_clusters, k))
        np.add.at(scores, codes, X * resid[:, np.newaxis])
        meat = scores.T @ scores

        # Small sample adjustment: G/(G-1) * (N-1)/(N-K)
        adjustment = (n_clusters / (n_clusters - 1)) * ((n - 1) / (n - k))

        # Sandwich
        return adjustment * (XtX_inv @ meat @ XtX_inv)
|