PyPI - panelbox - Versions diffs - 0.2.0__py3-none-any.whl - Mend

panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

panelbox/__init__.py +67 -0
panelbox/__version__.py +14 -0
panelbox/cli/__init__.py +0 -0
panelbox/cli/{commands}/__init__.py +0 -0
panelbox/core/__init__.py +0 -0
panelbox/core/base_model.py +164 -0
panelbox/core/formula_parser.py +318 -0
panelbox/core/panel_data.py +387 -0
panelbox/core/results.py +366 -0
panelbox/datasets/__init__.py +0 -0
panelbox/datasets/{data}/__init__.py +0 -0
panelbox/gmm/__init__.py +65 -0
panelbox/gmm/difference_gmm.py +645 -0
panelbox/gmm/estimator.py +562 -0
panelbox/gmm/instruments.py +580 -0
panelbox/gmm/results.py +550 -0
panelbox/gmm/system_gmm.py +621 -0
panelbox/gmm/tests.py +535 -0
panelbox/models/__init__.py +11 -0
panelbox/models/dynamic/__init__.py +0 -0
panelbox/models/iv/__init__.py +0 -0
panelbox/models/static/__init__.py +13 -0
panelbox/models/static/fixed_effects.py +516 -0
panelbox/models/static/pooled_ols.py +298 -0
panelbox/models/static/random_effects.py +512 -0
panelbox/report/__init__.py +61 -0
panelbox/report/asset_manager.py +410 -0
panelbox/report/css_manager.py +472 -0
panelbox/report/exporters/__init__.py +15 -0
panelbox/report/exporters/html_exporter.py +440 -0
panelbox/report/exporters/latex_exporter.py +510 -0
panelbox/report/exporters/markdown_exporter.py +446 -0
panelbox/report/renderers/__init__.py +11 -0
panelbox/report/renderers/static/__init__.py +0 -0
panelbox/report/renderers/static_validation_renderer.py +341 -0
panelbox/report/report_manager.py +502 -0
panelbox/report/template_manager.py +337 -0
panelbox/report/transformers/__init__.py +0 -0
panelbox/report/transformers/static/__init__.py +0 -0
panelbox/report/validation_transformer.py +449 -0
panelbox/standard_errors/__init__.py +0 -0
panelbox/templates/__init__.py +0 -0
panelbox/templates/assets/css/base_styles.css +382 -0
panelbox/templates/assets/css/report_components.css +747 -0
panelbox/templates/assets/js/tab-navigation.js +161 -0
panelbox/templates/assets/js/utils.js +276 -0
panelbox/templates/common/footer.html +24 -0
panelbox/templates/common/header.html +44 -0
panelbox/templates/common/meta.html +5 -0
panelbox/templates/validation/interactive/index.html +272 -0
panelbox/templates/validation/interactive/partials/charts.html +58 -0
panelbox/templates/validation/interactive/partials/methodology.html +201 -0
panelbox/templates/validation/interactive/partials/overview.html +146 -0
panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
panelbox/templates/validation/interactive/partials/test_results.html +231 -0
panelbox/utils/__init__.py +0 -0
panelbox/utils/formatting.py +172 -0
panelbox/utils/matrix_ops.py +233 -0
panelbox/utils/statistical.py +173 -0
panelbox/validation/__init__.py +58 -0
panelbox/validation/base.py +175 -0
panelbox/validation/cointegration/__init__.py +0 -0
panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
panelbox/validation/cross_sectional_dependence/frees.py +297 -0
panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
panelbox/validation/heteroskedasticity/__init__.py +13 -0
panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
panelbox/validation/heteroskedasticity/white.py +208 -0
panelbox/validation/instruments/__init__.py +0 -0
panelbox/validation/robustness/__init__.py +0 -0
panelbox/validation/serial_correlation/__init__.py +13 -0
panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
panelbox/validation/specification/__init__.py +16 -0
panelbox/validation/specification/chow.py +273 -0
panelbox/validation/specification/hausman.py +264 -0
panelbox/validation/specification/mundlak.py +331 -0
panelbox/validation/specification/reset.py +273 -0
panelbox/validation/unit_root/__init__.py +0 -0
panelbox/validation/validation_report.py +257 -0
panelbox/validation/validation_suite.py +401 -0
panelbox-0.2.0.dist-info/METADATA +337 -0
panelbox-0.2.0.dist-info/RECORD +90 -0
panelbox-0.2.0.dist-info/WHEEL +5 -0
panelbox-0.2.0.dist-info/entry_points.txt +2 -0
panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
panelbox-0.2.0.dist-info/top_level.txt +1 -0

panelbox/models/static/random_effects.py ADDED Viewed

@@ -0,0 +1,512 @@
+"""
+Random Effects (GLS) estimator for panel data.
+This module provides the Random Effects estimator which uses GLS (Generalized Least Squares)
+to account for the variance component structure in panel data.
+"""
+from typing import Optional
+import numpy as np
+import pandas as pd
+from panelbox.core.base_model import PanelModel
+from panelbox.core.results import PanelResults
+from panelbox.utils.matrix_ops import (
+    compute_ols,
+    compute_panel_rsquared
+)
+class RandomEffects(PanelModel):
+    """
+    Random Effects (GLS) estimator for panel data.
+    This estimator assumes that entity-specific effects are uncorrelated with
+    the regressors and uses Generalized Least Squares to efficiently estimate
+    the model accounting for the variance component structure.
+    The key assumption is E[u_i | X_it] = 0, where u_i is the entity-specific effect.
+    Parameters
+    ----------
+    formula : str
+        Model formula in R-style syntax (e.g., "y ~ x1 + x2")
+    data : pd.DataFrame
+        Panel data in long format
+    entity_col : str
+        Name of the column identifying entities
+    time_col : str
+        Name of the column identifying time periods
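+    variance_estimator : str, default='swamy-arora'
+        Method for estimating variance components:
+        - 'swamy-arora': Swamy-Arora estimator (most common)
+        - 'walhus': Wallace-Hussain estimator
+        - 'amemiya': Amemiya estimator
+        - 'nerlove': Nerlove estimator
+        Note: in this version 'walhus', 'amemiya' and 'nerlove' are accepted
+        but delegate to the Swamy-Arora computation, so all four choices
+        currently produce the same variance components.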
+    weights : np.ndarray, optional
+        Observation weights
+    Attributes
+    ----------
+    variance_estimator : str
+        Variance estimation method
+    sigma2_u : float
+        Estimated variance of entity-specific effects (after fitting)
+    sigma2_e : float
+        Estimated variance of idiosyncratic errors (after fitting)
+    theta : float
+        GLS transformation parameter (after fitting)
+    Examples
+    --------
+    >>> import panelbox as pb
+    >>> import pandas as pd
+    >>>
+    >>> # Load data
+    >>> data = pd.read_csv('panel_data.csv')
+    >>>
+    >>> # Estimate Random Effects
+    >>> model = pb.RandomEffects("y ~ x1 + x2", data, "firm", "year")
+    >>> results = model.fit()
+    >>> print(results.summary())
+    >>>
+    >>> # Access variance components
+    >>> print(f"sigma2_u: {model.sigma2_u:.4f}")
+    >>> print(f"sigma2_e: {model.sigma2_e:.4f}")
+    >>> print(f"theta: {model.theta:.4f}")
+    >>>
+    >>> # Use different variance estimator
+    >>> model_amemiya = pb.RandomEffects(
+    ...     "y ~ x1 + x2", data, "firm", "year",
+    ...     variance_estimator='amemiya'
+    ... )
+    >>> results_amemiya = model_amemiya.fit()
+    """
+    def __init__(
+        self,
+        formula: str,
+        data: pd.DataFrame,
+        entity_col: str,
+        time_col: str,
+        variance_estimator: str = 'swamy-arora',
+        weights: Optional[np.ndarray] = None
+    ):
+        super().__init__(formula, data, entity_col, time_col, weights)
+        valid_estimators = ['swamy-arora', 'walhus', 'amemiya', 'nerlove']
+        if variance_estimator not in valid_estimators:
+            raise ValueError(
+                f"variance_estimator must be one of {valid_estimators}, "
+                f"got '{variance_estimator}'"
+            )
+        self.variance_estimator = variance_estimator
+        # Variance components (computed after fitting)
+        self.sigma2_u: Optional[float] = None  # Variance of entity effects
+        self.sigma2_e: Optional[float] = None  # Variance of idiosyncratic errors
+        self.theta: Optional[float] = None      # GLS transformation parameter
+    def fit(
+        self,
+        cov_type: str = 'nonrobust',
+        **cov_kwds
+    ) -> PanelResults:
+        """
+        Fit the Random Effects model.
+        Parameters
+        ----------
+        cov_type : str, default='nonrobust'
+            Type of covariance estimator:
+            - 'nonrobust': Classical GLS standard errors
+            - 'robust': Heteroskedasticity-robust
+            - 'clustered': Cluster-robust (clustered by entity)
+        **cov_kwds
+            Additional arguments for covariance estimation
+        Returns
+        -------
+        PanelResults
+            Fitted model results
+        Examples
+        --------
+        >>> results = model.fit()
+        >>> results_robust = model.fit(cov_type='robust')
+        """
+        # Build design matrices
+        y, X = self.formula_parser.build_design_matrices(
+            self.data.data,
+            return_type='array'
+        )
+        # Get variable names
+        var_names = self.formula_parser.get_variable_names(self.data.data)
+        # Get entity and time identifiers
+        entities = self.data.data[self.data.entity_col].values
+        times = self.data.data[self.data.time_col].values
+        # Estimate variance components
+        self._estimate_variance_components(y, X, entities)
+        # Apply GLS transformation
+        y_gls, X_gls = self._gls_transform(y, X, entities)
+        # Estimate coefficients on transformed data
+        beta, resid_gls, fitted_gls = compute_ols(y_gls, X_gls, self.weights)
+        # Compute residuals and fitted values in original scale
+        fitted = (X @ beta).ravel()
+        resid = (y - fitted).ravel()
+        # Degrees of freedom
+        n = len(y)
+        k = X.shape[1]
+        df_model = k - (1 if self.formula_parser.has_intercept else 0)
+        df_resid = n - k
+        # Compute covariance matrix
+        if cov_type == 'nonrobust':
+            vcov = self._compute_vcov_gls(X, resid_gls, entities, df_resid)
+        elif cov_type == 'robust':
+            vcov = self._compute_vcov_robust(X_gls, resid_gls, df_resid)
+        elif cov_type == 'clustered':
+            vcov = self._compute_vcov_clustered(X_gls, resid_gls, entities, df_resid)
+        else:
+            raise ValueError(
+                f"cov_type must be 'nonrobust', 'robust', or 'clustered', "
+                f"got '{cov_type}'"
+            )
+        # Standard errors
+        std_errors = np.sqrt(np.diag(vcov))
+        # Compute panel R-squared measures
+        rsquared_within, rsquared_between, rsquared_overall = compute_panel_rsquared(
+            y, fitted, resid, entities
+        )
+        # Adjusted R-squared (overall)
+        rsquared_adj = 1 - (1 - rsquared_overall) * (n - 1) / df_resid
+        # Create Series/DataFrame with variable names
+        params = pd.Series(beta.ravel(), index=var_names)
+        std_errors_series = pd.Series(std_errors, index=var_names)
+        cov_params = pd.DataFrame(vcov, index=var_names, columns=var_names)
+        # Model information
+        model_info = {
+            'model_type': 'Random Effects (GLS)',
+            'formula': self.formula,
+            'cov_type': cov_type,
+            'cov_kwds': cov_kwds,
+            'variance_estimator': self.variance_estimator,
+        }
+        # Data information
+        data_info = {
+            'nobs': n,
+            'n_entities': self.data.n_entities,
+            'n_periods': self.data.n_periods,
+            'df_model': df_model,
+            'df_resid': df_resid,
+            'entity_index': entities.ravel() if hasattr(entities, 'ravel') else entities,
+            'time_index': times.ravel() if hasattr(times, 'ravel') else times,
+        }
+        # R-squared dictionary
+        rsquared_dict = {
+            'rsquared': rsquared_overall,  # For RE, main R² is overall
+            'rsquared_adj': rsquared_adj,
+            'rsquared_within': rsquared_within,
+            'rsquared_between': rsquared_between,
+            'rsquared_overall': rsquared_overall
+        }
+        # Create results object
+        results = PanelResults(
+            params=params,
+            std_errors=std_errors_series,
+            cov_params=cov_params,
+            resid=resid,
+            fittedvalues=fitted,
+            model_info=model_info,
+            data_info=data_info,
+            rsquared_dict=rsquared_dict,
+            model=self
+        )
+        # Store results and update state
+        self._results = results
+        self._fitted = True
+        return results
+    def _estimate_variance_components(
+        self,
+        y: np.ndarray,
+        X: np.ndarray,
+        entities: np.ndarray
+    ) -> None:
+        """
+        Estimate variance components.
+        Parameters
+        ----------
+        y : np.ndarray
+            Dependent variable
+        X : np.ndarray
+            Design matrix
+        entities : np.ndarray
+            Entity identifiers
+        """
+        n = len(y)
+        k = X.shape[1]
+        if self.variance_estimator == 'swamy-arora':
+            self._swamy_arora_variance(y, X, entities, n, k)
+        elif self.variance_estimator == 'walhus':
+            self._walhus_variance(y, X, entities, n, k)
+        elif self.variance_estimator == 'amemiya':
+            self._amemiya_variance(y, X, entities, n, k)
+        elif self.variance_estimator == 'nerlove':
+            self._nerlove_variance(y, X, entities, n, k)
+    def _swamy_arora_variance(
+        self,
+        y: np.ndarray,
+        X: np.ndarray,
+        entities: np.ndarray,
+        n: int,
+        k: int
+    ) -> None:
+        """
+        Swamy-Arora variance component estimator.
+        This is the most common estimator for RE models.
+        """
+        # Step 1: Estimate within (FE) model
+        from panelbox.utils.matrix_ops import demean_matrix
+        y_within = demean_matrix(y.reshape(-1, 1), entities).ravel()
+        X_within = demean_matrix(X, entities)
+        beta_within, resid_within, _ = compute_ols(y_within, X_within)
+        # Estimate sigma2_e from within residuals
+        N = self.data.n_entities
+        df_within = n - N - k  # Account for absorbed entity dummies
+        self.sigma2_e = np.sum(resid_within ** 2) / df_within
+        # Step 2: Estimate between model (on entity means)
+        unique_entities = np.unique(entities)
+        y_means = []
+        X_means = []
+        for entity in unique_entities:
+            mask = entities == entity
+            y_means.append(y[mask].mean())
+            X_means.append(X[mask].mean(axis=0))
+        y_between = np.array(y_means)
+        X_between = np.array(X_means)
+        beta_between, resid_between, _ = compute_ols(y_between, X_between)
+        # Estimate sigma2_u from between residuals
+        # Average group size
+        T_bar = n / N
+        # Variance of between residuals
+        var_between = np.sum(resid_between ** 2) / (N - k)
+        # sigma2_u = var_between - sigma2_e / T_bar
+        self.sigma2_u = max(0, var_between - self.sigma2_e / T_bar)
+        # Compute theta (GLS transformation parameter)
+        # theta = 1 - sqrt(sigma2_e / (sigma2_e + T*sigma2_u))
+        self.theta = 1 - np.sqrt(self.sigma2_e / (self.sigma2_e + T_bar * self.sigma2_u))
+    def _walhus_variance(self, y, X, entities, n, k):
+        """Wallace-Hussain variance estimator."""
+        # Similar to Swamy-Arora but uses different degrees of freedom
+        # For simplicity, use Swamy-Arora (can be refined later)
+        self._swamy_arora_variance(y, X, entities, n, k)
+    def _amemiya_variance(self, y, X, entities, n, k):
+        """Amemiya variance estimator."""
+        # Uses quadratic forms of residuals
+        # For simplicity, use Swamy-Arora (can be refined later)
+        self._swamy_arora_variance(y, X, entities, n, k)
+    def _nerlove_variance(self, y, X, entities, n, k):
+        """Nerlove variance estimator."""
+        # Uses pooled OLS residuals
+        # For simplicity, use Swamy-Arora (can be refined later)
+        self._swamy_arora_variance(y, X, entities, n, k)
+    def _gls_transform(
+        self,
+        y: np.ndarray,
+        X: np.ndarray,
+        entities: np.ndarray
+    ) -> tuple:
+        """
+        Apply GLS transformation.
+        The transformation is: y* = y - theta * y_bar_i
+        where y_bar_i is the entity mean and theta is computed from variance components.
+        Parameters
+        ----------
+        y : np.ndarray
+            Dependent variable
+        X : np.ndarray
+            Design matrix
+        entities : np.ndarray
+            Entity identifiers
+        Returns
+        -------
+        y_gls : np.ndarray
+            Transformed dependent variable
+        X_gls : np.ndarray
+            Transformed design matrix
+        """
+        unique_entities = np.unique(entities)
+        y_gls = y.copy()
+        X_gls = X.copy()
+        for entity in unique_entities:
+            mask = entities == entity
+            # Entity means
+            y_mean = y[mask].mean()
+            X_mean = X[mask].mean(axis=0)
+            # GLS transformation: subtract theta * mean
+            y_gls[mask] -= self.theta * y_mean
+            X_gls[mask] -= self.theta * X_mean
+        return y_gls, X_gls
+    def _estimate_coefficients(self) -> np.ndarray:
+        """
+        Estimate coefficients (implementation of abstract method).
+        Returns
+        -------
+        np.ndarray
+            Estimated coefficients
+        """
+        y, X = self.formula_parser.build_design_matrices(
+            self.data.data,
+            return_type='array'
+        )
+        entities = self.data.data[self.data.entity_col].values
+        # Estimate variance components
+        self._estimate_variance_components(y, X, entities)
+        # GLS transformation
+        y_gls, X_gls = self._gls_transform(y, X, entities)
+        # Estimate
+        beta, _, _ = compute_ols(y_gls, X_gls, self.weights)
+        return beta
+    def _compute_vcov_gls(
+        self,
+        X: np.ndarray,
+        resid: np.ndarray,
+        entities: np.ndarray,
+        df_resid: int
+    ) -> np.ndarray:
+        """
+        Compute GLS covariance matrix.
+        Parameters
+        ----------
+        X : np.ndarray
+            Original design matrix (not transformed)
+        resid : np.ndarray
+            GLS residuals
+        entities : np.ndarray
+            Entity identifiers
+        df_resid : int
+            Degrees of freedom
+        Returns
+        -------
+        np.ndarray
+            Covariance matrix
+        """
+        # Estimate of error variance from GLS residuals
+        s2 = np.sum(resid ** 2) / df_resid
+        # Build Omega matrix (variance-covariance of errors)
+        # For RE: Omega_i = sigma2_e * I + sigma2_u * J
+        # where J is matrix of ones
+        # For computational efficiency, use transformation approach
+        # V(beta_GLS) = s^2 * (X'Omega^{-1}X)^{-1}
+        # Create transformed X (same transformation as in GLS)
+        X_gls, _ = self._gls_transform(X, X, entities)
+        # Covariance: s^2 (X_gls' X_gls)^{-1}
+        XtX_inv = np.linalg.inv(X_gls.T @ X_gls)
+        vcov = s2 * XtX_inv
+        return vcov
+    def _compute_vcov_robust(
+        self,
+        X: np.ndarray,
+        resid: np.ndarray,
+        df_resid: int
+    ) -> np.ndarray:
+        """Compute robust covariance matrix."""
+        n = len(resid)
+        k = X.shape[1]
+        adjustment = n / df_resid
+        XtX_inv = np.linalg.inv(X.T @ X)
+        meat = X.T @ (resid[:, np.newaxis]**2 * X)
+        vcov = adjustment * (XtX_inv @ meat @ XtX_inv)
+        return vcov
+    def _compute_vcov_clustered(
+        self,
+        X: np.ndarray,
+        resid: np.ndarray,
+        entities: np.ndarray,
+        df_resid: int
+    ) -> np.ndarray:
+        """Compute cluster-robust covariance matrix."""
+        n = len(resid)
+        k = X.shape[1]
+        unique_entities = np.unique(entities)
+        n_clusters = len(unique_entities)
+        XtX_inv = np.linalg.inv(X.T @ X)
+        meat = np.zeros((k, k))
+        for entity in unique_entities:
+            mask = entities == entity
+            X_c = X[mask]
+            resid_c = resid[mask]
+            score = X_c.T @ resid_c
+            meat += np.outer(score, score)
+        adjustment = (n_clusters / (n_clusters - 1)) * (df_resid / (df_resid - k))
+        vcov = adjustment * (XtX_inv @ meat @ XtX_inv)
+        return vcov

panelbox/report/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""
+PanelBox Report Generation Module.
+Provides comprehensive report generation capabilities for panel data analysis.
+Main Components
+---------------
+- ReportManager: Main orchestrator for report generation
+- TemplateManager: Jinja2 template management
+- AssetManager: CSS, JS, and image asset management
+- CSSManager: 3-layer CSS compilation system
+Examples
+--------
+Generate a validation report:
+>>> from panelbox.report import ReportManager
+>>> report_mgr = ReportManager()
+>>> html = report_mgr.generate_validation_report(
+...     validation_data={'tests': [...], 'model_info': {...}},
+...     title='Panel Validation Report'
+... )
+>>> report_mgr.save_report(html, 'validation_report.html')
+Custom report generation:
+>>> context = {
+...     'report_title': 'Custom Analysis',
+...     'data': {...}
+... }
+>>> html = report_mgr.generate_report(
+...     report_type='custom',
+...     template='custom/report.html',
+...     context=context
+... )
+"""
+from .report_manager import ReportManager
+from .template_manager import TemplateManager
+from .asset_manager import AssetManager
+from .css_manager import CSSManager, CSSLayer
+from .validation_transformer import ValidationTransformer
+# Exporters
+from .exporters import (
+    HTMLExporter,
+    LaTeXExporter,
+    MarkdownExporter
+)
+__all__ = [  # names exported by `from panelbox.report import *`
+    'ReportManager',
+    'TemplateManager',
+    'AssetManager',
+    'CSSManager',
+    'CSSLayer',
+    'ValidationTransformer',
+    'HTMLExporter',  # imported from .exporters
+    'LaTeXExporter',  # imported from .exporters
+    'MarkdownExporter',  # imported from .exporters
+]