python-gls 0.1.0__py3-none-any.whl
- python_gls/__init__.py +29 -0
- python_gls/_parametrization.py +137 -0
- python_gls/correlation/__init__.py +29 -0
- python_gls/correlation/ar1.py +67 -0
- python_gls/correlation/arma.py +118 -0
- python_gls/correlation/base.py +125 -0
- python_gls/correlation/car1.py +92 -0
- python_gls/correlation/comp_symm.py +69 -0
- python_gls/correlation/spatial.py +190 -0
- python_gls/correlation/symm.py +85 -0
- python_gls/likelihood.py +302 -0
- python_gls/model.py +511 -0
- python_gls/results.py +223 -0
- python_gls/variance/__init__.py +19 -0
- python_gls/variance/base.py +101 -0
- python_gls/variance/comb.py +82 -0
- python_gls/variance/const_power.py +50 -0
- python_gls/variance/exp.py +50 -0
- python_gls/variance/fixed.py +46 -0
- python_gls/variance/ident.py +84 -0
- python_gls/variance/power.py +52 -0
- python_gls-0.1.0.dist-info/METADATA +361 -0
- python_gls-0.1.0.dist-info/RECORD +26 -0
- python_gls-0.1.0.dist-info/WHEEL +5 -0
- python_gls-0.1.0.dist-info/licenses/LICENSE +21 -0
- python_gls-0.1.0.dist-info/top_level.txt +1 -0
python_gls/__init__.py
ADDED
@@ -0,0 +1,29 @@
"""python_gls: GLS with learned correlation and variance structures.

Python equivalent of R's nlme::gls(). Estimates Generalized Least Squares
models where the correlation and variance structures are learned from data
via ML/REML, not pre-specified.

Basic usage::

    from python_gls import GLS
    from python_gls.correlation import CorAR1
    from python_gls.variance import VarIdent

    result = GLS.from_formula(
        "y ~ x1 + x2",
        data=df,
        correlation=CorAR1(),
        variance=VarIdent("group"),
        groups="subject",
    ).fit()

    print(result.summary())
"""

from python_gls.model import GLS
from python_gls.results import GLSResults

__version__ = "0.1.0"

__all__ = ["GLS", "GLSResults"]
python_gls/_parametrization.py
ADDED
@@ -0,0 +1,137 @@
"""Spherical parametrization for correlation matrices.

Based on Pinheiro & Bates (1996). Transforms between correlation matrices
and unconstrained angle parameters, ensuring positive-definiteness during
optimization without bound constraints.

A d×d correlation matrix has d(d-1)/2 free parameters. We parametrize via
angles θ ∈ (0, π), which map to a Cholesky factor L such that R = LL'.
The angles are mapped to the unconstrained real line via a logit-like
transformation.
"""

import numpy as np
from numpy.typing import NDArray


def angles_to_cholesky(angles: NDArray, d: int) -> NDArray:
    """Convert angle parameters to lower-triangular Cholesky factor.

    Parameters
    ----------
    angles : array of shape (d*(d-1)/2,)
        Angle parameters in (0, pi).
    d : int
        Dimension of the correlation matrix.

    Returns
    -------
    L : array of shape (d, d)
        Lower-triangular Cholesky factor such that L @ L.T is a
        correlation matrix.
    """
    L = np.zeros((d, d))
    idx = 0
    for i in range(d):
        for j in range(i + 1):
            if j == 0 and i == 0:
                L[i, j] = 1.0
            elif j == 0:
                L[i, j] = np.cos(angles[idx])
                idx += 1
            elif j < i:
                prod = np.prod([np.sin(angles[idx - k - 1]) for k in range(j)])
                L[i, j] = prod * np.cos(angles[idx])
                idx += 1
            else:  # j == i, diagonal entry
                prod = np.prod([np.sin(angles[idx - k - 1]) for k in range(j)])
                L[i, j] = prod
    return L


def cholesky_to_corr(L: NDArray) -> NDArray:
    """Convert Cholesky factor to correlation matrix."""
    R = L @ L.T
    # Ensure exact ones on diagonal (numerical stability)
    d = np.sqrt(np.diag(R))
    R = R / np.outer(d, d)
    np.fill_diagonal(R, 1.0)
    return R


def angles_to_corr(angles: NDArray, d: int) -> NDArray:
    """Convert angle parameters to a correlation matrix.

    Parameters
    ----------
    angles : array of shape (d*(d-1)/2,)
        Angle parameters in (0, pi).
    d : int
        Dimension of the correlation matrix.

    Returns
    -------
    R : array of shape (d, d)
        Positive-definite correlation matrix.
    """
    L = angles_to_cholesky(angles, d)
    return cholesky_to_corr(L)


def corr_to_angles(R: NDArray) -> NDArray:
    """Convert a correlation matrix to angle parameters.

    Parameters
    ----------
    R : array of shape (d, d)
        Positive-definite correlation matrix.

    Returns
    -------
    angles : array of shape (d*(d-1)/2,)
        Angle parameters in (0, pi).
    """
    d = R.shape[0]
    L = np.linalg.cholesky(R)
    # Normalize rows to unit length
    norms = np.sqrt(np.sum(L ** 2, axis=1))
    L = L / norms[:, np.newaxis]

    n_angles = d * (d - 1) // 2
    angles = np.zeros(n_angles)
    idx = 0
    for i in range(1, d):
        for j in range(i):
            if j == 0:
                angles[idx] = np.arccos(np.clip(L[i, 0], -1, 1))
            else:
                prod = np.prod([np.sin(angles[idx - k - 1]) for k in range(j)])
                if abs(prod) < 1e-15:
                    angles[idx] = np.pi / 2
                else:
                    angles[idx] = np.arccos(np.clip(L[i, j] / prod, -1, 1))
            idx += 1
    return angles


def unconstrained_to_angles(params: NDArray) -> NDArray:
    """Map unconstrained parameters to (0, pi) via scaled sigmoid."""
    return np.pi / (1 + np.exp(-params))


def angles_to_unconstrained(angles: NDArray) -> NDArray:
    """Map angles in (0, pi) to unconstrained parameters."""
    # Clip to avoid log(0)
    ratio = np.clip(angles / np.pi, 1e-10, 1 - 1e-10)
    return np.log(ratio / (1 - ratio))


def unconstrained_to_corr(params: NDArray, d: int) -> NDArray:
    """Map unconstrained parameters directly to a correlation matrix."""
    angles = unconstrained_to_angles(params)
    return angles_to_corr(angles, d)


def corr_to_unconstrained(R: NDArray) -> NDArray:
    """Map a correlation matrix to unconstrained parameters."""
    angles = corr_to_angles(R)
    return angles_to_unconstrained(angles)
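As a quick sanity check of the spherical parametrization above, the following is a minimal round-trip sketch (illustrative only, not part of the packaged code; it assumes the module layout shown in this diff):

import numpy as np
from python_gls._parametrization import corr_to_unconstrained, unconstrained_to_corr

# A well-conditioned 3x3 correlation matrix -> 3 unconstrained reals -> back again.
R = np.array([
    [1.0, 0.5, 0.2],
    [0.5, 1.0, 0.3],
    [0.2, 0.3, 1.0],
])
u = corr_to_unconstrained(R)            # shape (3,) = d*(d-1)/2 for d = 3
R_back = unconstrained_to_corr(u, d=3)
print(np.allclose(R, R_back))           # expected: True (up to float round-off)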
python_gls/correlation/__init__.py
ADDED
@@ -0,0 +1,29 @@
"""Correlation structures for GLS estimation."""

from python_gls.correlation.base import CorStruct
from python_gls.correlation.symm import CorSymm
from python_gls.correlation.comp_symm import CorCompSymm
from python_gls.correlation.ar1 import CorAR1
from python_gls.correlation.arma import CorARMA
from python_gls.correlation.car1 import CorCAR1
from python_gls.correlation.spatial import (
    CorExp,
    CorGaus,
    CorLin,
    CorRatio,
    CorSpher,
)

__all__ = [
    "CorStruct",
    "CorSymm",
    "CorCompSymm",
    "CorAR1",
    "CorARMA",
    "CorCAR1",
    "CorExp",
    "CorGaus",
    "CorLin",
    "CorRatio",
    "CorSpher",
]
python_gls/correlation/ar1.py
ADDED
@@ -0,0 +1,67 @@
"""AR(1) correlation structure."""

import numpy as np
from numpy.typing import NDArray

from python_gls.correlation.base import CorStruct


class CorAR1(CorStruct):
    """First-order autoregressive correlation.

    R[i,j] = phi^|i-j| for equally-spaced observations.

    Equivalent to R's `corAR1()`.

    Parameters
    ----------
    phi : float, optional
        Autoregressive parameter, |phi| < 1.
    """

    def __init__(self, phi: float | None = None):
        super().__init__()
        if phi is not None:
            if not isinstance(phi, (int, float)):
                raise TypeError(f"phi must be a number, got {type(phi).__name__}")
            if not -1 < phi < 1:
                raise ValueError(
                    f"phi must be in (-1, 1) for stationarity, got {phi}"
                )
            self._params = np.array([float(phi)])

    @property
    def n_params(self) -> int:
        return 1

    def get_correlation_matrix(self, group_size: int, **kwargs) -> NDArray:
        if self._params is None:
            return np.eye(group_size)
        phi = self._params[0]
        indices = np.arange(group_size)
        R = phi ** np.abs(indices[:, None] - indices[None, :])
        return R

    def _get_init_params(self, residuals_by_group: list[NDArray]) -> NDArray:
        # Estimate phi from lag-1 autocorrelation
        lag1_corrs = []
        for r in residuals_by_group:
            if len(r) < 2:
                continue
            r_centered = r - np.mean(r)
            var = np.var(r_centered)
            if var > 1e-10:
                lag1 = np.sum(r_centered[:-1] * r_centered[1:]) / (len(r) * var)
                lag1_corrs.append(lag1)
        if lag1_corrs:
            phi = np.clip(np.mean(lag1_corrs), -0.99, 0.99)
        else:
            phi = 0.0
        return np.array([phi])

    def _params_to_unconstrained(self, params: NDArray) -> NDArray:
        phi = np.clip(params[0], -0.999, 0.999)
        return np.array([np.arctanh(phi)])

    def _unconstrained_to_params(self, uparams: NDArray) -> NDArray:
        return np.array([np.tanh(uparams[0])])
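For orientation, a small usage sketch for CorAR1 (illustrative only; the values follow directly from R[i,j] = phi**|i-j|):

import numpy as np
from python_gls.correlation import CorAR1

cor = CorAR1(phi=0.6)
R = cor.get_correlation_matrix(group_size=3)
print(R)
# expected (approximately):
# [[1.   0.6  0.36]
#  [0.6  1.   0.6 ]
#  [0.36 0.6  1.  ]]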
python_gls/correlation/arma.py
ADDED
@@ -0,0 +1,118 @@
"""ARMA(p,q) correlation structure."""

import numpy as np
from numpy.typing import NDArray
from scipy.linalg import toeplitz

from python_gls.correlation.base import CorStruct


class CorARMA(CorStruct):
    """ARMA(p,q) correlation structure.

    Defines correlation via an autoregressive moving-average process.
    The autocorrelation function is computed from AR and MA coefficients.

    Equivalent to R's `corARMA(p=p, q=q)`.

    Parameters
    ----------
    p : int
        Order of the AR component.
    q : int
        Order of the MA component.
    """

    def __init__(self, p: int = 0, q: int = 0):
        super().__init__()
        if not isinstance(p, int) or not isinstance(q, int):
            raise TypeError(
                f"p and q must be integers, got p={type(p).__name__}, q={type(q).__name__}"
            )
        if p < 0 or q < 0:
            raise ValueError(f"p and q must be non-negative, got p={p}, q={q}")
        if p == 0 and q == 0:
            raise ValueError("At least one of p or q must be > 0.")
        self.p = p
        self.q = q

    @property
    def n_params(self) -> int:
        return self.p + self.q

    def _compute_acf(self, max_lag: int) -> NDArray:
        """Compute autocorrelation function from ARMA parameters."""
        ar = self._params[:self.p] if self.p > 0 else np.array([])
        ma = self._params[self.p:] if self.q > 0 else np.array([])

        # Compute ACF of ARMA(p,q) process via Yule-Walker-like recursion
        acf = np.zeros(max_lag + 1)
        acf[0] = 1.0

        # For pure AR
        if self.q == 0 and self.p > 0:
            # Yule-Walker: gamma(h) = sum_i phi_i * gamma(h-i)
            for h in range(1, max_lag + 1):
                for i in range(min(self.p, h)):
                    if h - i - 1 >= 0:
                        acf[h] += ar[i] * acf[abs(h - i - 1)]
            return acf

        # For pure MA
        if self.p == 0 and self.q > 0:
            theta = np.concatenate([[1.0], ma])
            for h in range(min(self.q + 1, max_lag + 1)):
                num = sum(
                    theta[j] * theta[j + h]
                    for j in range(self.q + 1 - h)
                )
                denom = sum(theta[j] ** 2 for j in range(self.q + 1))
                acf[h] = num / denom
            return acf

        # General ARMA: use impulse response function
        n_impulse = max(max_lag + 1, 100)
        psi = np.zeros(n_impulse)
        psi[0] = 1.0
        ma_full = np.zeros(n_impulse)
        ma_full[:self.q] = ma
        ar_full = np.zeros(n_impulse)
        ar_full[:self.p] = ar

        for i in range(1, n_impulse):
            if i <= self.q:
                psi[i] = ma_full[i - 1]
            for j in range(min(self.p, i)):
                psi[i] += ar_full[j] * psi[i - j - 1]

        # ACF from impulse response
        for h in range(max_lag + 1):
            num = sum(psi[j] * psi[j + h] for j in range(n_impulse - h))
            denom = sum(psi[j] ** 2 for j in range(n_impulse))
            acf[h] = num / denom

        return acf

    def get_correlation_matrix(self, group_size: int, **kwargs) -> NDArray:
        if self._params is None:
            return np.eye(group_size)
        acf = self._compute_acf(group_size - 1)
        R = toeplitz(acf)
        # Ensure positive-definiteness
        eigvals = np.linalg.eigvalsh(R)
        if np.min(eigvals) < 1e-10:
            R += (1e-10 - np.min(eigvals) + 1e-10) * np.eye(group_size)
        d = np.sqrt(np.diag(R))
        R = R / np.outer(d, d)
        return R

    def _get_init_params(self, residuals_by_group: list[NDArray]) -> NDArray:
        # Small initial values
        return np.zeros(self.n_params) + 0.1

    def _params_to_unconstrained(self, params: NDArray) -> NDArray:
        # Use tanh transform for stability
        return np.arctanh(np.clip(params, -0.999, 0.999))

    def _unconstrained_to_params(self, uparams: NDArray) -> NDArray:
        return np.tanh(uparams)
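As a consistency check (illustrative only, not part of the package), an ARMA(1, 0) structure with its single AR coefficient set to 0.6 should reproduce the CorAR1 matrix from the previous sketch; set_params comes from the CorStruct base class shown next:

import numpy as np
from python_gls.correlation import CorAR1, CorARMA

arma = CorARMA(p=1, q=0)
arma.set_params(np.array([0.6]))   # one AR coefficient, no MA coefficients
ar1 = CorAR1(phi=0.6)
print(np.allclose(arma.get_correlation_matrix(4), ar1.get_correlation_matrix(4)))
# expected: True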
python_gls/correlation/base.py
ADDED
@@ -0,0 +1,125 @@
"""Base class for correlation structures."""

from __future__ import annotations

from abc import ABC, abstractmethod

import numpy as np
from numpy.typing import NDArray


class CorStruct(ABC):
    """Abstract base class for correlation structures.

    A correlation structure defines within-group correlations for GLS
    estimation. Each group (e.g., subject, cluster) has its own
    correlation matrix, but all groups share the same parameters.

    Subclasses must implement:
    - get_correlation_matrix(group_size, **kwargs)
    - n_params (property)
    - _get_init_params(residuals_by_group)
    """

    def __init__(self) -> None:
        self._params: NDArray | None = None
        self._unconstrained_params: NDArray | None = None

    @abstractmethod
    def get_correlation_matrix(self, group_size: int, **kwargs) -> NDArray:
        """Return the correlation matrix for a group of given size.

        Parameters
        ----------
        group_size : int
            Number of observations in this group.
        **kwargs
            Additional context (e.g., time points, positions).

        Returns
        -------
        R : (group_size, group_size) correlation matrix.
        """

    @property
    @abstractmethod
    def n_params(self) -> int:
        """Number of correlation parameters."""

    @abstractmethod
    def _get_init_params(self, residuals_by_group: list[NDArray]) -> NDArray:
        """Compute initial parameter values from OLS residuals.

        Parameters
        ----------
        residuals_by_group : list of arrays
            Residuals split by group.

        Returns
        -------
        params : array of initial parameter values.
        """

    def get_params(self) -> NDArray:
        """Get current parameter values."""
        if self._params is None:
            raise ValueError(
                f"{type(self).__name__} parameters not yet initialized. "
                f"Call initialize() first or fit the model."
            )
        return self._params.copy()

    def set_params(self, params: NDArray) -> None:
        """Set parameter values."""
        params = np.asarray(params, dtype=float)
        if params.ndim != 1:
            raise ValueError(
                f"params must be a 1-D array, got shape {params.shape}"
            )
        self._params = params

    def get_unconstrained_params(self) -> NDArray:
        """Get unconstrained (transformed) parameters for optimization."""
        if self._unconstrained_params is None:
            if self._params is None:
                raise ValueError(
                    f"{type(self).__name__} parameters not yet initialized. "
                    f"Call initialize() first or fit the model."
                )
            return self._params_to_unconstrained(self._params)
        return self._unconstrained_params.copy()

    def set_unconstrained_params(self, uparams: NDArray) -> None:
        """Set parameters from unconstrained (transformed) values."""
        uparams = np.asarray(uparams, dtype=float)
        self._unconstrained_params = uparams
        self._params = self._unconstrained_to_params(uparams)

    def _params_to_unconstrained(self, params: NDArray) -> NDArray:
        """Transform natural parameters to unconstrained space.

        Default: identity (override for constrained parameters).
        """
        return params.copy()

    def _unconstrained_to_params(self, uparams: NDArray) -> NDArray:
        """Transform unconstrained parameters to natural space.

        Default: identity (override for constrained parameters).
        """
        return uparams.copy()

    def initialize(self, residuals_by_group: list[NDArray]) -> None:
        """Initialize parameters from OLS residuals.

        Parameters
        ----------
        residuals_by_group : list of arrays
            Residuals split by group.
        """
        if not residuals_by_group:
            raise ValueError(
                "residuals_by_group must be a non-empty list of arrays"
            )
        self._params = self._get_init_params(residuals_by_group)
        self._unconstrained_params = self._params_to_unconstrained(self._params)
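To illustrate the CorStruct contract (the three abstract members plus the default identity transforms), here is a minimal, hypothetical subclass; CorIndependent is not part of the package and exists only for this sketch:

import numpy as np
from numpy.typing import NDArray

from python_gls.correlation import CorStruct

class CorIndependent(CorStruct):
    """Hypothetical structure: no within-group correlation, one dummy parameter."""

    @property
    def n_params(self) -> int:
        return 1

    def get_correlation_matrix(self, group_size: int, **kwargs) -> NDArray:
        # Identity: observations within a group are treated as uncorrelated.
        return np.eye(group_size)

    def _get_init_params(self, residuals_by_group: list[NDArray]) -> NDArray:
        return np.zeros(1)

cor = CorIndependent()
cor.initialize([np.array([0.1, -0.2, 0.3])])   # sets natural and unconstrained params
print(cor.get_params(), cor.get_unconstrained_params())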
python_gls/correlation/car1.py
ADDED
@@ -0,0 +1,92 @@
"""Continuous-time AR(1) correlation structure."""

import numpy as np
from numpy.typing import NDArray

from python_gls.correlation.base import CorStruct


class CorCAR1(CorStruct):
    """Continuous-time first-order autoregressive correlation.

    R[i,j] = phi^|t_i - t_j| where t_i are (possibly irregular) time points.
    Unlike CorAR1, this handles irregularly-spaced observations.

    Equivalent to R's `corCAR1()`.

    Parameters
    ----------
    phi : float, optional
        Decay parameter, 0 < phi < 1.
    """

    def __init__(self, phi: float | None = None):
        super().__init__()
        if phi is not None:
            if not isinstance(phi, (int, float)):
                raise TypeError(f"phi must be a number, got {type(phi).__name__}")
            if not 0 < phi < 1:
                raise ValueError(
                    f"phi must be in (0, 1) for continuous-time AR(1), got {phi}"
                )
            self._params = np.array([float(phi)])
        self._time_points: dict[int, NDArray] = {}

    @property
    def n_params(self) -> int:
        return 1

    def set_time_points(self, group_id: int, times: NDArray) -> None:
        """Set time points for a specific group.

        Parameters
        ----------
        group_id : int
            Group index.
        times : array
            Time points for this group.
        """
        self._time_points[group_id] = np.asarray(times, dtype=float)

    def get_correlation_matrix(self, group_size: int, **kwargs) -> NDArray:
        if self._params is None:
            return np.eye(group_size)

        phi = self._params[0]
        group_id = kwargs.get("group_id", None)

        if group_id is not None and group_id in self._time_points:
            times = self._time_points[group_id]
        else:
            # Default to equally-spaced
            times = np.arange(group_size, dtype=float)

        time_diffs = np.abs(times[:, None] - times[None, :])
        R = phi ** time_diffs
        return R

    def _get_init_params(self, residuals_by_group: list[NDArray]) -> NDArray:
        lag1_corrs = []
        for r in residuals_by_group:
            if len(r) < 2:
                continue
            r_centered = r - np.mean(r)
            var = np.var(r_centered)
            if var > 1e-10:
                lag1 = np.sum(r_centered[:-1] * r_centered[1:]) / (len(r) * var)
                lag1_corrs.append(lag1)
        if lag1_corrs:
            phi = np.clip(np.mean(lag1_corrs), 0.01, 0.99)
        else:
            phi = 0.5
        return np.array([phi])

    def _params_to_unconstrained(self, params: NDArray) -> NDArray:
        # phi in (0, 1) -> logit
        phi = np.clip(params[0], 1e-6, 1 - 1e-6)
        return np.array([np.log(phi / (1 - phi))])

    def _unconstrained_to_params(self, uparams: NDArray) -> NDArray:
        # Clip to avoid overflow in exp for very large negative values
        u = np.clip(uparams[0], -500, 500)
        return np.array([1 / (1 + np.exp(-u))])
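A short sketch of CorCAR1 with irregularly spaced time points (illustrative only; group_id 0 is an arbitrary key chosen for the example):

import numpy as np
from python_gls.correlation import CorCAR1

cor = CorCAR1(phi=0.8)
cor.set_time_points(0, np.array([0.0, 0.5, 2.0]))
R = cor.get_correlation_matrix(group_size=3, group_id=0)
print(R[0, 2])   # R[i, j] = phi ** |t_i - t_j|, so 0.8 ** 2.0 == 0.64 up to round-off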
python_gls/correlation/comp_symm.py
ADDED
@@ -0,0 +1,69 @@
"""Compound symmetry (exchangeable) correlation structure."""

import numpy as np
from numpy.typing import NDArray

from python_gls.correlation.base import CorStruct


class CorCompSymm(CorStruct):
    """Compound symmetry (exchangeable) correlation.

    All pairwise correlations are equal to rho. The correlation matrix is:
    R[i,j] = rho for i != j, 1 for i == j.

    Equivalent to R's `corCompSymm()`.

    Parameters
    ----------
    rho : float, optional
        Initial correlation value. Must be in (-1/(d-1), 1) for
        positive-definiteness.
    """

    def __init__(self, rho: float | None = None):
        super().__init__()
        if rho is not None:
            if not isinstance(rho, (int, float)):
                raise TypeError(f"rho must be a number, got {type(rho).__name__}")
            if not -1 < rho < 1:
                raise ValueError(
                    f"rho must be in (-1, 1) for positive-definiteness, got {rho}"
                )
            self._params = np.array([float(rho)])

    @property
    def n_params(self) -> int:
        return 1

    def get_correlation_matrix(self, group_size: int, **kwargs) -> NDArray:
        if self._params is None:
            return np.eye(group_size)
        rho = self._params[0]
        R = np.full((group_size, group_size), rho)
        np.fill_diagonal(R, 1.0)
        return R

    def _get_init_params(self, residuals_by_group: list[NDArray]) -> NDArray:
        # Estimate rho from average pairwise correlation of residuals
        corrs = []
        for r in residuals_by_group:
            d = len(r)
            if d < 2:
                continue
            for i in range(d):
                for j in range(i + 1, d):
                    corrs.append(r[i] * r[j] / (np.std(r) ** 2 + 1e-10))
        if corrs:
            rho = np.clip(np.mean(corrs), -0.9, 0.9)
        else:
            rho = 0.0
        return np.array([rho])

    def _params_to_unconstrained(self, params: NDArray) -> NDArray:
        # Fisher z-transform: rho -> atanh(rho)
        rho = np.clip(params[0], -0.999, 0.999)
        return np.array([np.arctanh(rho)])

    def _unconstrained_to_params(self, uparams: NDArray) -> NDArray:
        return np.array([np.tanh(uparams[0])])
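And a final sketch for CorCompSymm (illustrative only): every off-diagonal entry equals rho.

from python_gls.correlation import CorCompSymm

cor = CorCompSymm(rho=0.3)
print(cor.get_correlation_matrix(3))
# expected:
# [[1.  0.3 0.3]
#  [0.3 1.  0.3]
#  [0.3 0.3 1. ]]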