PyPI - skfolio - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

skfolio 0.7.0py3-none-any.whl → 0.8.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

skfolio/__init__.py +2 -2
skfolio/cluster/__init__.py +1 -1
skfolio/cluster/_hierarchical.py +1 -1
skfolio/datasets/__init__.py +1 -1
skfolio/datasets/_base.py +2 -2
skfolio/datasets/data/__init__.py +1 -0
skfolio/distance/__init__.py +1 -1
skfolio/distance/_base.py +2 -2
skfolio/distance/_distance.py +4 -4
skfolio/distribution/__init__.py +56 -0
skfolio/distribution/_base.py +203 -0
skfolio/distribution/copula/__init__.py +35 -0
skfolio/distribution/copula/_base.py +456 -0
skfolio/distribution/copula/_clayton.py +539 -0
skfolio/distribution/copula/_gaussian.py +407 -0
skfolio/distribution/copula/_gumbel.py +560 -0
skfolio/distribution/copula/_independent.py +196 -0
skfolio/distribution/copula/_joe.py +609 -0
skfolio/distribution/copula/_selection.py +111 -0
skfolio/distribution/copula/_student_t.py +486 -0
skfolio/distribution/copula/_utils.py +509 -0
skfolio/distribution/multivariate/__init__.py +11 -0
skfolio/distribution/multivariate/_base.py +241 -0
skfolio/distribution/multivariate/_utils.py +632 -0
skfolio/distribution/multivariate/_vine_copula.py +1254 -0
skfolio/distribution/univariate/__init__.py +19 -0
skfolio/distribution/univariate/_base.py +308 -0
skfolio/distribution/univariate/_gaussian.py +136 -0
skfolio/distribution/univariate/_johnson_su.py +152 -0
skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
skfolio/distribution/univariate/_selection.py +85 -0
skfolio/distribution/univariate/_student_t.py +144 -0
skfolio/exceptions.py +6 -6
skfolio/measures/__init__.py +1 -1
skfolio/measures/_enums.py +7 -7
skfolio/measures/_measures.py +4 -7
skfolio/metrics/__init__.py +2 -0
skfolio/metrics/_scorer.py +4 -4
skfolio/model_selection/__init__.py +2 -2
skfolio/model_selection/_combinatorial.py +15 -12
skfolio/model_selection/_validation.py +2 -2
skfolio/model_selection/_walk_forward.py +3 -3
skfolio/moments/covariance/_base.py +1 -1
skfolio/moments/covariance/_denoise_covariance.py +1 -1
skfolio/moments/covariance/_detone_covariance.py +1 -1
skfolio/moments/covariance/_empirical_covariance.py +1 -1
skfolio/moments/covariance/_ew_covariance.py +1 -1
skfolio/moments/covariance/_gerber_covariance.py +1 -1
skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
skfolio/moments/covariance/_implied_covariance.py +2 -7
skfolio/moments/covariance/_ledoit_wolf.py +1 -1
skfolio/moments/covariance/_oas.py +1 -1
skfolio/moments/covariance/_shrunk_covariance.py +1 -1
skfolio/moments/expected_returns/_base.py +1 -1
skfolio/moments/expected_returns/_empirical_mu.py +1 -1
skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
skfolio/moments/expected_returns/_ew_mu.py +1 -1
skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
skfolio/optimization/__init__.py +2 -0
skfolio/optimization/_base.py +2 -2
skfolio/optimization/cluster/__init__.py +2 -0
skfolio/optimization/cluster/_nco.py +7 -7
skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
skfolio/optimization/cluster/hierarchical/_base.py +1 -2
skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
skfolio/optimization/convex/__init__.py +2 -0
skfolio/optimization/convex/_base.py +8 -8
skfolio/optimization/convex/_distributionally_robust.py +4 -4
skfolio/optimization/convex/_maximum_diversification.py +5 -5
skfolio/optimization/convex/_mean_risk.py +5 -6
skfolio/optimization/convex/_risk_budgeting.py +3 -3
skfolio/optimization/ensemble/__init__.py +2 -0
skfolio/optimization/ensemble/_base.py +2 -2
skfolio/optimization/ensemble/_stacking.py +1 -1
skfolio/optimization/naive/__init__.py +2 -0
skfolio/optimization/naive/_naive.py +1 -1
skfolio/population/__init__.py +2 -0
skfolio/population/_population.py +35 -9
skfolio/portfolio/_base.py +42 -8
skfolio/portfolio/_multi_period_portfolio.py +3 -2
skfolio/portfolio/_portfolio.py +4 -4
skfolio/pre_selection/__init__.py +2 -0
skfolio/pre_selection/_drop_correlated.py +2 -2
skfolio/pre_selection/_select_complete.py +25 -26
skfolio/pre_selection/_select_k_extremes.py +2 -2
skfolio/pre_selection/_select_non_dominated.py +2 -2
skfolio/pre_selection/_select_non_expiring.py +2 -2
skfolio/preprocessing/__init__.py +2 -0
skfolio/preprocessing/_returns.py +2 -2
skfolio/prior/__init__.py +4 -0
skfolio/prior/_base.py +2 -2
skfolio/prior/_black_litterman.py +5 -3
skfolio/prior/_empirical.py +3 -1
skfolio/prior/_factor_model.py +8 -4
skfolio/prior/_synthetic_data.py +239 -0
skfolio/synthetic_returns/__init__.py +1 -0
skfolio/typing.py +1 -1
skfolio/uncertainty_set/__init__.py +2 -0
skfolio/uncertainty_set/_base.py +2 -2
skfolio/uncertainty_set/_bootstrap.py +1 -1
skfolio/uncertainty_set/_empirical.py +1 -1
skfolio/utils/__init__.py +1 -0
skfolio/utils/bootstrap.py +2 -2
skfolio/utils/equations.py +13 -10
skfolio/utils/sorting.py +2 -2
skfolio/utils/stats.py +7 -7
skfolio/utils/tools.py +76 -12
{skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/METADATA +99 -24
skfolio-0.8.1.dist-info/RECORD +120 -0
{skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/WHEEL +1 -1
skfolio-0.7.0.dist-info/RECORD +0 -95
{skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info/licenses}/LICENSE +0 -0
{skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/top_level.txt +0 -0

skfolio/distribution/copula/_gaussian.py ADDED Viewed

@@ -0,0 +1,407 @@
+"""Bivariate Gaussian Copula Estimation."""
+# Copyright (c) 2025
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
+# SPDX-License-Identifier: BSD-3-Clause
+import numpy as np
+import numpy.typing as npt
+import scipy.optimize as so
+import scipy.special as sp
+import scipy.stats as st
+import sklearn.utils.validation as skv
+from skfolio.distribution.copula._base import _RHO_BOUNDS, BaseBivariateCopula
+from skfolio.distribution.copula._utils import _apply_margin_swap
+class GaussianCopula(BaseBivariateCopula):
+    r"""Bivariate Gaussian Copula Estimation.
+    The bivariate Gaussian copula is defined as:
+    .. math::
+        C_{\rho}(u, v) = \Phi_2\left(\Phi^{-1}(u), \Phi^{-1}(v) ; \rho\right)
+    where:
+        - :math:`\Phi_2` is the bivariate normal CDF with correlation :math:`\rho`.
+        - :math:`\Phi` is the standard normal CDF and :math:`\Phi^{-1}` its quantile function.
+        - :math:`\rho \in (-1, 1)` is the correlation coefficient.
+    .. note::
+        Rotations are not needed for elliptical copula (e.g., Gaussian or Student-t)
+        because its correlation parameter :math:`\rho \in (-1, 1)` naturally covers
+        both positive and negative dependence, and they exhibit symmetric tail behavior.
+    Parameters
+    ----------
+    itau : bool, default=True
+        If True, :math:`\rho` is estimated using the Kendall's tau inversion method;
+        otherwise, we use the MLE (Maximum Likelihood Estimation) method. The MLE is
+        slower but more accurate.
+    kendall_tau : float, optional
+        If `itau` is True and `kendall_tau` is provided, this
+        value is used; otherwise, it is computed.
+    tolerance : float, default=1e-4
+        Convergence tolerance for the MLE optimization.
+    random_state : int, RandomState instance or None, default=None
+        Seed or random state to ensure reproducibility.
+    Attributes
+    ----------
+    rho_ : float
+        Fitted parameter (:math:`\rho`) in [-1, 1].
+    Examples
+    --------
+    >>> from skfolio.datasets import load_sp500_dataset
+    >>> from skfolio.preprocessing import prices_to_returns
+    >>> from skfolio.distribution import GaussianCopula, compute_pseudo_observations
+    >>>
+    >>> # Load historical prices and convert them to returns
+    >>> prices = load_sp500_dataset()
+    >>> X = prices_to_returns(prices)
+    >>> X = X[["AAPL", "JPM"]]
+    >>>
+    >>> # Convert returns to pseudo observation in the interval [0,1]
+    >>> X = compute_pseudo_observations(X)
+    >>>
+    >>> # Initialize the Copula estimator
+    >>> model = GaussianCopula()
+    >>>
+    >>> # Fit the model to the data.
+    >>> model.fit(X)
+    >>>
+    >>> # Display the fitted parameter and tail dependence coefficients
+    >>> print(model.fitted_repr)
+    GaussianCopula(rho=0.327)
+    >>> print(model.lower_tail_dependence)
+    0.0
+    >>> print(model.upper_tail_dependence)
+    0.0
+    >>>
+    >>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
+    >>> # Inverse Partial Derivative, AIC, and BIC
+    >>> log_likelihood = model.score_samples(X)
+    >>> score = model.score(X)
+    >>> cdf = model.cdf(X)
+    >>> p = model.partial_derivative(X)
+    >>> u = model.inverse_partial_derivative(X)
+    >>> aic = model.aic(X)
+    >>> bic = model.bic(X)
+    >>>
+    >>> # Generate 5 new samples
+    >>> samples = model.sample(n_samples=5)
+    >>>
+    >>> # Plot the tail concentration function.
+    >>> fig = model.plot_tail_concentration()
+    >>> fig.show()
+    >>>
+    >>> # Plot a 2D contour of the estimated PDF.
+    >>> fig = model.plot_pdf_2d()
+    >>> fig.show()
+    >>>
+    >>> # Plot a 3D surface of the estimated PDF.
+    >>> fig = model.plot_pdf_3d()
+    >>> fig.show()
+    References
+    ----------
+    .. [1] "An Introduction to Copulas (2nd ed.)",
+       Nelsen (2006)
+    .. [2] "Multivariate Models and Dependence Concepts",
+        Joe, Chapman & Hall (1997)
+    .. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
+        McNeil, Frey & Embrechts (2005)
+    .. [4] "The t Copula and Related Copulas",
+        Demarta & McNeil (2005)
+    .. [5] "Copula Methods in Finance",
+        Cherubini, Luciano & Vecchiato (2004)
+    """
+    rho_: float
+    _n_params = 1
+    def __init__(
+        self,
+        itau: bool = True,
+        kendall_tau: float | None = None,
+        tolerance: float = 1e-4,
+        random_state: int | None = None,
+    ):
+        super().__init__(random_state=random_state)
+        self.itau = itau
+        self.kendall_tau = kendall_tau
+        self.tolerance = tolerance
+    def fit(self, X: npt.ArrayLike, y=None) -> "GaussianCopula":
+        r"""Fit the Bivariate Gaussian Copula.
+        If `itau` is True, estimates :math:`\rho` using Kendall's tau inversion.
+        Otherwise, uses MLE by maximizing the log-likelihood.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval [0, 1],
+            having been transformed to uniform marginals.
+        y : None
+            Ignored. Provided for compatibility with scikit-learn's API.
+        Returns
+        -------
+        self : GaussianCopula
+            Returns the instance itself.
+        """
+        X = self._validate_X(X, reset=True)
+        if self.itau:
+            if self.kendall_tau is None:
+                kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
+            else:
+                kendall_tau = self.kendall_tau
+            self.rho_ = np.clip(
+                np.sin((np.pi * kendall_tau) / 2.0),
+                a_min=_RHO_BOUNDS[0],
+                a_max=_RHO_BOUNDS[1],
+            )
+        else:
+            result = so.minimize_scalar(
+                _neg_log_likelihood,
+                args=(X,),
+                bounds=_RHO_BOUNDS,
+                method="bounded",
+                options={"xatol": self.tolerance},
+            )
+            if not result.success:
+                raise RuntimeError(f"Optimization failed: {result.message}")
+            self.rho_ = result.x
+        return self
+    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
+        """Compute the CDF of the bivariate Gaussian copula.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+            having been transformed to uniform marginals.
+        Returns
+        -------
+        cdf : ndarray of shape (n_observations,)
+            CDF values for each observation in X.
+        """
+        skv.check_is_fitted(self)
+        X = self._validate_X(X, reset=False)
+        cdf = st.multivariate_normal.cdf(
+            x=sp.ndtri(X),
+            mean=np.array([0, 0]),
+            cov=np.array([[1, self.rho_], [self.rho_, 1]]),
+        )
+        return cdf
+    def partial_derivative(
+        self, X: npt.ArrayLike, first_margin: bool = False
+    ) -> np.ndarray:
+        r"""Compute the h-function (partial derivative) for the bivariate Gaussian
+        copula.
+        The h-function with respect to the second margin represents the conditional
+        distribution function of :math:`u` given :math:`v`:
+        .. math:: \begin{aligned}
+                  h(u \mid v) &= \frac{\partial C(u,v)}{\partial v} \\
+                  &= \Phi\Bigl(\frac{\Phi^{-1}(u)-\rho\,\Phi^{-1}(v)}{\sqrt{1-\rho^2}}\Bigr)
+                  \end{aligned}
+        where :math:`\Phi` is the standard normal CDF and :math:`\Phi^{-1}` is its
+        inverse (the quantile function).
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+            having been transformed to uniform marginals.
+        first_margin : bool, default=False
+            If True, compute the partial derivative with respect to the first
+            margin `u`; otherwise, compute the partial derivative with respect to the
+            second margin `v`.
+        Returns
+        -------
+        p : ndarray of shape (n_observations,)
+            h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
+        """
+        skv.check_is_fitted(self)
+        X = self._validate_X(X, reset=False)
+        X = _apply_margin_swap(X, first_margin=first_margin)
+        # Compute the inverse CDF (percent point function) using ndtri for better
+        # performance
+        u_inv, v_inv = sp.ndtri(X).T
+        p = sp.ndtr((u_inv - self.rho_ * v_inv) / np.sqrt(1 - self.rho_**2))
+        return p
+    def inverse_partial_derivative(
+        self, X: npt.ArrayLike, first_margin: bool = False
+    ) -> np.ndarray:
+        r"""Compute the inverse of the bivariate copula's partial derivative, commonly
+        known as the inverse h-function [1]_.
+        Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
+        second margin is defined by
+        .. math::
+            h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},
+        which is the conditional distribution of :math:`U` given :math:`V = v`.
+        The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
+        value :math:`u \in [0,1]` such that
+        .. math::
+            h(u \mid v) \;=\; p,
+            \quad \text{where } p \in [0,1].
+        In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
+        :math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
+        :math:`p = \partial C(u, v)/\partial v`.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
+            - The first column `p` corresponds to the value of the h-function.
+            - The second column `v` is the conditioning variable.
+        first_margin : bool, default=False
+            If True, compute the inverse partial derivative with respect to the first
+            margin `u`; otherwise, compute the inverse partial derivative with respect
+            to the second margin `v`.
+        Returns
+        -------
+        u : ndarray of shape (n_observations,)
+            A 1D-array of length `n_observations`, where each element is the computed
+            :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.
+        References
+        ----------
+        .. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
+        .. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
+        """
+        skv.check_is_fitted(self)
+        X = self._validate_X(X, reset=False)
+        X = _apply_margin_swap(X, first_margin=first_margin)
+        p_inv, v_inv = sp.ndtri(X).T
+        u_inv = p_inv * np.sqrt(1 - self.rho_**2) + self.rho_ * v_inv
+        u = sp.ndtr(u_inv)
+        return u
+    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
+        """Compute the log-likelihood of each sample (log-pdf) under the model.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+            having been transformed to uniform marginals.
+        Returns
+        -------
+        density : ndarray of shape (n_observations,)
+            The log-likelihood of each sample under the fitted copula.
+        """
+        skv.check_is_fitted(self)
+        X = self._validate_X(X, reset=False)
+        log_density = _base_sample_scores(X=X, rho=self.rho_)
+        return log_density
+    @property
+    def lower_tail_dependence(self) -> float:
+        """Theoretical lower tail dependence coefficient."""
+        return 0
+    @property
+    def upper_tail_dependence(self) -> float:
+        """Theoretical upper tail dependence coefficient."""
+        return 0
+    @property
+    def fitted_repr(self) -> str:
+        """String representation of the fitted copula."""
+        return f"{self.__class__.__name__}(rho={self.rho_:0.3f})"
+def _neg_log_likelihood(rho: float, X: np.ndarray) -> float:
+    """Negative log-likelihood function for optimization.
+    Parameters
+    ----------
+    X : array-like of shape (n_observations, 2)
+        An array of bivariate inputs `(u, v)` where each row represents a
+        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+        having been transformed to uniform marginals.
+    rho : float
+        Correlation copula parameter.
+    Returns
+    -------
+    value : float
+        The negative log-likelihood value.
+    """
+    return -np.sum(_base_sample_scores(X=X, rho=rho))
+def _base_sample_scores(X: np.ndarray, rho: float) -> np.ndarray:
+    """Compute the log-likelihood of each sample (log-pdf) under the bivariate
+    Gaussian copula model.
+    Parameters
+    ----------
+    X : array-like of shape (n_observations, 2)
+        An array of bivariate inputs `(u, v)` where each row represents a
+        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+        having been transformed to uniform marginals.
+    rho : float
+        Gaussian copula parameter.
+    Returns
+    -------
+    density : ndarray of shape (n_observations,)
+        The log-likelihood of each sample under the fitted copula.
+    Raises
+    ------
+    ValueError
+        If rho is not in (-1, 1)
+    """
+    if not (-1.0 <= rho <= 1.0):
+        raise ValueError("rho must be between -1 and 1.")
+    # Inverse CDF (ppf) using stdtrit for better performance
+    u_inv, v_inv = sp.ndtri(X).T
+    # Using np.log1p to avoid loss of precision
+    log_density = -0.5 * np.log1p(-(rho**2)) - rho * (
+        0.5 * rho * (u_inv**2 + v_inv**2) - u_inv * v_inv
+    ) / (1 - rho**2)
+    return log_density

skfolio 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

skfolio 0.7.0py3-none-any.whl → 0.8.1py3-none-any.whl