PyPI - skfolio - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

skfolio/__init__.py +2 -2
skfolio/cluster/__init__.py +1 -1
skfolio/cluster/_hierarchical.py +1 -1
skfolio/datasets/__init__.py +1 -1
skfolio/datasets/_base.py +2 -2
skfolio/datasets/data/__init__.py +1 -0
skfolio/distance/__init__.py +1 -1
skfolio/distance/_base.py +2 -2
skfolio/distance/_distance.py +4 -4
skfolio/distribution/__init__.py +56 -0
skfolio/distribution/_base.py +203 -0
skfolio/distribution/copula/__init__.py +35 -0
skfolio/distribution/copula/_base.py +456 -0
skfolio/distribution/copula/_clayton.py +539 -0
skfolio/distribution/copula/_gaussian.py +407 -0
skfolio/distribution/copula/_gumbel.py +560 -0
skfolio/distribution/copula/_independent.py +196 -0
skfolio/distribution/copula/_joe.py +609 -0
skfolio/distribution/copula/_selection.py +111 -0
skfolio/distribution/copula/_student_t.py +486 -0
skfolio/distribution/copula/_utils.py +509 -0
skfolio/distribution/multivariate/__init__.py +11 -0
skfolio/distribution/multivariate/_base.py +241 -0
skfolio/distribution/multivariate/_utils.py +632 -0
skfolio/distribution/multivariate/_vine_copula.py +1254 -0
skfolio/distribution/univariate/__init__.py +19 -0
skfolio/distribution/univariate/_base.py +308 -0
skfolio/distribution/univariate/_gaussian.py +136 -0
skfolio/distribution/univariate/_johnson_su.py +152 -0
skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
skfolio/distribution/univariate/_selection.py +85 -0
skfolio/distribution/univariate/_student_t.py +144 -0
skfolio/exceptions.py +6 -6
skfolio/measures/__init__.py +1 -1
skfolio/measures/_enums.py +7 -7
skfolio/measures/_measures.py +4 -7
skfolio/metrics/__init__.py +2 -0
skfolio/metrics/_scorer.py +4 -4
skfolio/model_selection/__init__.py +2 -2
skfolio/model_selection/_combinatorial.py +15 -12
skfolio/model_selection/_validation.py +2 -2
skfolio/model_selection/_walk_forward.py +3 -3
skfolio/moments/covariance/_base.py +1 -1
skfolio/moments/covariance/_denoise_covariance.py +1 -1
skfolio/moments/covariance/_detone_covariance.py +1 -1
skfolio/moments/covariance/_empirical_covariance.py +1 -1
skfolio/moments/covariance/_ew_covariance.py +1 -1
skfolio/moments/covariance/_gerber_covariance.py +1 -1
skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
skfolio/moments/covariance/_implied_covariance.py +2 -7
skfolio/moments/covariance/_ledoit_wolf.py +1 -1
skfolio/moments/covariance/_oas.py +1 -1
skfolio/moments/covariance/_shrunk_covariance.py +1 -1
skfolio/moments/expected_returns/_base.py +1 -1
skfolio/moments/expected_returns/_empirical_mu.py +1 -1
skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
skfolio/moments/expected_returns/_ew_mu.py +1 -1
skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
skfolio/optimization/__init__.py +2 -0
skfolio/optimization/_base.py +2 -2
skfolio/optimization/cluster/__init__.py +2 -0
skfolio/optimization/cluster/_nco.py +7 -7
skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
skfolio/optimization/cluster/hierarchical/_base.py +1 -2
skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
skfolio/optimization/convex/__init__.py +2 -0
skfolio/optimization/convex/_base.py +8 -8
skfolio/optimization/convex/_distributionally_robust.py +4 -4
skfolio/optimization/convex/_maximum_diversification.py +5 -5
skfolio/optimization/convex/_mean_risk.py +5 -6
skfolio/optimization/convex/_risk_budgeting.py +3 -3
skfolio/optimization/ensemble/__init__.py +2 -0
skfolio/optimization/ensemble/_base.py +2 -2
skfolio/optimization/ensemble/_stacking.py +1 -1
skfolio/optimization/naive/__init__.py +2 -0
skfolio/optimization/naive/_naive.py +1 -1
skfolio/population/__init__.py +2 -0
skfolio/population/_population.py +34 -7
skfolio/portfolio/_base.py +42 -8
skfolio/portfolio/_multi_period_portfolio.py +3 -2
skfolio/portfolio/_portfolio.py +4 -4
skfolio/pre_selection/__init__.py +2 -0
skfolio/pre_selection/_drop_correlated.py +2 -2
skfolio/pre_selection/_select_complete.py +25 -26
skfolio/pre_selection/_select_k_extremes.py +2 -2
skfolio/pre_selection/_select_non_dominated.py +2 -2
skfolio/pre_selection/_select_non_expiring.py +2 -2
skfolio/preprocessing/__init__.py +2 -0
skfolio/preprocessing/_returns.py +2 -2
skfolio/prior/__init__.py +4 -0
skfolio/prior/_base.py +2 -2
skfolio/prior/_black_litterman.py +5 -3
skfolio/prior/_empirical.py +3 -1
skfolio/prior/_factor_model.py +8 -4
skfolio/prior/_synthetic_data.py +239 -0
skfolio/synthetic_returns/__init__.py +1 -0
skfolio/typing.py +1 -1
skfolio/uncertainty_set/__init__.py +2 -0
skfolio/uncertainty_set/_base.py +2 -2
skfolio/uncertainty_set/_bootstrap.py +1 -1
skfolio/uncertainty_set/_empirical.py +1 -1
skfolio/utils/__init__.py +1 -0
skfolio/utils/bootstrap.py +2 -2
skfolio/utils/equations.py +13 -10
skfolio/utils/sorting.py +2 -2
skfolio/utils/stats.py +7 -7
skfolio/utils/tools.py +76 -12
{skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
skfolio-0.8.0.dist-info/RECORD +120 -0
{skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
skfolio-0.7.0.dist-info/RECORD +0 -95
{skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
{skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0

skfolio/distribution/copula/_base.py ADDED Viewed

@@ -0,0 +1,456 @@
+"""Base Bivariate Copula Estimator."""
+# Copyright (c) 2025
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
+# SPDX-License-Identifier: BSD-3-Clause
+from abc import ABC, abstractmethod
+import numpy as np
+import numpy.typing as npt
+import plotly.graph_objects as go
+import sklearn.utils as sku
+import sklearn.utils.validation as skv
+from skfolio.distribution._base import BaseDistribution
+from skfolio.distribution.copula._utils import (
+    empirical_tail_concentration,
+    plot_tail_concentration,
+)
+UNIFORM_MARGINAL_EPSILON = 1e-9
+_RHO_BOUNDS = (-0.999, 0.999)
+class BaseBivariateCopula(BaseDistribution, ABC):
+    """Base class for Bivariate Copula Estimators.
+    This abstract class defines the interface for bivariate copula models, including
+    methods for fitting, sampling, scoring, and computing partial derivatives.
+    Parameters
+    ----------
+    random_state : int, RandomState instance or None, default=None
+        Seed or random state to ensure reproducibility.
+    """
+    # Used for AIC and BIC
+    _n_params: int
+    def __init__(self, random_state: int | None = None):
+        super().__init__(random_state=random_state)
+    def _validate_X(self, X: npt.ArrayLike, reset: bool) -> np.ndarray:
+        """Validate the input data.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`.
+        reset : bool, default=True
+            Whether to reset the `n_features_in_` attribute.
+            If False, the input will be checked for consistency with data
+            provided when reset was last True.
+        Returns
+        -------
+        validated_X: ndarray of shape (n_observations, 2)
+            The validated data array.
+        Raises
+        ------
+        ValueError
+            If input data is invalid (e.g., not in `[0, 1]` or incorrect shape).
+        """
+        X = skv.validate_data(self, X, dtype=np.float64, reset=reset)
+        if X.shape[1] != 2:
+            raise ValueError("X must contains two columns for Bivariate Copula")
+        if not np.all((X >= 0) & (X <= 1)):
+            raise ValueError(
+                "X must be in the interval `[0, 1]`, usually reprinting uniform "
+                "distributions obtained from marginals CDF transformation"
+            )
+        # Handle potential numerical issues by ensuring X doesn't contain exact 0 or 1.
+        X = np.clip(X, UNIFORM_MARGINAL_EPSILON, 1 - UNIFORM_MARGINAL_EPSILON)
+        return X
+    @property
+    def n_params(self) -> int:
+        """Number of model parameters.
+
+        Read from the class-level `_n_params` attribute, which is used for AIC
+        and BIC.
+        """
+        return self._n_params
+    @property
+    @abstractmethod
+    def lower_tail_dependence(self) -> float:
+        """Theoretical lower tail dependence coefficient."""
+        pass
+    @property
+    @abstractmethod
+    def upper_tail_dependence(self) -> float:
+        """Theoretical upper tail dependence coefficient."""
+        pass
+    @property
+    @abstractmethod
+    def fitted_repr(self) -> str:
+        """String representation of the fitted copula."""
+        pass
+    @abstractmethod
+    def fit(self, X: npt.ArrayLike, y=None) -> "BaseBivariateCopula":
+        """Fit the copula model.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval [0, 1],
+            having been transformed to uniform marginals.
+        y : None
+            Ignored. Provided for compatibility with scikit-learn's API.
+        Returns
+        -------
+        self : BaseBivariateCopula
+            Returns the instance itself.
+        """
+        pass
+    @abstractmethod
+    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
+        """Compute the CDF of the bivariate copula.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+            having been transformed to uniform marginals.
+        Returns
+        -------
+        cdf : ndarray of shape (n_observations,)
+            CDF values for each observation in X.
+        """
+        pass
+    @abstractmethod
+    def partial_derivative(
+        self, X: npt.ArrayLike, first_margin: bool = False
+    ) -> np.ndarray:
+        r"""Compute the h-function (partial derivative) for the bivariate copula
+        with respect to a specified margin.
+        The h-function with respect to the second margin represents the conditional
+        distribution function of :math:`u` given :math:`v`:
+        .. math::
+            h(u \mid v) = \frac{\partial C(u,v)}{\partial v}
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+            having been transformed to uniform marginals.
+        first_margin : bool, default=False
+            If True, compute the partial derivative with respect to the first
+            margin `u`; otherwise, compute the partial derivative with respect to the
+            second margin `v`.
+        Returns
+        -------
+        p : ndarray of shape (n_observations,)
+            h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
+        """
+        pass
+    @abstractmethod
+    def inverse_partial_derivative(
+        self, X: npt.ArrayLike, first_margin: bool = False
+    ) -> np.ndarray:
+        r"""Compute the inverse of the bivariate copula's partial derivative, commonly
+        known as the inverse h-function [1]_.
+        Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
+        second margin is defined by
+        .. math::
+            h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},
+        which is the conditional distribution of :math:`U` given :math:`V = v`.
+        The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
+        value :math:`u \in [0,1]` such that
+        .. math::
+            h(u \mid v) \;=\; p,
+            \quad \text{where } p \in [0,1].
+        In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
+        :math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
+        :math:`p = \partial C(u, v)/\partial v`.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
+            - The first column `p` corresponds to the value of the h-function.
+            - The second column `v` is the conditioning variable.
+        first_margin : bool, default=False
+            If True, compute the inverse partial derivative with respect to the first
+            margin `u`; otherwise, compute the inverse partial derivative with respect
+            to the second margin `v`.
+        Returns
+        -------
+        u : ndarray of shape (n_observations,)
+            A 1D-array of length `n_observations`, where each element is the computed
+            :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.
+        References
+        ----------
+        .. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
+        .. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
+        """
+        pass
+    @abstractmethod
+    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
+        """Compute the log-likelihood of each sample (log-pdf) under the model.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
+            having been transformed to uniform marginals.
+        Returns
+        -------
+        density : ndarray of shape (n_observations,)
+            The log-likelihood of each sample under the fitted copula.
+        """
+        pass
+    def sample(self, n_samples: int = 1):
+        """Generate random samples from the bivariate copula using the inverse
+        Rosenblatt transform.
+        Parameters
+        ----------
+        n_samples : int, default=1
+            Number of samples to generate.
+        Returns
+        -------
+        X : array-like of shape (n_samples, 2)
+            An array of bivariate inputs `(u, v)` where each row represents a
+            bivariate observation. Both `u` and `v` are uniform marginals in the
+            interval `[0, 1]`.
+        """
+        skv.check_is_fitted(self)
+        rng = sku.check_random_state(self.random_state)
+        # Generate independent Uniform(0, 1) samples
+        X = rng.random(size=(n_samples, 2))
+        # Apply the inverse Rosenblatt transform on the first variable.
+        X[:, 1] = self.inverse_partial_derivative(X, first_margin=True)
+        return X
+    def tail_concentration(self, quantiles: np.ndarray) -> np.ndarray:
+        """
+        Compute the tail concentration function for a set of quantiles.
+        The tail concentration function is defined as follows:
+         - For quantiles q ≤ 0.5:
+             C(q) = P(U ≤ q, V ≤ q) / q
+         - For quantiles q > 0.5:
+             C(q) = (1 - 2q + P(U ≤ q, V ≤ q)) / (1 - q)
+        where U and V are the pseudo-observations of the first and second variables,
+        respectively. This function returns the concentration values for each q
+        provided.
+        Parameters
+        ----------
+        quantiles : ndarray of shape (n_quantiles,)
+           A 1D array of quantile levels (values between 0 and 1) at which to compute
+           the tail concentration.
+        Returns
+        -------
+        concentration : ndarray of shape (n_quantiles,)
+           The computed tail concentration values corresponding to each quantile.
+        References
+        ----------
+        .. [1] "Quantitative Risk Management: Concepts, Techniques, and Tools",
+            McNeil, Frey, Embrechts (2005)
+        Raises
+        ------
+        ValueError
+           If any value in `quantiles` is not in the interval [0, 1].
+        """
+        quantiles = np.asarray(quantiles)
+        if not np.all((quantiles >= 0) & (quantiles <= 1)):
+            raise ValueError("quantiles must be between 0.0 and 1.0.")
+        X = np.stack((quantiles, quantiles)).T
+        cdf = self.cdf(X)
+        concentration = np.where(
+            quantiles <= 0.5,
+            cdf / quantiles,
+            (1.0 - 2 * quantiles + cdf) / (1.0 - quantiles),
+        )
+        return concentration
+    def plot_tail_concentration(
+        self, X: npt.ArrayLike | None = None, title: str | None = None
+    ) -> go.Figure:
+        """
+        Plot the tail concentration function.
+        This method computes the tail concentration function at 100 evenly spaced
+        quantile levels between 0.005 and 0.995.
+        The plot displays the concentration values on the y-axis and the quantile levels
+        on the x-axis.
+        The tail concentration is defined as:
+          - Lower tail: λ_L(q) = P(U₂ ≤ q | U₁ ≤ q)
+          - Upper tail: λ_U(q) = P(U₂ ≥ q | U₁ ≥ q)
+        where U₁ and U₂ are the pseudo-observations of the first and second variables,
+        respectively.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, 2), optional
+            If provided, it is used to plot the empirical tail concentration for
+            comparison versus the model tail concentration.
+        title : str, optional
+            The title for the plot. If not provided, a default title based on the fitted
+            copula's representation is used.
+        Returns
+        -------
+        fig : go.Figure
+            A Plotly figure object containing the tail concentration curve.
+        References
+        ----------
+        .. [1] "Quantitative Risk Management: Concepts, Techniques, and Tools",
+            McNeil, Frey, Embrechts (2005)
+        """
+        if title is None:
+            title = f"Tail Concentration of Bivariate {self.__class__.__name__}"
+            if X is not None:
+                title += " vs Empirical"
+        quantiles = np.linspace(5e-3, 1.0 - 5e-3, num=100)
+        concentration = self.tail_concentration(quantiles)
+        tail_concentration_dict = {self.__class__.__name__: concentration}
+        if X is not None:
+            tail_concentration_dict["Empirical"] = empirical_tail_concentration(
+                X, quantiles=quantiles
+            )
+        fig = plot_tail_concentration(
+            tail_concentration_dict=tail_concentration_dict,
+            quantiles=quantiles,
+            title=title,
+            smoothing=1.3,
+        )
+        return fig
+    def plot_pdf_2d(self, title: str | None = None) -> go.Figure:
+        """
+        Plot a 2D contour of the estimated probability density function (PDF).
+        This method generates a grid over [0, 1]^2, computes the PDF, and displays a
+        contour plot of the PDF.
+        Contour levels are limited to the 97th quantile to avoid extreme densities.
+        Parameters
+        ----------
+        title : str, optional
+           The title for the plot. If not provided, a default title based on the fitted
+           copula's representation is used.
+        Returns
+        -------
+        fig : go.Figure
+           A Plotly figure object containing the 2D contour plot of the PDF.
+        """
+        skv.check_is_fitted(self)
+        if title is None:
+            title = f"PDF of the Bivariate {self.__class__.__name__}"
+        u = np.linspace(0.01, 0.99, 100)
+        U, V = np.meshgrid(u, u)
+        grid_points = np.column_stack((U.ravel(), V.ravel()))
+        pdfs = np.exp(self.score_samples(grid_points)).reshape(U.shape)
+        # After the 97th quantile, the pdf gets too dense, and it dilutes the plot.
+        end = round(np.quantile(pdfs, 0.97), 1)
+        fig = go.Figure(
+            data=go.Contour(
+                x=u,
+                y=u,
+                z=pdfs,
+                colorscale="Magma",
+                contours=dict(start=0, end=end, size=0.2),
+                line=dict(width=0),
+                colorbar=dict(title="PDF"),
+            )
+        )
+        fig.update_layout(
+            title=title,
+            xaxis_title="u",
+            yaxis_title="v",
+        )
+        return fig
+    def plot_pdf_3d(self, title: str | None = None) -> go.Figure:
+        """
+        Plot a 3D surface of the estimated probability density function (PDF).
+        This method generates a grid over [0, 1]^2, computes the PDF, and displays a
+        3D surface plot of the PDF using Plotly.
+        Parameters
+        ----------
+        title : str, optional
+           The title for the plot. If not provided, a default title based on the fitted
+           copula's representation is used.
+        Returns
+        -------
+        fig : go.Figure
+           A Plotly figure object containing a 3D surface plot of the PDF.
+        """
+        skv.check_is_fitted(self)
+        if title is None:
+            title = f"PDF of the Bivariate {self.__class__.__name__}"
+        u = np.linspace(0.03, 0.97, 100)
+        U, V = np.meshgrid(u, u)
+        grid_points = np.column_stack((U.ravel(), V.ravel()))
+        pdfs = np.exp(self.score_samples(grid_points)).reshape(U.shape)
+        fig = go.Figure(data=[go.Surface(x=U, y=V, z=pdfs, colorscale="Magma")])
+        fig.update_layout(
+            title=title, scene=dict(xaxis_title="u", yaxis_title="v", zaxis_title="PDF")
+        )
+        return fig

skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl