PyPI - skfolio - Versions diffs - 0.0.1__py3-none-any.whl - Mend

skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

skfolio/__init__.py +29 -0
skfolio/cluster/__init__.py +8 -0
skfolio/cluster/_hierarchical.py +387 -0
skfolio/datasets/__init__.py +20 -0
skfolio/datasets/_base.py +389 -0
skfolio/datasets/data/__init__.py +0 -0
skfolio/datasets/data/factors_dataset.csv.gz +0 -0
skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
skfolio/datasets/data/sp500_index.csv.gz +0 -0
skfolio/distance/__init__.py +26 -0
skfolio/distance/_base.py +55 -0
skfolio/distance/_distance.py +574 -0
skfolio/exceptions.py +30 -0
skfolio/measures/__init__.py +76 -0
skfolio/measures/_enums.py +355 -0
skfolio/measures/_measures.py +607 -0
skfolio/metrics/__init__.py +3 -0
skfolio/metrics/_scorer.py +121 -0
skfolio/model_selection/__init__.py +18 -0
skfolio/model_selection/_combinatorial.py +407 -0
skfolio/model_selection/_validation.py +194 -0
skfolio/model_selection/_walk_forward.py +221 -0
skfolio/moments/__init__.py +41 -0
skfolio/moments/covariance/__init__.py +29 -0
skfolio/moments/covariance/_base.py +101 -0
skfolio/moments/covariance/_covariance.py +1108 -0
skfolio/moments/expected_returns/__init__.py +21 -0
skfolio/moments/expected_returns/_base.py +31 -0
skfolio/moments/expected_returns/_expected_returns.py +415 -0
skfolio/optimization/__init__.py +36 -0
skfolio/optimization/_base.py +147 -0
skfolio/optimization/cluster/__init__.py +13 -0
skfolio/optimization/cluster/_nco.py +348 -0
skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
skfolio/optimization/cluster/hierarchical/_base.py +440 -0
skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
skfolio/optimization/convex/__init__.py +16 -0
skfolio/optimization/convex/_base.py +1944 -0
skfolio/optimization/convex/_distributionally_robust.py +392 -0
skfolio/optimization/convex/_maximum_diversification.py +417 -0
skfolio/optimization/convex/_mean_risk.py +974 -0
skfolio/optimization/convex/_risk_budgeting.py +560 -0
skfolio/optimization/ensemble/__init__.py +6 -0
skfolio/optimization/ensemble/_base.py +87 -0
skfolio/optimization/ensemble/_stacking.py +326 -0
skfolio/optimization/naive/__init__.py +3 -0
skfolio/optimization/naive/_naive.py +173 -0
skfolio/population/__init__.py +3 -0
skfolio/population/_population.py +883 -0
skfolio/portfolio/__init__.py +13 -0
skfolio/portfolio/_base.py +1096 -0
skfolio/portfolio/_multi_period_portfolio.py +610 -0
skfolio/portfolio/_portfolio.py +842 -0
skfolio/pre_selection/__init__.py +7 -0
skfolio/pre_selection/_pre_selection.py +342 -0
skfolio/preprocessing/__init__.py +3 -0
skfolio/preprocessing/_returns.py +114 -0
skfolio/prior/__init__.py +18 -0
skfolio/prior/_base.py +63 -0
skfolio/prior/_black_litterman.py +238 -0
skfolio/prior/_empirical.py +163 -0
skfolio/prior/_factor_model.py +268 -0
skfolio/typing.py +50 -0
skfolio/uncertainty_set/__init__.py +23 -0
skfolio/uncertainty_set/_base.py +108 -0
skfolio/uncertainty_set/_bootstrap.py +281 -0
skfolio/uncertainty_set/_empirical.py +237 -0
skfolio/utils/__init__.py +0 -0
skfolio/utils/bootstrap.py +115 -0
skfolio/utils/equations.py +350 -0
skfolio/utils/sorting.py +117 -0
skfolio/utils/stats.py +466 -0
skfolio/utils/tools.py +567 -0
skfolio-0.0.1.dist-info/LICENSE +29 -0
skfolio-0.0.1.dist-info/METADATA +568 -0
skfolio-0.0.1.dist-info/RECORD +79 -0
skfolio-0.0.1.dist-info/WHEEL +5 -0
skfolio-0.0.1.dist-info/top_level.txt +1 -0

skfolio/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+"""skfolio package"""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import importlib.metadata
+from skfolio.measures import (
+    BaseMeasure,
+    ExtraRiskMeasure,
+    PerfMeasure,
+    RatioMeasure,
+    RiskMeasure,
+)
+from skfolio.population import Population
+from skfolio.portfolio import BasePortfolio, MultiPeriodPortfolio, Portfolio
+# Version of the installed "skfolio" distribution, read from its package metadata
+# at import time. `importlib.metadata.version` raises `PackageNotFoundError` when
+# no distribution with that name is installed.
+__version__ = importlib.metadata.version("skfolio")
+# Names re-exported at the package root; each one is imported above from
+# `skfolio.measures`, `skfolio.population` or `skfolio.portfolio`.
+__all__ = [
+    "BaseMeasure",
+    "PerfMeasure",
+    "RiskMeasure",
+    "ExtraRiskMeasure",
+    "RatioMeasure",
+    "BasePortfolio",
+    "Portfolio",
+    "MultiPeriodPortfolio",
+    "Population",
+]

skfolio/cluster/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Hierarchical Clustering estimators."""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+from skfolio.cluster._hierarchical import HierarchicalClustering, LinkageMethod
+# Public names of the `skfolio.cluster` subpackage, both imported above from
+# `skfolio.cluster._hierarchical`.
+__all__ = ["LinkageMethod", "HierarchicalClustering"]

skfolio/cluster/_hierarchical.py ADDED Viewed

@@ -0,0 +1,387 @@
+"""Hierarchical Clustering estimators."""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+from enum import auto
+import numpy as np
+import numpy.typing as npt
+import plotly.figure_factory as ff
+import plotly.graph_objects as go
+import scipy.cluster.hierarchy as sch
+import scipy.spatial.distance as scd
+import sklearn.base as skb
+import sklearn.utils.validation as skv
+from skfolio.utils.stats import assert_is_distance, compute_optimal_n_clusters
+from skfolio.utils.tools import AutoEnum, default_asset_names
+class LinkageMethod(AutoEnum):
+    r"""Methods for calculating the distance between clusters in the linkage matrix.
+    See the `Linkage Methods` section of `scipy.cluster.hierarchy.linkage`
+    for full descriptions.
+    Attributes
+    ----------
+    SINGLE : str
+        Assigns
+        .. math:: d(u,v) = \min(dist(u[i],v[j]))
+        for all points :math:`i` in cluster :math:`u` and
+        :math:`j` in cluster :math:`v`. This is also known as the
+        Nearest Point Algorithm.
+    COMPLETE : str
+        Assigns
+        .. math:: d(u, v) = \max(dist(u[i],v[j]))
+        for all points :math:`i` in cluster u and :math:`j` in
+        cluster :math:`v`. This is also known by the Farthest Point
+        Algorithm or Voor Hees Algorithm.
+    AVERAGE : str
+        Assigns
+        .. math:: d(u,v) = \sum_{ij} \frac{d(u[i], v[j])}{(|u|*|v|)}
+        for all points :math:`i` and :math:`j` where :math:`|u|`
+        and :math:`|v|` are the cardinalities of clusters :math:`u`
+        and :math:`v`, respectively. This is also called the UPGMA
+        algorithm.
+    WEIGHTED : str
+        Assigns
+        .. math:: d(u,v) = (dist(s,v) + dist(t,v))/2
+        where cluster u was formed with cluster s and t and v
+        is a remaining cluster in the forest (also called WPGMA).
+    CENTROID : str
+        Assigns
+        .. math::
+           dist(s,t) = ||c_s-c_t||_2
+        where :math:`c_s` and :math:`c_t` are the centroids of
+        clusters :math:`s` and :math:`t`, respectively.
+        This is also known as the UPGMC
+        algorithm.
+    MEDIAN : str
+        Assigns :math:`d(s,t)` like the ``centroid`` method.
+        This is also known as the WPGMC algorithm.
+    WARD : str
+        Uses the Ward variance minimization algorithm.
+        The new entry :math:`d(u,v)` is computed as follows,
+        .. math::
+           d(u,v) = \sqrt{\frac{|v|+|s|}
+                               {T}d(v,s)^2
+                        + \frac{|v|+|t|}
+                               {T}d(v,t)^2
+                        - \frac{|v|}
+                               {T}d(s,t)^2}
+        where :math:`u` is the newly joined cluster consisting of
+        clusters :math:`s` and :math:`t`, :math:`v` is an unused
+        cluster in the forest, :math:`T=|v|+|s|+|t|`, and
+        :math:`|*|` is the cardinality of its argument. This is also
+        known as the incremental algorithm.
+    """
+    # Member values are generated by `AutoEnum` (imported from
+    # `skfolio.utils.tools`, not visible here) through `auto()`.
+    # NOTE(review): `HierarchicalClustering.fit` forwards `str(member.value)` as the
+    # `method` argument of `scipy.cluster.hierarchy.linkage`, so the values are
+    # presumably the lowercase member names — confirm against `AutoEnum`.
+    SINGLE = auto()
+    COMPLETE = auto()
+    AVERAGE = auto()
+    WEIGHTED = auto()
+    CENTROID = auto()
+    MEDIAN = auto()
+    WARD = auto()
+class HierarchicalClustering(skb.ClusterMixin, skb.BaseEstimator):
+    r"""Hierarchical Clustering.
+    Parameters
+    ----------
+    max_clusters : int, optional
+        For coherent clustering, the algorithm finds a minimum threshold ``r`` so that
+        the cophenetic distance between any two original observations in the same flat
+        cluster is no more than ``r`` and no more than `max_clusters` flat clusters are
+        formed. The default (`None`) is to estimate the maximal number of clusters
+        based on the Two-Order Difference to Gap Statistic [1]_.
+    linkage_method : LinkageMethod, default=LinkageMethod.WARD
+        Methods for calculating the distance between clusters in the linkage matrix.
+        See the `Linkage Methods` section of `scipy.cluster.hierarchy.linkage` for
+        the full descriptions.
+        The default is the Ward variance minimization algorithm `LinkageMethod.WARD`.
+    Attributes
+    ----------
+    n_clusters_ : int
+        Number of formed clusters.
+    labels_ : ndarray of shape (n_assets,)
+        Labels of each asset.
+    linkage_matrix_ : ndarray of shape (n_assets - 1, 4)
+        Linkage matrix computed from the distance matrix of the `distance_estimator`.
+    condensed_distance_ : ndarray of shape (\\binom{n_assets}{2}, )
+        The 1-D condensed distance matrix.
+    n_features_in_ : int
+        Number of assets seen during `fit`.
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of assets seen during `fit`. Defined only when `X`
+        has assets names that are all strings.
+    References
+    ----------
+    .. [1] "Application of two-order difference to gap statistic".
+        Yue, Wang & Wei (2009)
+    """
+    n_clusters_: int
+    labels_: np.ndarray
+    linkage_matrix_: np.ndarray
+    condensed_distance_: np.ndarray
+    def __init__(
+        self,
+        max_clusters: int | None = None,
+        linkage_method: LinkageMethod = LinkageMethod.WARD,
+    ):
+        self.max_clusters = max_clusters
+        self.linkage_method = linkage_method
+    def fit(self, X: npt.ArrayLike, y: None = None) -> "HierarchicalClustering":
+        """Fit the Hierarchical Equal Risk Contribution estimator.
+        Parameters
+        ----------
+        X : array-like of shape (n_assets, n_assets)
+            Distance matrix of the assets.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : HierarchicalClustering
+            Fitted estimator.
+        """
+        X = self._validate_data(X)
+        assert_is_distance(X)
+        self.condensed_distance_ = scd.squareform(X, checks=False)
+        self.linkage_matrix_ = sch.linkage(
+            self.condensed_distance_,
+            method=str(self.linkage_method.value),
+            # Not needed for clustering, only for
+            # visualization and can be slow. So we perform the optimal ordering only
+            # in `plot_dendrogram`.
+            optimal_ordering=False,
+        )
+        max_clusters = self.max_clusters
+        if max_clusters is None:
+            max_clusters = compute_optimal_n_clusters(
+                distance=X,
+                linkage_matrix=self.linkage_matrix_,
+            )
+        # Get the clusters from the linkage matrix
+        labels_ = sch.fcluster(
+            self.linkage_matrix_, t=max_clusters, criterion="maxclust"
+        )
+        labels_ -= 1  # Start at 0
+        self.n_clusters_ = len(set(labels_))
+        assert self.n_clusters_ == max(labels_) + 1 <= max_clusters
+        self.labels_ = labels_
+        return self
+    def plot_dendrogram(self, heatmap: bool = True) -> go.Figure:
+        """Plot the dendrogram.
+        The blue lines represent distinct clusters composed of a single asset.
+        The remaining colors represent clusters of more than one asset.
+        When `heatmap` is set to True, the heatmap of the reordered distance matrix is
+        displayed below the dendrogram and clusters are outlined with yellow squares.
+        The number of clusters used in the plot is the same as the `n_clusters_`
+        attribute if it exists, otherwise a default number is used corresponding to the
+        number of cluster with a distance above 70% of the maximum cluster distance.
+        Parameters
+        ----------
+        heatmap : bool, default=True
+            If this is set to True, the distance heatmap is returned with the clustered
+            outlined in yellow.
+        Returns
+        -------
+        fig : Figure
+            The dendrogram figure.
+        """
+        skv.check_is_fitted(self, "linkage_matrix_")
+        linkage_matrix = sch.optimal_leaf_ordering(
+            self.linkage_matrix_, self.condensed_distance_
+        )
+        n_assets = linkage_matrix.shape[0] + 1
+        cophenetic_distance_threshold = linkage_matrix[-(self.n_clusters_ - 1), 2]
+        if hasattr(self, "feature_names_in_"):
+            asset_names = self.feature_names_in_
+        else:
+            asset_names = default_asset_names(n_assets=n_assets)
+        if not heatmap:
+            fig = ff.create_dendrogram(
+                np.ones(1),
+                distfun=lambda x: None,
+                linkagefun=lambda x: linkage_matrix,
+                color_threshold=cophenetic_distance_threshold,
+                labels=asset_names,
+            )
+            fig.update_layout(
+                title="Dendrogram",
+                width=800,
+                height=400,
+                showlegend=False,
+                hovermode="closest",
+                xaxis={"title": "Assets"},
+                yaxis={"title": "Distance"},
+            )
+            return fig
+        # Initialize figure by creating upper dendrogram
+        fig = ff.create_dendrogram(
+            np.ones(1),
+            orientation="bottom",
+            distfun=lambda x: None,
+            linkagefun=lambda x: linkage_matrix,
+            color_threshold=cophenetic_distance_threshold,
+            labels=asset_names,
+        )
+        for i in range(len(fig["data"])):
+            fig["data"][i]["yaxis"] = "y2"
+        # Create Side Dendrogram
+        side_dendrogram = ff.create_dendrogram(
+            np.ones(1),
+            orientation="right",
+            distfun=lambda x: None,
+            linkagefun=lambda x: linkage_matrix,
+            color_threshold=cophenetic_distance_threshold,
+            labels=asset_names,
+        )
+        for i in range(len(side_dendrogram["data"])):
+            side_dendrogram["data"][i]["xaxis"] = "x2"
+        # Add Side Dendrogram Data to Figure
+        for data in side_dendrogram["data"]:
+            fig.add_trace(data)
+        # Create Heatmap
+        ordered_asset_names = side_dendrogram["layout"]["yaxis"]["ticktext"]
+        ordered_asset_names_idx = np.array(
+            [np.argwhere(x == asset_names)[0][0] for x in ordered_asset_names]
+        )
+        assert np.array_equal(asset_names[ordered_asset_names_idx], ordered_asset_names)
+        distance = scd.squareform(self.condensed_distance_, checks=False)
+        heat_data = distance[ordered_asset_names_idx, :][:, ordered_asset_names_idx]
+        heatmap = [
+            go.Heatmap(
+                x=ordered_asset_names,
+                y=ordered_asset_names,
+                z=heat_data,
+                colorscale="Blues",
+                name="",
+            )
+        ]
+        heatmap[0]["x"] = fig["layout"]["xaxis"]["tickvals"]
+        heatmap[0]["y"] = side_dendrogram["layout"]["yaxis"]["tickvals"]
+        # Add Heatmap Data to Figure
+        for data in heatmap:
+            fig.add_trace(data)
+        # Outline clusters
+        delta = heatmap[0]["x"][1] - heatmap[0]["x"][0]
+        clusters_ids = self.labels_[ordered_asset_names_idx]
+        for i in range(max(clusters_ids) + 1):
+            c_ids = np.argwhere(clusters_ids == i).ravel()
+            a = c_ids[0] * delta
+            b = (c_ids[-1] + 1) * delta
+            fig.add_shape(
+                type="rect",
+                x0=a,
+                y0=a,
+                x1=b,
+                y1=b,
+                line=dict(
+                    color="gold",
+                    width=2,
+                ),
+            )
+        fig.update_layout(
+            title="Dendrogram",
+            width=800,
+            height=800,
+            showlegend=False,
+            hovermode="closest",
+            xaxis={
+                "title": "Assets",
+                "domain": [0.15, 1],
+                "mirror": False,
+                "showgrid": False,
+                "showline": False,
+                "zeroline": False,
+                "ticks": "",
+            },
+            xaxis2={
+                "domain": [0, 0.15],
+                "mirror": False,
+                "showgrid": False,
+                "showline": False,
+                "zeroline": False,
+                "showticklabels": False,
+                "ticks": "",
+            },
+            yaxis={
+                "title": "Assets",
+                "domain": [0, 0.85],
+                "mirror": False,
+                "showgrid": False,
+                "showline": False,
+                "zeroline": False,
+                "showticklabels": False,
+                "ticks": "",
+                "tickvals": fig["layout"]["xaxis"]["tickvals"],
+                "ticktext": fig["layout"]["xaxis"]["ticktext"],
+            },
+            yaxis2={
+                "domain": [0.825, 0.975],
+                "mirror": False,
+                "showgrid": False,
+                "showline": False,
+                "zeroline": False,
+                "showticklabels": False,
+                "ticks": "",
+            },
+        )
+        return fig

skfolio/datasets/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Datasets module."""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+from skfolio.datasets._base import (
+    load_factors_dataset,
+    load_ftse100_dataset,
+    load_nasdaq_dataset,
+    load_sp500_dataset,
+    load_sp500_index,
+)
+# Public dataset loaders, all imported above from `skfolio.datasets._base`.
+# NOTE(review): the file list of this wheel shows only the factors and S&P 500
+# archives under `skfolio/datasets/data/`; how the FTSE 100 and NASDAQ loaders
+# obtain their data cannot be seen from here — TODO verify against `_base.py`.
+__all__ = [
+    "load_nasdaq_dataset",
+    "load_factors_dataset",
+    "load_ftse100_dataset",
+    "load_sp500_dataset",
+    "load_sp500_index",
+]