PyPI - skfolio - Versions diffs - 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

skfolio 0.4.3py3-none-any.whl → 0.5.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

skfolio/moments/covariance/_implied_covariance.py +1 -1
skfolio/optimization/convex/_base.py +12 -1
skfolio/optimization/convex/_risk_budgeting.py +5 -12
skfolio/population/_population.py +1 -1
skfolio/pre_selection/__init__.py +12 -6
skfolio/pre_selection/_drop_correlated.py +108 -0
skfolio/pre_selection/_select_complete.py +116 -0
skfolio/pre_selection/_select_k_extremes.py +100 -0
skfolio/pre_selection/_select_non_dominated.py +161 -0
skfolio/pre_selection/_select_non_expiring.py +148 -0
skfolio/preprocessing/_returns.py +9 -3
skfolio/utils/stats.py +2 -2
{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/METADATA +1 -1
{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/RECORD +17 -13
{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/WHEEL +1 -1
skfolio/pre_selection/_pre_selection.py +0 -343
{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/LICENSE +0 -0
{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/top_level.txt +0 -0

skfolio/moments/covariance/_implied_covariance.py CHANGED Viewed

@@ -259,7 +259,7 @@ class ImpliedCovariance(BaseCovariance):
         if assets_names is not None:
             vol_assets_names = get_feature_names(implied_vol)
             if vol_assets_names is not None:
-                missing_assets = assets_names[~np.in1d(assets_names, vol_assets_names)]
+                missing_assets = assets_names[~np.isin(assets_names, vol_assets_names)]
                 if len(missing_assets) > 0:
                     raise ValueError(
                         f"The following assets are missing from "

skfolio/optimization/convex/_base.py CHANGED Viewed

@@ -622,7 +622,11 @@ class ConvexOptimization(BaseOptimization, ABC):
         self._cvx_cache = {}
     def _get_weight_constraints(
-        self, n_assets: int, w: cp.Variable, factor: skt.Factor
+        self,
+        n_assets: int,
+        w: cp.Variable,
+        factor: skt.Factor,
+        allow_negative_weights: bool = True,
     ) -> list[cpc.Constraint]:
         """Compute weight constraints from input parameters.
@@ -651,6 +655,13 @@ class ConvexOptimization(BaseOptimization, ABC):
                 fill_value=0,
                 name="min_weights",
             )
+            if not allow_negative_weights and np.any(min_weights < 0):
+                raise ValueError(
+                    f"{self.__class__.__name__} must have non negative `min_weights` "
+                    f"constraint otherwise the problem becomes non-convex."
+                )
             constraints.append(
                 w * self._scale_constraints
                 >= min_weights * factor * self._scale_constraints

skfolio/optimization/convex/_risk_budgeting.py CHANGED Viewed

@@ -432,15 +432,6 @@ class RiskBudgeting(ConvexOptimization):
         self.min_return = min_return
         self.risk_budget = risk_budget
-    def _validation(self) -> None:
-        if not isinstance(self.risk_measure, RiskMeasure):
-            raise TypeError("risk_measure must be of type `RiskMeasure`")
-        if self.min_weights < 0:
-            raise ValueError(
-                "Risk Budgeting must have non negative `min_weights` constraint"
-                " otherwise the problem becomes non-convex."
-            )
     def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "RiskBudgeting":
         """Fit the Risk Budgeting Optimization estimator.
@@ -462,8 +453,10 @@ class RiskBudgeting(ConvexOptimization):
         routed_params = skm.process_routing(self, "fit", **fit_params)
         self._check_feature_names(X, reset=True)
-        # Validate
-        self._validation()
+        if not isinstance(self.risk_measure, RiskMeasure):
+            raise TypeError("risk_measure must be of type `RiskMeasure`")
         # Used to avoid adding multiple times similar constrains linked to identical
         # risk models
         self.prior_estimator_ = check_estimator(
@@ -518,7 +511,7 @@ class RiskBudgeting(ConvexOptimization):
         # weight constraints
         constraints += self._get_weight_constraints(
-            n_assets=n_assets, w=w, factor=factor
+            n_assets=n_assets, w=w, factor=factor, allow_negative_weights=False
         )
         parameters_values = []

skfolio/population/_population.py CHANGED Viewed

@@ -653,7 +653,7 @@ class Population(list):
         spacing: float | None = None,
         display_sub_ptf_name: bool = True,
     ) -> go.Figure:
-        """Plot the contribution of each asset to a given measure of the portfolios
+        r"""Plot the contribution of each asset to a given measure of the portfolios
         in the population.
         Parameters

skfolio/pre_selection/__init__.py CHANGED Viewed

@@ -1,7 +1,13 @@
-from skfolio.pre_selection._pre_selection import (
-    DropCorrelated,
-    SelectKExtremes,
-    SelectNonDominated,
-)
+from skfolio.pre_selection._drop_correlated import DropCorrelated
+from skfolio.pre_selection._select_complete import SelectComplete
+from skfolio.pre_selection._select_k_extremes import SelectKExtremes
+from skfolio.pre_selection._select_non_dominated import SelectNonDominated
+from skfolio.pre_selection._select_non_expiring import SelectNonExpiring
-__all__ = ["DropCorrelated", "SelectKExtremes", "SelectNonDominated"]
+__all__ = [
+    "DropCorrelated",
+    "SelectKExtremes",
+    "SelectNonDominated",
+    "SelectComplete",
+    "SelectNonExpiring",
+]

skfolio/pre_selection/_drop_correlated.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Pre-selection DropCorrelated module"""
+# Copyright (c) 2023
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import numpy as np
+import numpy.typing as npt
+import sklearn.base as skb
+import sklearn.feature_selection as skf
+import sklearn.utils.validation as skv
+class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
+    """Transformer for dropping highly correlated assets.
+    Simply removing all correlation pairs above the threshold will remove more assets
+    than necessary and a naive sequential removal is suboptimal and depends on the
+    initial assets ordering.
+    Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
+    the threshold and corr(Y,Z) below.
+    The first approach would remove X,Y,Z and the second approach would remove either
+    Y and Z or X depending on the initial ordering.
+    To avoid these shortcomings, we implement the below algorithm:
+        * Step 1: select all correlation pairs above the threshold.
+        * Step 2: sort all the selected correlation pairs from highest to lowest.
+        * Step 3: for each pair, if none of the two assets has been removed, keep the
+          asset with the lowest average correlation against the other assets.
+    Parameters
+    ----------
+    threshold : float, default=0.95
+        Correlation threshold. Must lie in [-1, 1]. The default value is `0.95`.
+    absolute : bool, default=False
+        If this is set to True, the absolute value of the correlation is used, both
+        for the threshold comparison and for the average correlation of each asset.
+        This has for effect to also include negatively correlated assets.
+    Attributes
+    ----------
+    to_keep_ : ndarray of shape (n_assets, )
+        Boolean array indicating which assets are remaining.
+    n_features_in_ : int
+        Number of assets seen during `fit`.
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of assets seen during `fit`. Defined only when `X`
+        has assets names that are all strings.
+    """
+    to_keep_: np.ndarray
+    def __init__(self, threshold: float = 0.95, absolute: bool = False):
+        self.threshold = threshold
+        self.absolute = absolute
+    def fit(self, X: npt.ArrayLike, y=None) -> "DropCorrelated":
+        """Run the correlation transformer and get the appropriate assets.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : DropCorrelated
+            Fitted estimator.
+        Raises
+        ------
+        ValueError
+            If `threshold` is not between -1 and 1.
+        """
+        X = self._validate_data(X)
+        if not -1 <= self.threshold <= 1:
+            raise ValueError("`threshold` must be between -1 and 1")
+        n_assets = X.shape[1]
+        # `np.corrcoef` collapses to a 0-d value when there is a single asset;
+        # force a 2-D matrix so the axis-based operations below keep working.
+        corr = np.atleast_2d(np.corrcoef(X.T))
+        # Honour the `absolute` option: without this step the parameter would be
+        # silently ignored and negatively correlated assets would never be dropped.
+        if self.absolute:
+            corr = np.abs(corr)
+        mean_corr = corr.mean(axis=0)
+        triu_idx = np.triu_indices(n_assets, 1)
+        # Correlation of every unordered asset pair (strict upper triangle)
+        pair_corr = corr[triu_idx]
+        # select all correlation pairs above the threshold
+        selected_idx = np.flatnonzero(pair_corr > self.threshold)
+        # sort all the selected correlation pairs from highest to lowest
+        selected_idx = selected_idx[np.argsort(-pair_corr[selected_idx])]
+        # for each pair, if none of the two assets has been removed, keep the asset with
+        # the lowest average correlation with other assets
+        to_remove = set()
+        for idx in selected_idx:
+            i, j = triu_idx[0][idx], triu_idx[1][idx]
+            if i not in to_remove and j not in to_remove:
+                to_remove.add(i if mean_corr[i] > mean_corr[j] else j)
+        self.to_keep_ = ~np.isin(np.arange(n_assets), list(to_remove))
+        return self
+    def _get_support_mask(self):
+        """Return the boolean mask of kept assets computed by `fit`."""
+        skv.check_is_fitted(self)
+        return self.to_keep_

skfolio/pre_selection/_select_complete.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""pre-selection SelectComplete module"""
+# Copyright (c) 2023
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import numpy as np
+import numpy.typing as npt
+import sklearn.base as skb
+import sklearn.feature_selection as skf
+import sklearn.utils.validation as skv
+class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
+    """
+    Transformer keeping only the assets whose data covers the entire observation
+    period.
+    An asset (column) is dropped when its first or its last observation is missing
+    (NaN). This typically happens in financial datasets when an asset started
+    trading late (late inception) or stopped trading before the end of the period
+    (early expiry or default).
+    Missing values located between two non-missing values do not cause the asset to
+    be dropped, unless `drop_assets_with_internal_nan` is set to `True`.
+    Parameters
+    ----------
+    drop_assets_with_internal_nan : bool, default=False
+        If set to True, any asset containing at least one missing value (NaN),
+        wherever it is located, is removed. By default, only assets with a leading
+        or trailing NaN are removed.
+    Attributes
+    ----------
+    to_keep_ : ndarray of shape (n_assets, )
+       Boolean array indicating which assets are remaining.
+    n_features_in_ : int
+       Number of assets seen during `fit`.
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+       Names of features seen during `fit`. Defined only when `X`
+       has feature names that are all strings.
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from skfolio.pre_selection import SelectComplete
+    >>> X = pd.DataFrame({
+    ...     'asset1': [np.nan, np.nan, 2, 3, 4],  # Starts late (inception)
+    ...     'asset2': [1, 2, 3, 4, 5],            # Complete data
+    ...     'asset3': [1, 2, 3, np.nan, 5],       # Missing values within data
+    ...     'asset4': [1, 2, 3, 4, np.nan]        # Ends early (expiration)
+    ... })
+    >>> selector = SelectComplete()
+    >>> selector.fit_transform(X)
+    array([[ 1.,  1.],
+           [ 2.,  2.],
+           [ 3.,  3.],
+           [ 4., nan],
+           [ 5.,  5.]])
+    >>> selector = SelectComplete(drop_assets_with_internal_nan=True)
+    >>> selector.fit_transform(X)
+    array([[1.],
+           [2.],
+           [3.],
+           [4.],
+           [5.]])
+    """
+    to_keep_: np.ndarray
+    def __init__(self, drop_assets_with_internal_nan: bool = False):
+        self.drop_assets_with_internal_nan = drop_assets_with_internal_nan
+    def fit(self, X: npt.ArrayLike, y=None) -> "SelectComplete":
+        """Determine which assets have complete data.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Returns of the assets.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : SelectComplete
+            Fitted estimator.
+        """
+        # NaNs are the very thing being inspected, so they must pass validation
+        X = self._validate_data(X, force_all_finite="allow-nan")
+        is_missing = np.isnan(X)
+        if self.drop_assets_with_internal_nan:
+            # Reject a column as soon as it holds a NaN anywhere
+            rejected = is_missing.any(axis=0)
+        else:
+            # Reject a column only when its first or last observation is NaN
+            rejected = is_missing[0, :] | is_missing[-1, :]
+        self.to_keep_ = ~rejected
+        return self
+    def _get_support_mask(self):
+        """Return the boolean mask of kept assets computed by `fit`."""
+        skv.check_is_fitted(self)
+        return self.to_keep_
+    def _more_tags(self):
+        """Declare to scikit-learn that NaN inputs are accepted."""
+        return dict(allow_nan=True)

skfolio/pre_selection/_select_k_extremes.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Pre-selection SelectKExtremes module"""
+# Copyright (c) 2023
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import numpy as np
+import numpy.typing as npt
+import sklearn.base as skb
+import sklearn.feature_selection as skf
+import sklearn.utils.validation as skv
+import skfolio.typing as skt
+from skfolio.measures import RatioMeasure
+from skfolio.population import Population
+from skfolio.portfolio import Portfolio
+class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
+    """Transformer keeping the `k` best or the `k` worst assets.
+    Every asset is ranked according to a given measure and only the `k` assets at
+    the requested extreme of the ranking are retained.
+    Parameters
+    ----------
+    k : int, default=10
+        Number of assets to select. If `k` is higher than the number of assets, all
+        assets are selected.
+    measure : Measure, default=RatioMeasure.SHARPE_RATIO
+        The :ref:`measure <measures_ref>` used to sort the assets.
+        The default is `RatioMeasure.SHARPE_RATIO`.
+    highest : bool, default=True
+        If this is set to True, the `k` assets with the highest `measure` are selected,
+        otherwise it is the `k` lowest.
+    Attributes
+    ----------
+    to_keep_ : ndarray of shape (n_assets, )
+       Boolean array indicating which assets are remaining.
+    n_features_in_ : int
+       Number of assets seen during `fit`.
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+       Names of features seen during `fit`. Defined only when `X`
+       has feature names that are all strings.
+    """
+    to_keep_: np.ndarray
+    def __init__(
+        self,
+        k: int = 10,
+        measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
+        highest: bool = True,
+    ):
+        self.k = k
+        self.measure = measure
+        self.highest = highest
+    def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
+        """Rank the assets by `measure` and flag the `k` extremes to keep.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : SelectKExtremes
+            Fitted estimator.
+        """
+        X = self._validate_data(X)
+        n_selected = int(self.k)
+        if n_selected <= 0:
+            raise ValueError("`k` must be strictly positive")
+        n_assets = X.shape[1]
+        # One portfolio per asset, with the whole weight on that single asset
+        single_asset_ptfs = Population([])
+        for asset_idx in range(n_assets):
+            one_hot = np.zeros(n_assets)
+            one_hot[asset_idx] = 1
+            single_asset_ptfs.append(Portfolio(X=X, weights=one_hot))
+        ranked = single_asset_ptfs.sort_measure(
+            measure=self.measure, reverse=self.highest
+        )
+        # Each portfolio holds exactly one asset: its only non-zero index
+        kept_idx = [ptf.nonzero_assets_index[0] for ptf in ranked[:n_selected]]
+        self.to_keep_ = np.isin(np.arange(n_assets), kept_idx)
+        return self
+    def _get_support_mask(self):
+        """Return the boolean mask of kept assets computed by `fit`."""
+        skv.check_is_fitted(self)
+        return self.to_keep_

skfolio/pre_selection/_select_non_dominated.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Pre-selection SelectNonDominated module"""
+# Copyright (c) 2023
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import numpy as np
+import numpy.typing as npt
+import sklearn.base as skb
+import sklearn.feature_selection as skf
+import sklearn.utils.validation as skv
+import skfolio.typing as skt
+from skfolio.population import Population
+from skfolio.portfolio import Portfolio
+class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
+    """Transformer for selecting non dominated assets.
+    Pre-selection based on the Assets Preselection Process 2 [1]_.
+    Good single asset (for example with high return and low risk) is likely to
+    contribute to the final optimized portfolio. Each asset is considered as a portfolio
+    and these assets are ranked using the non-domination sorting method. The selection
+    is based on the ranks assigned to each asset based on their fitness until the number
+    of selected assets reaches the user-defined number.
+    Considering only the fitness of individual asset is insufficient because a pair of
+    negatively correlated assets has the potential to reduce the risk. Therefore,
+    negatively correlated pairs of assets are also considered.
+    Parameters
+    ----------
+    min_n_assets : int, optional
+        The minimum number of assets to select. If `min_n_assets` is reached before the
+        end of the current non-dominated front, we return the remaining assets of this
+        front. This is because all assets in the same front have same rank.
+        The default (`None`) is to select the first front.
+    threshold : float, default=-0.5
+        Asset pair with a correlation below this threshold are included in the
+        non-domination sorting. Must lie in [-1, 1]. The default value is `-0.5`.
+    fitness_measures : list[Measure], optional
+        A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
+        The fitness is used to compare portfolios in terms of domination, compute the
+        pareto fronts and run the portfolio selection using non-dominated sorting.
+        The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]
+    Attributes
+    ----------
+    to_keep_ : ndarray of shape (n_assets, )
+        Boolean array indicating which assets are remaining.
+    n_features_in_ : int
+        Number of assets seen during `fit`.
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X`
+        has feature names that are all strings.
+    References
+    ----------
+    .. [1]  "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
+        Algorithms and Preselection Methods",
+        B.Y. Qu and Q.Zhou (2017).
+    """
+    to_keep_: np.ndarray
+    def __init__(
+        self,
+        min_n_assets: int | None = None,
+        threshold: float = -0.5,
+        fitness_measures: list[skt.Measure] | None = None,
+    ):
+        self.min_n_assets = min_n_assets
+        self.threshold = threshold
+        self.fitness_measures = fitness_measures
+    def fit(self, X: npt.ArrayLike, y=None):
+        """Run the Non Dominated transformer and get the appropriate assets.
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : SelectNonDominated
+            Fitted estimator.
+        Raises
+        ------
+        ValueError
+            If `threshold` is not between -1 and 1.
+        """
+        X = self._validate_data(X)
+        if not -1 <= self.threshold <= 1:
+            raise ValueError("`threshold` must be between -1 and 1")
+        n_assets = X.shape[1]
+        # Shortcut: when at least as many assets are requested as are available,
+        # everything is kept and no sorting is needed.
+        if self.min_n_assets is not None and self.min_n_assets >= n_assets:
+            self.to_keep_ = np.full(n_assets, True)
+            return self
+        # Build a population of portfolio
+        population = Population([])
+        # Add single assets
+        for i in range(n_assets):
+            weights = np.zeros(n_assets)
+            weights[i] = 1
+            population.append(
+                Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
+            )
+        # Add pairs with correlation below threshold with minimum variance
+        # ptf_variance = sigma1^2 w1^2 + sigma2^2 w2^2 + 2 sigma12 w1 w2 (1)
+        # with w1 + w2 = 1
+        # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
+        # respect to w1 and set to zero.
+        # By solving the obtained equation, we get:
+        # w1 = (sigma2^2 - sigma12) / (sigma1^2 + sigma2^2 - 2 sigma12)
+        # w2 = 1 - w1
+        corr = np.corrcoef(X.T)
+        covariance = np.cov(X.T)
+        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
+            if corr[i, j] < self.threshold:
+                cov = covariance[i, j]
+                var1 = covariance[i, i]
+                var2 = covariance[j, j]
+                weights = np.zeros(n_assets)
+                weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
+                weights[j] = 1 - weights[i]
+                population.append(
+                    Portfolio(
+                        X=X, weights=weights, fitness_measures=self.fitness_measures
+                    )
+                )
+        fronts = population.non_denominated_sort(
+            first_front_only=self.min_n_assets is None
+        )
+        # Collect the assets held by the portfolios of each front, front by front
+        new_assets_idx = set()
+        i = 0
+        while i < len(fronts):
+            # NOTE(review): the comparison is strict, so when exactly `min_n_assets`
+            # assets have been collected the next front is still added. Confirm that
+            # this is intended given that `min_n_assets` is documented as a minimum.
+            if (
+                self.min_n_assets is not None
+                and len(new_assets_idx) > self.min_n_assets
+            ):
+                break
+            for idx in fronts[i]:
+                new_assets_idx.update(population[idx].nonzero_assets_index)
+            i += 1
+        self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
+        return self
+    def _get_support_mask(self):
+        # Return the boolean mask of kept assets computed by `fit`
+        skv.check_is_fitted(self)
+        return self.to_keep_

skfolio/pre_selection/_select_non_expiring.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""pre-selection estimators module"""
+# Copyright (c) 2023
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# Implementation derived from:
+# Conway-Yu https://github.com/skfolio/skfolio/discussions/60
+# License: BSD 3 clause
+import datetime as dt
+import numpy as np
+import pandas as pd
+import sklearn.base as skb
+import sklearn.feature_selection as skf
+import sklearn.utils.validation as skv
+class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
+    """
+    Transformer to select assets that do not expire within a specified lookahead period
+    after the end of the observation period.
+    This transformer removes assets (columns) that have expiration dates within a
+    given lookahead period from the end of the dataset, allowing only assets that
+    remain active beyond this lookahead period to be selected.
+    This is useful when an exit strategy is needed before asset expiration, such as
+    for bonds or options with known end dates, or when applying WalkForward
+    cross-validation. It ensures that assets expiring during the test period are
+    excluded, so that only live assets are included in each training and test period.
+    Parameters
+    ----------
+    expiration_dates : dict[str, dt.datetime | pd.Timestamp], optional
+        Dictionary with asset names as keys and expiration dates as values.
+        Used to check if each asset expires within the date offset.
+        Assets with no expiration date will be retained by default.
+        Although the constructor default is `None`, a value must be provided
+        before calling `fit`.
+    expiration_lookahead : pd.offsets.BaseOffset | dt.timedelta, optional
+        The lookahead period after the end of the dataset within which assets with
+        expiration dates will be removed.
+        Although the constructor default is `None`, a value must be provided
+        before calling `fit`.
+    Attributes
+    ----------
+    to_keep_ : ndarray of shape (n_assets, )
+       Boolean array indicating which assets are remaining.
+    n_features_in_ : int
+       Number of assets seen during `fit`.
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+       Names of features seen during `fit`. Defined only when `X`
+       has feature names that are all strings.
+    Notes
+    -----
+    This transformer only supports DataFrames with a DateTime index.
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> import datetime as dt
+    >>> from sklearn import set_config
+    >>> set_config(transform_output="pandas")
+    >>> X = pd.DataFrame(
+    ...    {
+    ...        'asset1': [1, 2, 3, 4],
+    ...        'asset2': [2, 3, 4, 5],
+    ...        'asset3': [3, 4, 5, 6],
+    ...        'asset4': [4, 5, 6, 7]
+    ...    }, index=pd.date_range("2023-01-01", periods=4, freq="D")
+    ... )
+    >>> expiration_dates = {
+    ...    'asset1': pd.Timestamp("2023-01-10"),
+    ...    'asset2': pd.Timestamp("2023-01-02"),
+    ...    'asset3': pd.Timestamp("2023-01-06"),
+    ...    'asset4': dt.datetime(2023, 5, 1)
+    ... }
+    >>> selector = SelectNonExpiring(
+    ...    expiration_dates=expiration_dates,
+    ...    expiration_lookahead=pd.DateOffset(days=5)
+    ... )
+    >>> selector.fit_transform(X)
+               asset1  asset4
+    2023-01-01      1      4
+    2023-01-02      2      5
+    2023-01-03      3      6
+    2023-01-04      4      7
+    """
+    to_keep_: np.ndarray
+    def __init__(
+        self,
+        expiration_dates: dict[str, dt.datetime | pd.Timestamp] | None = None,
+        expiration_lookahead: pd.offsets.BaseOffset | dt.timedelta | None = None,
+    ):
+        self.expiration_dates = expiration_dates
+        self.expiration_lookahead = expiration_lookahead
+    def fit(self, X: pd.DataFrame, y=None) -> "SelectNonExpiring":
+        """Run the SelectNonExpiring transformer and get the appropriate assets.
+        Parameters
+        ----------
+        X : pd.DataFrame of shape (n_observations, n_assets)
+            Returns of the assets.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : SelectNonExpiring
+            Fitted estimator.
+        Raises
+        ------
+        ValueError
+            If `X` does not have a `DatetimeIndex`, or if `expiration_dates` or
+            `expiration_lookahead` is not provided.
+        """
+        # Validate by allowing NaNs. The converted array is discarded because the
+        # DataFrame index and columns of the original `X` are needed below.
+        _ = self._validate_data(X, force_all_finite="allow-nan")
+        if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
+            raise ValueError(
+                "X must be a DataFrame with an index of type DatetimeIndex"
+            )
+        if self.expiration_dates is None:
+            raise ValueError("`expiration_dates` must be provided")
+        if self.expiration_lookahead is None:
+            raise ValueError("`expiration_lookahead` must be provided")
+        # Calculate the cutoff date
+        end_date = X.index[-1]
+        cutoff_date = end_date + self.expiration_lookahead
+        # An asset is kept only if it expires strictly after the cutoff date; assets
+        # without a known expiration date fall back to `pd.Timestamp.max` and are
+        # therefore kept.
+        self.to_keep_ = np.array(
+            [
+                self.expiration_dates.get(asset, pd.Timestamp.max) > cutoff_date
+                for asset in X.columns
+            ]
+        )
+        return self
+    def _get_support_mask(self):
+        """Return the boolean mask of kept assets computed by `fit`."""
+        skv.check_is_fitted(self)
+        return self.to_keep_
+    def _more_tags(self):
+        """Declare to scikit-learn that NaN inputs are accepted."""
+        return {"allow_nan": True}

skfolio/preprocessing/_returns.py CHANGED Viewed

@@ -17,6 +17,7 @@ def prices_to_returns(
     nan_threshold: float = 1,
     join: Literal["left", "right", "inner", "outer", "cross"] = "outer",
     drop_inceptions_nan: bool = True,
+    fill_nan: bool = True,
 ) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
     r"""Transforms a DataFrame of prices to linear or logarithmic returns.
@@ -64,11 +65,15 @@ def prices_to_returns(
         this threshold. The default (`1.0`) is to keep all the observations.
     drop_inceptions_nan : bool, default=True
-        If this is set to True, observations at the beginning are dropped if any of
+        If set to True, observations at the beginning are dropped if any of
         the asset values are missing, otherwise we keep the NaNs. This is useful when
         you work with a large universe of assets with different inception dates coupled
         with a pre-selection Transformer.
+    fill_nan : bool, default=True
+        If set to True, missing prices (NaNs) are forward filled using the previous
+        price. Otherwise, NaNs are kept.
     Returns
     -------
     X : DataFrame
@@ -106,7 +111,8 @@ def prices_to_returns(
             df.drop(to_drop, axis=0, inplace=True)
     # Forward fill missing values
-    df.ffill(inplace=True)
+    if fill_nan:
+        df.ffill(inplace=True)
     # Drop rows according to drop_inceptions_nan
     # noinspection PyTypeChecker
     df.dropna(how="any" if drop_inceptions_nan else "all", inplace=True)
@@ -114,7 +120,7 @@ def prices_to_returns(
     df.dropna(axis=1, how="all", inplace=True)
     # returns
-    all_returns = df.pct_change().iloc[1:]
+    all_returns = df.pct_change(fill_method=None).iloc[1:]
     if log_returns:
         all_returns = np.log1p(all_returns)

skfolio/utils/stats.py CHANGED Viewed

@@ -185,7 +185,7 @@ def is_cholesky_dec(x: np.ndarray) -> bool:
     try:
         np.linalg.cholesky(x)
         return True
-    except np.linalg.linalg.LinAlgError:
+    except np.linalg.LinAlgError:
         return False
@@ -200,7 +200,7 @@ def is_positive_definite(x: np.ndarray) -> bool:
     Returns
     -------
     value : bool
-        True if if the matrix is positive definite, False otherwise.
+        True if the matrix is positive definite, False otherwise.
     """
     return np.all(np.linalg.eigvals(x) > 0)

{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: skfolio
-Version: 0.4.3
+Version: 0.5.1
 Summary: Portfolio optimization built on top of scikit-learn
 Author-email: Hugo Delatte <delatte.hugo@gmail.com>
 Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>

{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/RECORD RENAMED Viewed

@@ -30,7 +30,7 @@ skfolio/moments/covariance/_empirical_covariance.py,sha256=_7T1x4p-vdATQvQzQjQBM
 skfolio/moments/covariance/_ew_covariance.py,sha256=jzLE4zSEfLCToNBTIG5CMy1n9EYWo1IHJPifcyLVe1g,3673
 skfolio/moments/covariance/_gerber_covariance.py,sha256=3wSwZtji2cEr2rzZ6pi2knmuOSzTFpyb_4XJl_S3Yj0,5856
 skfolio/moments/covariance/_graphical_lasso_cv.py,sha256=_6WQ1sjYJRG8XDq8zb5YIPtDhpb8CmLhLBlfewBvqjM,6539
-skfolio/moments/covariance/_implied_covariance.py,sha256=6DiPWo7WVRA8EFvjYxBLBIrYaeRJWpr8yH5I64Sbbd0,17732
+skfolio/moments/covariance/_implied_covariance.py,sha256=L8odXiyNTfrnyroZUZSr8KkHv9_c3OCpdoqrtLqkonQ,17732
 skfolio/moments/covariance/_ledoit_wolf.py,sha256=iV92TpAopOAgQwa4zk7NF1rYdXkgm3uXn5ZZpbcMss0,4875
 skfolio/moments/covariance/_oas.py,sha256=ru8BNz7vQU75ARCuUbtJstmR2fy2fiD9OXLDlztUm5g,3684
 skfolio/moments/covariance/_shrunk_covariance.py,sha256=OOUahkiSdU3vFOb8i0iHtn8WU0AHl7o9pf8pFkG6Lv4,3095
@@ -49,26 +49,30 @@ skfolio/optimization/cluster/hierarchical/_base.py,sha256=l8rJHCH_79FOPdDL2I0dmA
 skfolio/optimization/cluster/hierarchical/_herc.py,sha256=LPtUrvyW9G60OZhMWlZH_GHZHdX8mJHksrYGB-WPRVg,20358
 skfolio/optimization/cluster/hierarchical/_hrp.py,sha256=dn6EKiTJ1wkoFhPdst6vlXnSQvXSYsMtB2zaGNVPpyA,18115
 skfolio/optimization/convex/__init__.py,sha256=F6BPFikTo0B-7JCKazqLGEwM3RkgTNbFm5GAGkaq9Uo,570
-skfolio/optimization/convex/_base.py,sha256=2at6Ll4qHkN_1wvYjl-yXWTbiRJj8fhNS-bfAT88YSw,76055
+skfolio/optimization/convex/_base.py,sha256=P1rSw1oJAZR_BuOxJeXJrYHlkFD0AwCOaBl3mj54E8U,76413
 skfolio/optimization/convex/_distributionally_robust.py,sha256=tw_UNSDfAXP02khE10hpmcdlz3DQXQD7ttDqFDSHV1E,17811
 skfolio/optimization/convex/_maximum_diversification.py,sha256=IVKVbK7bh4KPkhpNWLLerl-qx9Qcmf2cIIRotP8r8nI,19500
 skfolio/optimization/convex/_mean_risk.py,sha256=H4Ik6vvIETdAZnNCA4Jhk_OTirHJg26KQZ5iLsXgaHo,44176
-skfolio/optimization/convex/_risk_budgeting.py,sha256=ntPK57Ws-_U4QAiZjXFvKUYUELv9EBoJIWqofxx-0rY,23779
+skfolio/optimization/convex/_risk_budgeting.py,sha256=VXm6vUeB-BDEn6KhWxg1-9UmjqpFR1E04SM4NLcNuBY,23510
 skfolio/optimization/ensemble/__init__.py,sha256=8TXxcxH2_gG3C1xtgQj9OHHr0Le8lhdejtlURL6T3ZY,158
 skfolio/optimization/ensemble/_base.py,sha256=GaNDQu6ivosYuwMrb-b0PhToCsNrmhSYyXkxeM8W4rU,3399
 skfolio/optimization/ensemble/_stacking.py,sha256=ZoICUnc_MwoXDQAR2kewCg-KIezSOIUdDV1fuf7vMyA,14168
 skfolio/optimization/naive/__init__.py,sha256=Dkr55R48urC-jfYN007NTbei16N91Na_EDYLVqzhGgQ,147
 skfolio/optimization/naive/_naive.py,sha256=AhEyYKEUAm-Fjn4p8SHwhp7yE9iF0tRyDZIjKYV4EeU,6390
 skfolio/population/__init__.py,sha256=rsPPMUv95aTK7vmpPeQwF8NzFuBwk6RDo5g4HNaPzNM,80
-skfolio/population/_population.py,sha256=WYT6yTVmarzMH3nj1-rQCvD-X2nH6q9bo928-lenUXs,30426
+skfolio/population/_population.py,sha256=ej45tdk_CcMlNToCsx2VUk2YRktK3k4cRczGBpjlnDE,30427
 skfolio/portfolio/__init__.py,sha256=YYtcAPmA2zeCxFGTXegg2FXcA7py6CxOX7IMTdYuXl0,586
 skfolio/portfolio/_base.py,sha256=EFLsvHoxZmDvGPOKePr6hQGXU7y7TWsALvzYP9qt0fQ,39588
 skfolio/portfolio/_multi_period_portfolio.py,sha256=K2JfEwlPD9iGO58lOdk7WUbWuXZDWw2prPT5T7pOdto,24387
 skfolio/portfolio/_portfolio.py,sha256=gqvCKM6ZVfwZrgixiYdahgbQ1DRNW2LkGHkXOpjleb4,32753
-skfolio/pre_selection/__init__.py,sha256=VtUtDn-U-Mn_xR2k7yfld0Yb0rPhLakEAiBwUyi-4Z8,189
-skfolio/pre_selection/_pre_selection.py,sha256=w84T14nKmzkgzbw5CW_AIlci741lXYxKUwB5pBjhTTI,12163
+skfolio/pre_selection/__init__.py,sha256=_H0jziIOq0nUETFQvjBP4AtKGzdh0EGGSXaECTcUhxY,482
+skfolio/pre_selection/_drop_correlated.py,sha256=EDwRVqmkU-52VXQ-u350PYgjWCI5QnB8CfR1taLWffY,3818
+skfolio/pre_selection/_select_complete.py,sha256=sE9TCitUA5KbEqPssl0qsCBD-oV_5Vx-b-kdU0hsFHI,3885
+skfolio/pre_selection/_select_k_extremes.py,sha256=25FGievaDqlAHAxUmyznAd3LIq_7D3ajaSVD6E7luSI,3061
+skfolio/pre_selection/_select_non_dominated.py,sha256=HLGNS14vgQlg5I5zj-b1QpgCaZROd0FALQSmyXGpK7o,5983
+skfolio/pre_selection/_select_non_expiring.py,sha256=RAWnuW2u7y0ibsimJp5mRM9JQFOn0hHp-mWsp0FLPbs,4995
 skfolio/preprocessing/__init__.py,sha256=15A1bzfPsbfxxXgGP1gstf4R0E_347Wn18z5W5jH-hk,94
-skfolio/preprocessing/_returns.py,sha256=oo1Mm-UCHwq4ECjfmsRxWzzK1EPsuv-EEtnimvv_nXo,4345
+skfolio/preprocessing/_returns.py,sha256=6mdNi7Dun5eNK4LdqKAxP4CCZEVfAEz40HXVrOiAaLA,4561
 skfolio/prior/__init__.py,sha256=jql8NTiWlykPKJUXTOPdqm531mP8Pul1QAR6hXTXA6c,446
 skfolio/prior/_base.py,sha256=u9GLCKJl-Txiem5rIO-qkH3VIyem3taD6T9kMzsYPRY,1941
 skfolio/prior/_black_litterman.py,sha256=W3HbpvkViEiD7AOgpdVmNYTlWKSGDgo9Y3BfSrbMIQ4,10347
@@ -82,10 +86,10 @@ skfolio/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 skfolio/utils/bootstrap.py,sha256=3zY2kO_GQURKEcQMCasJOSByde9Mt2IAi3KJH0_a4mk,3550
 skfolio/utils/equations.py,sha256=MQ1w3VSM2n_j9bTIKAQA716aWKYyUqtw5yM2bU-9t-M,13745
 skfolio/utils/sorting.py,sha256=lSjMvH2L-sSj-06B3MlwBrH1rtjCeGEe4hG894W7TE0,3504
-skfolio/utils/stats.py,sha256=bzKlF2U7BN2WonwtuwG_cL_16Z3cTAxCAw5pZgbib54,17005
+skfolio/utils/stats.py,sha256=mWMpJ_XBy400kx7GlwBvR4Fwo8ValOZ9J3VDLODDaHQ,16995
 skfolio/utils/tools.py,sha256=4KrmBR9jOLiI6j0hb27gsPC--OHXo4Sp1xl-6i-k9Tg,20925
-skfolio-0.4.3.dist-info/LICENSE,sha256=F6Gi-ZJX5BlVzYK8R9NcvAkAsKa7KO29xB1OScbrH6Q,1526
-skfolio-0.4.3.dist-info/METADATA,sha256=PUf5onO29CqsRRaMyrMP3y0RKw6MJ43TNQ_2hMks7n0,19611
-skfolio-0.4.3.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
-skfolio-0.4.3.dist-info/top_level.txt,sha256=NXEaoS9Ms7t32gxkb867nV0OKlU0KmssL7IJBVo0fJs,8
-skfolio-0.4.3.dist-info/RECORD,,
+skfolio-0.5.1.dist-info/LICENSE,sha256=F6Gi-ZJX5BlVzYK8R9NcvAkAsKa7KO29xB1OScbrH6Q,1526
+skfolio-0.5.1.dist-info/METADATA,sha256=ZV5d0XFTqEJzVJZsLvX2OopVxQu-_UCoM1R9wYWQlWE,19611
+skfolio-0.5.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+skfolio-0.5.1.dist-info/top_level.txt,sha256=NXEaoS9Ms7t32gxkb867nV0OKlU0KmssL7IJBVo0fJs,8
+skfolio-0.5.1.dist-info/RECORD,,

{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.2.0)
+Generator: setuptools (75.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

skfolio/pre_selection/_pre_selection.py DELETED Viewed

@@ -1,343 +0,0 @@
-"""pre-selection estimators module"""
-# Copyright (c) 2023
-# Author: Hugo Delatte <delatte.hugo@gmail.com>
-# License: BSD 3 clause
-import numpy as np
-import numpy.typing as npt
-import sklearn.base as skb
-import sklearn.feature_selection as skf
-import sklearn.utils.validation as skv
-import skfolio.typing as skt
-from skfolio.measures import RatioMeasure
-from skfolio.population import Population
-from skfolio.portfolio import Portfolio
-class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
-    """Transformer for dropping highly correlated assets.
-    Simply removing all correlation pairs above the threshold will remove more assets
-    than necessary and a naive sequential removal is suboptimal and depends on the
-    initial assets ordering.
-    Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
-    the threshold and corr(Y,Z) below.
-    The first approach would remove X,Y,Z and the second approach would remove either
-    Y and Z or X depending on the initial ordering.
-    To avoid these shortcomings, we implement the below algorithm:
-        * Step 1: select all correlation pairs above the threshold.
-        * Step 2: sort all the selected correlation pairs from highest to lowest.
-        * Step 3: for each pair, if none of the two assets has been removed, keep the
-          asset with the lowest average correlation against the other assets.
-    Parameters
-    ----------
-    threshold : float, default=0.95
-        Correlation threshold. The default value is `0.95`.
-    absolute : bool, default=False
-        If this is set to True, we take the absolute value of the correlation. This has
-        for effect to also include negatively correlated assets.
-    Attributes
-    ----------
-    to_keep_ : ndarray of shape (n_assets, )
-        Boolean array indicating which assets are remaining.
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of assets seen during `fit`. Defined only when `X`
-        has assets names that are all strings.
-    """
-    to_keep_: np.ndarray
-    def __init__(self, threshold: float = 0.95, absolute: bool = False):
-        self.threshold = threshold
-        self.absolute = absolute
-    def fit(self, X: npt.ArrayLike, y=None):
-        """Run the correlation transformer and get the appropriate assets.
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-        y : Ignored
-            Not used, present for API consistency by convention.
-        Returns
-        -------
-        self : DropCorrelated
-            Fitted estimator.
-        """
-        X = self._validate_data(X)
-        if not -1 <= self.threshold <= 1:
-            raise ValueError("`threshold` must be between -1 and 1")
-        n_assets = X.shape[1]
-        corr = np.corrcoef(X.T)
-        mean_corr = corr.mean(axis=0)
-        triu_idx = np.triu_indices(n_assets, 1)
-        # select all correlation pairs above the threshold
-        selected_idx = np.argwhere(corr[triu_idx] > self.threshold).flatten()
-        # sort all the selected correlation pairs from highest to lowest
-        selected_idx = selected_idx[np.argsort(-corr[triu_idx][selected_idx])]
-        # for each pair, if none of the two assets has been removed, keep the asset with
-        # the lowest average correlation with other assets
-        to_remove = set()
-        for idx in selected_idx:
-            i, j = triu_idx[0][idx], triu_idx[1][idx]
-            if i not in to_remove and j not in to_remove:
-                if mean_corr[i] > mean_corr[j]:
-                    to_remove.add(i)
-                else:
-                    to_remove.add(j)
-        self.to_keep_ = ~np.isin(np.arange(n_assets), list(to_remove))
-        return self
-    def _get_support_mask(self):
-        skv.check_is_fitted(self)
-        return self.to_keep_
-class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
-    """Transformer for selecting the `k` best or worst assets.
-    Keep the `k` best or worst assets according to a given measure.
-    Parameters
-    ----------
-    k : int, default=10
-        Number of assets to select. If `k` is higher than the number of assets, all
-        assets are selected.
-    measure : Measure, default=RatioMeasure.SHARPE_RATIO
-        The :ref:`measure <measures_ref>` used to sort the assets.
-        The default is `RatioMeasure.SHARPE_RATIO`.
-    highest : bool, default=True
-        If this is set to True, the `k` assets with the highest `measure` are selected,
-        otherwise it is the `k` lowest.
-    Attributes
-    ----------
-    to_keep_ : ndarray of shape (n_assets, )
-       Boolean array indicating which assets are remaining.
-    n_features_in_ : int
-       Number of assets seen during `fit`.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-       Names of features seen during `fit`. Defined only when `X`
-       has feature names that are all strings.
-    """
-    to_keep_: np.ndarray
-    def __init__(
-        self,
-        k: int = 10,
-        measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
-        highest: bool = True,
-    ):
-        self.k = k
-        self.measure = measure
-        self.highest = highest
-    def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
-        """Run the SelectKExtremes transformer and get the appropriate assets.
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-        y : Ignored
-            Not used, present for API consistency by convention.
-        Returns
-        -------
-        self : SelectKExtremes
-            Fitted estimator.
-        """
-        X = self._validate_data(X)
-        k = int(self.k)
-        if k <= 0:
-            raise ValueError("`k` must be strictly positive")
-        n_assets = X.shape[1]
-        # Build a population of single assets portfolio
-        population = Population([])
-        for i in range(n_assets):
-            weights = np.zeros(n_assets)
-            weights[i] = 1
-            population.append(Portfolio(X=X, weights=weights))
-        selected = population.sort_measure(measure=self.measure, reverse=self.highest)[
-            :k
-        ]
-        selected_idx = [x.nonzero_assets_index[0] for x in selected]
-        self.to_keep_ = np.isin(np.arange(n_assets), selected_idx)
-        return self
-    def _get_support_mask(self):
-        skv.check_is_fitted(self)
-        return self.to_keep_
-class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
-    """Transformer for selecting non dominated assets.
-    Pre-selection based on the Assets Preselection Process 2 [1]_.
-    Good single asset (for example with high return and low risk) is likely to
-    contribute to the final optimized portfolio. Each asset is considered as a portfolio
-    and these assets are ranked using the non-domination sorting method. The selection
-    is based on the ranks assigned to each asset based on their fitness until the number
-    of selected assets reaches the user-defined number.
-    Considering only the fitness of individual asset is insufficient because a pair of
-    negatively correlated assets has the potential to reduce the risk. Therefore,
-    negatively correlated pairs of assets are also considered.
-    Parameters
-    ----------
-    min_n_assets : int, optional
-        The minimum number of assets to select. If `min_n_assets` is reached before the
-        end of the current non-dominated front, we return the remaining assets of this
-        front. This is because all assets in the same front have same rank.
-        The default (`None`) is to select the first front.
-    threshold : float, default=0.0
-        Asset pair with a correlation below this threshold are included in the
-        non-domination sorting. The default value is `0.0`.
-    fitness_measures : list[Measure], optional
-        A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
-        The fitness is used to compare portfolios in terms of domination, compute the
-        pareto fronts and run the portfolio selection using non-denominated sorting.
-        The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]
-    Attributes
-    ----------
-    to_keep_ : ndarray of shape (n_assets, )
-        Boolean array indicating which assets are remaining.
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during `fit`. Defined only when `X`
-        has feature names that are all strings.
-    References
-    ----------
-    .. [1]  "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
-        Algorithms and Preselection Methods",
-        B.Y. Qu and Q.Zhou (2017).
-    """
-    to_keep_: np.ndarray
-    def __init__(
-        self,
-        min_n_assets: int | None = None,
-        threshold: float = -0.5,
-        fitness_measures: list[skt.Measure] | None = None,
-    ):
-        self.min_n_assets = min_n_assets
-        self.threshold = threshold
-        self.fitness_measures = fitness_measures
-    def fit(self, X: npt.ArrayLike, y=None):
-        """Run the Non Dominated transformer and get the appropriate assets.
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-        y : Ignored
-            Not used, present for API consistency by convention.
-        Returns
-        -------
-        self : SelectNonDominated
-            Fitted estimator.
-        """
-        X = self._validate_data(X)
-        if not -1 <= self.threshold <= 1:
-            raise ValueError("`threshold` must be between -1 and 1")
-        n_assets = X.shape[1]
-        if self.min_n_assets is not None and self.min_n_assets >= n_assets:
-            self.to_keep_ = np.full(n_assets, True)
-            return self
-        # Build a population of portfolio
-        population = Population([])
-        # Add single assets
-        for i in range(n_assets):
-            weights = np.zeros(n_assets)
-            weights[i] = 1
-            population.append(
-                Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
-            )
-        # Add pairs with correlation below threshold with minimum variance
-        # ptf_variance = sigma1^2 w1^2 + sigma2^2 w2^2 + 2 sigma12 w1 w2 (1)
-        # with w1 + w2 = 1
-        # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
-        # respect to w1 and set to zero.
-        # By solving the obtained equation, we get:
-        # w1 = (sigma2^2 - sigma12) / (sigma1^2 + sigma2^2 - 2 sigma12)
-        # w2 = 1 - w1
-        corr = np.corrcoef(X.T)
-        covariance = np.cov(X.T)
-        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
-            if corr[i, j] < self.threshold:
-                cov = covariance[i, j]
-                var1 = covariance[i, i]
-                var2 = covariance[j, j]
-                weights = np.zeros(n_assets)
-                weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
-                weights[j] = 1 - weights[i]
-                population.append(
-                    Portfolio(
-                        X=X, weights=weights, fitness_measures=self.fitness_measures
-                    )
-                )
-        fronts = population.non_denominated_sort(
-            first_front_only=self.min_n_assets is None
-        )
-        new_assets_idx = set()
-        i = 0
-        while i < len(fronts):
-            if (
-                self.min_n_assets is not None
-                and len(new_assets_idx) > self.min_n_assets
-            ):
-                break
-            for idx in fronts[i]:
-                new_assets_idx.update(population[idx].nonzero_assets_index)
-            i += 1
-        self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
-        return self
-    def _get_support_mask(self):
-        skv.check_is_fitted(self)
-        return self.to_keep_

{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{skfolio-0.4.3.dist-info → skfolio-0.5.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

skfolio 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

skfolio 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl