skfolio 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -259,7 +259,7 @@ class ImpliedCovariance(BaseCovariance):
259
259
  if assets_names is not None:
260
260
  vol_assets_names = get_feature_names(implied_vol)
261
261
  if vol_assets_names is not None:
262
- missing_assets = assets_names[~np.in1d(assets_names, vol_assets_names)]
262
+ missing_assets = assets_names[~np.isin(assets_names, vol_assets_names)]
263
263
  if len(missing_assets) > 0:
264
264
  raise ValueError(
265
265
  f"The following assets are missing from "
@@ -622,7 +622,11 @@ class ConvexOptimization(BaseOptimization, ABC):
622
622
  self._cvx_cache = {}
623
623
 
624
624
  def _get_weight_constraints(
625
- self, n_assets: int, w: cp.Variable, factor: skt.Factor
625
+ self,
626
+ n_assets: int,
627
+ w: cp.Variable,
628
+ factor: skt.Factor,
629
+ allow_negative_weights: bool = True,
626
630
  ) -> list[cpc.Constraint]:
627
631
  """Compute weight constraints from input parameters.
628
632
 
@@ -651,6 +655,13 @@ class ConvexOptimization(BaseOptimization, ABC):
651
655
  fill_value=0,
652
656
  name="min_weights",
653
657
  )
658
+
659
+ if not allow_negative_weights and np.any(min_weights < 0):
660
+ raise ValueError(
661
+ f"{self.__class__.__name__} must have non negative `min_weights` "
662
+ f"constraint otherwise the problem becomes non-convex."
663
+ )
664
+
654
665
  constraints.append(
655
666
  w * self._scale_constraints
656
667
  >= min_weights * factor * self._scale_constraints
@@ -432,15 +432,6 @@ class RiskBudgeting(ConvexOptimization):
432
432
  self.min_return = min_return
433
433
  self.risk_budget = risk_budget
434
434
 
435
- def _validation(self) -> None:
436
- if not isinstance(self.risk_measure, RiskMeasure):
437
- raise TypeError("risk_measure must be of type `RiskMeasure`")
438
- if self.min_weights < 0:
439
- raise ValueError(
440
- "Risk Budgeting must have non negative `min_weights` constraint"
441
- " otherwise the problem becomes non-convex."
442
- )
443
-
444
435
  def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "RiskBudgeting":
445
436
  """Fit the Risk Budgeting Optimization estimator.
446
437
 
@@ -462,8 +453,10 @@ class RiskBudgeting(ConvexOptimization):
462
453
  routed_params = skm.process_routing(self, "fit", **fit_params)
463
454
 
464
455
  self._check_feature_names(X, reset=True)
465
- # Validate
466
- self._validation()
456
+
457
+ if not isinstance(self.risk_measure, RiskMeasure):
458
+ raise TypeError("risk_measure must be of type `RiskMeasure`")
459
+
467
460
  # Used to avoid adding multiple times similar constrains linked to identical
468
461
  # risk models
469
462
  self.prior_estimator_ = check_estimator(
@@ -518,7 +511,7 @@ class RiskBudgeting(ConvexOptimization):
518
511
 
519
512
  # weight constraints
520
513
  constraints += self._get_weight_constraints(
521
- n_assets=n_assets, w=w, factor=factor
514
+ n_assets=n_assets, w=w, factor=factor, allow_negative_weights=False
522
515
  )
523
516
 
524
517
  parameters_values = []
@@ -653,7 +653,7 @@ class Population(list):
653
653
  spacing: float | None = None,
654
654
  display_sub_ptf_name: bool = True,
655
655
  ) -> go.Figure:
656
- """Plot the contribution of each asset to a given measure of the portfolios
656
+ r"""Plot the contribution of each asset to a given measure of the portfolios
657
657
  in the population.
658
658
 
659
659
  Parameters
@@ -1,7 +1,13 @@
1
- from skfolio.pre_selection._pre_selection import (
2
- DropCorrelated,
3
- SelectKExtremes,
4
- SelectNonDominated,
5
- )
1
+ from skfolio.pre_selection._drop_correlated import DropCorrelated
2
+ from skfolio.pre_selection._select_complete import SelectComplete
3
+ from skfolio.pre_selection._select_k_extremes import SelectKExtremes
4
+ from skfolio.pre_selection._select_non_dominated import SelectNonDominated
5
+ from skfolio.pre_selection._select_non_expiring import SelectNonExpiring
6
6
 
7
- __all__ = ["DropCorrelated", "SelectKExtremes", "SelectNonDominated"]
7
+ __all__ = [
8
+ "DropCorrelated",
9
+ "SelectKExtremes",
10
+ "SelectNonDominated",
11
+ "SelectComplete",
12
+ "SelectNonExpiring",
13
+ ]
@@ -0,0 +1,108 @@
1
+ """Pre-selection DropCorrelated module"""
2
+
3
+ # Copyright (c) 2023
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # License: BSD 3 clause
6
+
7
+ import numpy as np
8
+ import numpy.typing as npt
9
+ import sklearn.base as skb
10
+ import sklearn.feature_selection as skf
11
+ import sklearn.utils.validation as skv
12
+
13
+
14
+ class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
15
+ """Transformer for dropping highly correlated assets.
16
+
17
+ Simply removing all correlation pairs above the threshold will remove more assets
18
+ than necessary and a naive sequential removal is suboptimal and depends on the
19
+ initial assets ordering.
20
+
21
+ Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
22
+ the threshold and corr(Y,Z) below.
23
+ The first approach would remove X,Y,Z and the second approach would remove either
24
+ Y and Z or X depending on the initial ordering.
25
+
26
+ To avoid these shortcomings, we implement the below algorithm:
27
+
28
+ * Step 1: select all correlation pairs above the threshold.
29
+ * Step 2: sort all the selected correlation pairs from highest to lowest.
30
+ * Step 3: for each pair, if none of the two assets has been removed, keep the
31
+ asset with the lowest average correlation against the other assets.
32
+
33
+ Parameters
34
+ ----------
35
+ threshold : float, default=0.95
36
+ Correlation threshold. The default value is `0.95`.
37
+
38
+ absolute : bool, default=False
39
+ If this is set to True, we take the absolute value of the correlation. This has
40
+ for effect to also include negatively correlated assets.
41
+
42
+ Attributes
43
+ ----------
44
+ to_keep_ : ndarray of shape (n_assets, )
45
+ Boolean array indicating which assets are remaining.
46
+
47
+ n_features_in_ : int
48
+ Number of assets seen during `fit`.
49
+
50
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
51
+ Names of assets seen during `fit`. Defined only when `X`
52
+ has assets names that are all strings.
53
+ """
54
+
55
+ to_keep_: np.ndarray
56
+
57
+ def __init__(self, threshold: float = 0.95, absolute: bool = False):
58
+ self.threshold = threshold
59
+ self.absolute = absolute
60
+
61
+ def fit(self, X: npt.ArrayLike, y=None):
62
+ """Run the correlation transformer and get the appropriate assets.
63
+
64
+ Parameters
65
+ ----------
66
+ X : array-like of shape (n_observations, n_assets)
67
+ Price returns of the assets.
68
+
69
+ y : Ignored
70
+ Not used, present for API consistency by convention.
71
+
72
+ Returns
73
+ -------
74
+ self : DropCorrelated
75
+ Fitted estimator.
76
+ """
77
+ X = self._validate_data(X)
78
+ if not -1 <= self.threshold <= 1:
79
+ raise ValueError("`threshold` must be between -1 and 1")
80
+
81
+ n_assets = X.shape[1]
82
+ corr = np.corrcoef(X.T)
83
+ mean_corr = corr.mean(axis=0)
84
+
85
+ triu_idx = np.triu_indices(n_assets, 1)
86
+
87
+ # select all correlation pairs above the threshold
88
+ selected_idx = np.argwhere(corr[triu_idx] > self.threshold).flatten()
89
+
90
+ # sort all the selected correlation pairs from highest to lowest
91
+ selected_idx = selected_idx[np.argsort(-corr[triu_idx][selected_idx])]
92
+
93
+ # for each pair, if none of the two assets has been removed, keep the asset with
94
+ # the lowest average correlation with other assets
95
+ to_remove = set()
96
+ for idx in selected_idx:
97
+ i, j = triu_idx[0][idx], triu_idx[1][idx]
98
+ if i not in to_remove and j not in to_remove:
99
+ if mean_corr[i] > mean_corr[j]:
100
+ to_remove.add(i)
101
+ else:
102
+ to_remove.add(j)
103
+ self.to_keep_ = ~np.isin(np.arange(n_assets), list(to_remove))
104
+ return self
105
+
106
+ def _get_support_mask(self):
107
+ skv.check_is_fitted(self)
108
+ return self.to_keep_
@@ -0,0 +1,116 @@
1
+ """pre-selection SelectComplete module"""
2
+
3
+ # Copyright (c) 2023
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # License: BSD 3 clause
6
+
7
+ import numpy as np
8
+ import numpy.typing as npt
9
+ import sklearn.base as skb
10
+ import sklearn.feature_selection as skf
11
+ import sklearn.utils.validation as skv
12
+
13
+
14
+ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
15
+ """
16
+ Transformer to select assets with complete data across the entire observation
17
+ period.
18
+
19
+ This transformer removes assets (columns) that have missing values (NaNs) at the
20
+ beginning or end of the period.
21
+
22
+ This transformer is especially useful for financial datasets where assets
23
+ (e.g., stocks, bonds) may have data gaps due to late inception (assets that started
24
+ trading later), early expiry or default (assets that stopped trading before the
25
+ end of the period).
26
+
27
+ If missing values are not at the beginning or end but occur between non-missing
28
+ values, the asset is not removed unless `drop_assets_with_internal_nan` is set to
29
+ `True`.
30
+
31
+ Parameters
32
+ ----------
33
+ drop_assets_with_internal_nan : bool, default=False
34
+ If set to True, assets with missing values (NaNs) that appear between
35
+ non-missing values (i.e., internal NaNs) will also be removed. By default,
36
+ only assets with leading or trailing NaNs are removed.
37
+
38
+ Attributes
39
+ ----------
40
+ to_keep_ : ndarray of shape (n_assets, )
41
+ Boolean array indicating which assets are remaining.
42
+
43
+ n_features_in_ : int
44
+ Number of assets seen during `fit`.
45
+
46
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
47
+ Names of features seen during `fit`. Defined only when `X`
48
+ has feature names that are all strings.
49
+
50
+ Examples
51
+ --------
52
+
53
+ >>> import numpy as np
54
+ >>> import pandas as pd
55
+ >>> from skfolio.pre_selection import SelectComplete
56
+ >>> X = pd.DataFrame({
57
+ ... 'asset1': [np.nan, np.nan, 2, 3, 4], # Starts late (inception)
58
+ ... 'asset2': [1, 2, 3, 4, 5], # Complete data
59
+ ... 'asset3': [1, 2, 3, np.nan, 5], # Missing values within data
60
+ ... 'asset4': [1, 2, 3, 4, np.nan] # Ends early (expiration)
61
+ ... })
62
+ >>> selector = SelectComplete()
63
+ >>> selector.fit_transform(X)
64
+ array([[ 1., 1.],
65
+ [ 2., 2.],
66
+ [ 3., 3.],
67
+ [ 4., nan],
68
+ [ 5., 5.]])
69
+ >>> selector = SelectComplete(drop_assets_with_internal_nan=True)
70
+ >>> selector.fit_transform(X)
71
+ array([[1.],
72
+ [2.],
73
+ [3.],
74
+ [4.],
75
+ [5.]])
76
+ """
77
+
78
+ to_keep_: np.ndarray
79
+
80
+ def __init__(self, drop_assets_with_internal_nan: bool = False):
81
+ self.drop_assets_with_internal_nan = drop_assets_with_internal_nan
82
+
83
+ def fit(self, X: npt.ArrayLike, y=None) -> "SelectComplete":
84
+ """Run the SelectComplete transformer and get the appropriate assets.
85
+
86
+ Parameters
87
+ ----------
88
+ X : array-like of shape (n_observations, n_assets)
89
+ Returns of the assets.
90
+
91
+ y : Ignored
92
+ Not used, present for API consistency by convention.
93
+
94
+ Returns
95
+ -------
96
+ self : SelectComplete
97
+ Fitted estimator.
98
+ """
99
+ # Validate by allowing NaNs
100
+ X = self._validate_data(X, force_all_finite="allow-nan")
101
+
102
+ if self.drop_assets_with_internal_nan:
103
+ # Identify columns with any NaNs
104
+ self.to_keep_ = ~np.isnan(X).any(axis=0)
105
+ else:
106
+ # Identify columns with no leading or trailing NaNs
107
+ self.to_keep_ = ~np.isnan(X[0, :]) & ~np.isnan(X[-1, :])
108
+
109
+ return self
110
+
111
+ def _get_support_mask(self):
112
+ skv.check_is_fitted(self)
113
+ return self.to_keep_
114
+
115
+ def _more_tags(self):
116
+ return {"allow_nan": True}
@@ -0,0 +1,100 @@
1
+ """Pre-selection SelectKExtremes module"""
2
+
3
+ # Copyright (c) 2023
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # License: BSD 3 clause
6
+
7
+ import numpy as np
8
+ import numpy.typing as npt
9
+ import sklearn.base as skb
10
+ import sklearn.feature_selection as skf
11
+ import sklearn.utils.validation as skv
12
+
13
+ import skfolio.typing as skt
14
+ from skfolio.measures import RatioMeasure
15
+ from skfolio.population import Population
16
+ from skfolio.portfolio import Portfolio
17
+
18
+
19
+ class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
20
+ """Transformer for selecting the `k` best or worst assets.
21
+
22
+ Keep the `k` best or worst assets according to a given measure.
23
+
24
+ Parameters
25
+ ----------
26
+ k : int, default=10
27
+ Number of assets to select. If `k` is higher than the number of assets, all
28
+ assets are selected.
29
+
30
+ measure : Measure, default=RatioMeasure.SHARPE_RATIO
31
+ The :ref:`measure <measures_ref>` used to sort the assets.
32
+ The default is `RatioMeasure.SHARPE_RATIO`.
33
+
34
+ highest : bool, default=True
35
+ If this is set to True, the `k` assets with the highest `measure` are selected,
36
+ otherwise it is the `k` lowest.
37
+
38
+ Attributes
39
+ ----------
40
+ to_keep_ : ndarray of shape (n_assets, )
41
+ Boolean array indicating which assets are remaining.
42
+
43
+ n_features_in_ : int
44
+ Number of assets seen during `fit`.
45
+
46
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
47
+ Names of features seen during `fit`. Defined only when `X`
48
+ has feature names that are all strings.
49
+ """
50
+
51
+ to_keep_: np.ndarray
52
+
53
+ def __init__(
54
+ self,
55
+ k: int = 10,
56
+ measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
57
+ highest: bool = True,
58
+ ):
59
+ self.k = k
60
+ self.measure = measure
61
+ self.highest = highest
62
+
63
+ def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
64
+ """Run the SelectKExtremes transformer and get the appropriate assets.
65
+
66
+ Parameters
67
+ ----------
68
+ X : array-like of shape (n_observations, n_assets)
69
+ Price returns of the assets.
70
+
71
+ y : Ignored
72
+ Not used, present for API consistency by convention.
73
+
74
+ Returns
75
+ -------
76
+ self : SelectKExtremes
77
+ Fitted estimator.
78
+ """
79
+ X = self._validate_data(X)
80
+ k = int(self.k)
81
+ if k <= 0:
82
+ raise ValueError("`k` must be strictly positive")
83
+ n_assets = X.shape[1]
84
+ # Build a population of single assets portfolio
85
+ population = Population([])
86
+ for i in range(n_assets):
87
+ weights = np.zeros(n_assets)
88
+ weights[i] = 1
89
+ population.append(Portfolio(X=X, weights=weights))
90
+
91
+ selected = population.sort_measure(measure=self.measure, reverse=self.highest)[
92
+ :k
93
+ ]
94
+ selected_idx = [x.nonzero_assets_index[0] for x in selected]
95
+ self.to_keep_ = np.isin(np.arange(n_assets), selected_idx)
96
+ return self
97
+
98
+ def _get_support_mask(self):
99
+ skv.check_is_fitted(self)
100
+ return self.to_keep_
@@ -0,0 +1,161 @@
1
+ """Pre-selection SelectNonDominated module"""
2
+
3
+ # Copyright (c) 2023
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # License: BSD 3 clause
6
+
7
+ import numpy as np
8
+ import numpy.typing as npt
9
+ import sklearn.base as skb
10
+ import sklearn.feature_selection as skf
11
+ import sklearn.utils.validation as skv
12
+
13
+ import skfolio.typing as skt
14
+ from skfolio.population import Population
15
+ from skfolio.portfolio import Portfolio
16
+
17
+
18
+ class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
19
+ """Transformer for selecting non dominated assets.
20
+
21
+ Pre-selection based on the Assets Preselection Process 2 [1]_.
22
+
23
+ Good single asset (for example with high return and low risk) is likely to
24
+ contribute to the final optimized portfolio. Each asset is considered as a portfolio
25
+ and these assets are ranked using the non-domination sorting method. The selection
26
+ is based on the ranks assigned to each asset based on their fitness until the number
27
+ of selected assets reaches the user-defined number.
28
+
29
+ Considering only the fitness of individual asset is insufficient because a pair of
30
+ negatively correlated assets has the potential to reduce the risk. Therefore,
31
+ negatively correlated pairs of assets are also considered.
32
+
33
+ Parameters
34
+ ----------
35
+ min_n_assets : int, optional
36
+ The minimum number of assets to select. If `min_n_assets` is reached before the
37
+ end of the current non-dominated front, we return the remaining assets of this
38
+ front. This is because all assets in the same front have same rank.
39
+ The default (`None`) is to select the first front.
40
+
41
+ threshold : float, default=-0.5
42
+ Asset pair with a correlation below this threshold are included in the
43
+ non-domination sorting. The default value is `-0.5`.
44
+
45
+ fitness_measures : list[Measure], optional
46
+ A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
47
+ The fitness is used to compare portfolios in terms of domination, compute the
48
+ pareto fronts and run the portfolio selection using non-dominated sorting.
49
+ The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]
50
+
51
+ Attributes
52
+ ----------
53
+ to_keep_ : ndarray of shape (n_assets, )
54
+ Boolean array indicating which assets are remaining.
55
+
56
+ n_features_in_ : int
57
+ Number of assets seen during `fit`.
58
+
59
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
60
+ Names of features seen during `fit`. Defined only when `X`
61
+ has feature names that are all strings.
62
+
63
+ References
64
+ ----------
65
+ .. [1] "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
66
+ Algorithms and Preselection Methods",
67
+ B.Y. Qu and Q.Zhou (2017).
68
+ """
69
+
70
+ to_keep_: np.ndarray
71
+
72
+ def __init__(
73
+ self,
74
+ min_n_assets: int | None = None,
75
+ threshold: float = -0.5,
76
+ fitness_measures: list[skt.Measure] | None = None,
77
+ ):
78
+ self.min_n_assets = min_n_assets
79
+ self.threshold = threshold
80
+ self.fitness_measures = fitness_measures
81
+
82
+ def fit(self, X: npt.ArrayLike, y=None):
83
+ """Run the Non Dominated transformer and get the appropriate assets.
84
+
85
+ Parameters
86
+ ----------
87
+ X : array-like of shape (n_observations, n_assets)
88
+ Price returns of the assets.
89
+
90
+ y : Ignored
91
+ Not used, present for API consistency by convention.
92
+
93
+ Returns
94
+ -------
95
+ self : SelectNonDominated
96
+ Fitted estimator.
97
+ """
98
+ X = self._validate_data(X)
99
+ if not -1 <= self.threshold <= 1:
100
+ raise ValueError("`threshold` must be between -1 and 1")
101
+ n_assets = X.shape[1]
102
+
103
+ if self.min_n_assets is not None and self.min_n_assets >= n_assets:
104
+ self.to_keep_ = np.full(n_assets, True)
105
+ return self
106
+
107
+ # Build a population of portfolio
108
+ population = Population([])
109
+ # Add single assets
110
+ for i in range(n_assets):
111
+ weights = np.zeros(n_assets)
112
+ weights[i] = 1
113
+ population.append(
114
+ Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
115
+ )
116
+
117
+ # Add pairs with correlation below threshold with minimum variance
118
+ # ptf_variance = sigma1^2 w1^2 + sigma2^2 w2^2 + 2 sigma12 w1 w2 (1)
119
+ # with w1 + w2 = 1
120
+ # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
121
+ # respect to w1 and set to zero.
122
+ # By solving the obtained equation, we get:
123
+ # w1 = (sigma2^2 - sigma12) / (sigma1^2 + sigma2^2 - 2 sigma12)
124
+ # w2 = 1 - w1
125
+
126
+ corr = np.corrcoef(X.T)
127
+ covariance = np.cov(X.T)
128
+ for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
129
+ if corr[i, j] < self.threshold:
130
+ cov = covariance[i, j]
131
+ var1 = covariance[i, i]
132
+ var2 = covariance[j, j]
133
+ weights = np.zeros(n_assets)
134
+ weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
135
+ weights[j] = 1 - weights[i]
136
+ population.append(
137
+ Portfolio(
138
+ X=X, weights=weights, fitness_measures=self.fitness_measures
139
+ )
140
+ )
141
+
142
+ fronts = population.non_denominated_sort(
143
+ first_front_only=self.min_n_assets is None
144
+ )
145
+ new_assets_idx = set()
146
+ i = 0
147
+ while i < len(fronts):
148
+ if (
149
+ self.min_n_assets is not None
150
+ and len(new_assets_idx) > self.min_n_assets
151
+ ):
152
+ break
153
+ for idx in fronts[i]:
154
+ new_assets_idx.update(population[idx].nonzero_assets_index)
155
+ i += 1
156
+ self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
157
+ return self
158
+
159
+ def _get_support_mask(self):
160
+ skv.check_is_fitted(self)
161
+ return self.to_keep_
@@ -0,0 +1,148 @@
1
+ """pre-selection estimators module"""
2
+
3
+ # Copyright (c) 2023
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # Implementation derived from:
6
+ # Conway-Yu https://github.com/skfolio/skfolio/discussions/60
7
+ # License: BSD 3 clause
8
+
9
+ import datetime as dt
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ import sklearn.base as skb
14
+ import sklearn.feature_selection as skf
15
+ import sklearn.utils.validation as skv
16
+
17
+
18
+ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
19
+ """
20
+ Transformer to select assets that do not expire within a specified lookahead period
21
+ after the end of the observation period.
22
+
23
+ This transformer removes assets (columns) that have expiration dates within a
24
+ given lookahead period from the end of the dataset, allowing only assets that
25
+ remain active beyond this lookahead period to be selected.
26
+
27
+ This is useful when an exit strategy is needed before asset expiration, such as
28
+ for bonds or options with known end dates, or when applying WalkForward
29
+ cross-validation. It ensures that assets expiring during the test period are
30
+ excluded, so that only live assets are included in each training and test period.
31
+
32
+ Parameters
33
+ ----------
34
+ expiration_dates : dict[str, dt.datetime | pd.Timestamp], optional
35
+ Dictionary with asset names as keys and expiration dates as values.
36
+ Used to check if each asset expires within the date offset.
37
+ Assets with no expiration date will be retained by default.
38
+
39
+ expiration_lookahead : pd.offsets.BaseOffset | dt.timedelta, optional
40
+ The lookahead period after the end of the dataset within which assets with
41
+ expiration dates will be removed.
42
+
43
+ Attributes
44
+ ----------
45
+ to_keep_ : ndarray of shape (n_assets, )
46
+ Boolean array indicating which assets are remaining.
47
+
48
+ n_features_in_ : int
49
+ Number of assets seen during `fit`.
50
+
51
+ feature_names_in_ : ndarray of shape (`n_features_in_`,)
52
+ Names of features seen during `fit`. Defined only when `X`
53
+ has feature names that are all strings.
54
+
55
+ Notes
56
+ -----
57
+ This transformer only supports DataFrames with a DateTime index.
58
+
59
+ Examples
60
+ --------
61
+ >>> import pandas as pd
62
+ >>> import datetime as dt
63
+ >>> from sklearn import set_config
64
+ >>> set_config(transform_output="pandas")
65
+ >>> X = pd.DataFrame(
66
+ ... {
67
+ ... 'asset1': [1, 2, 3, 4],
68
+ ... 'asset2': [2, 3, 4, 5],
69
+ ... 'asset3': [3, 4, 5, 6],
70
+ ... 'asset4': [4, 5, 6, 7]
71
+ ... }, index=pd.date_range("2023-01-01", periods=4, freq="D")
72
+ ...)
73
+ >>> expiration_dates = {
74
+ ... 'asset1': pd.Timestamp("2023-01-10"),
75
+ ... 'asset2': pd.Timestamp("2023-01-02"),
76
+ ... 'asset3': pd.Timestamp("2023-01-06"),
77
+ ... 'asset4': dt.datetime(2023, 5, 1)
78
+ ... }
79
+ >>> selector = SelectNonExpiring(
80
+ ... expiration_dates=expiration_dates,
81
+ ... expiration_lookahead=pd.DateOffset(days=5)
82
+ ...)
83
+ >>> selector.fit_transform(X)
84
+ asset1 asset4
85
+ 2023-01-01 1 4
86
+ 2023-01-02 2 5
87
+ 2023-01-03 3 6
88
+ 2023-01-04 4 7
89
+ """
90
+
91
+ to_keep_: np.ndarray
92
+
93
+ def __init__(
94
+ self,
95
+ expiration_dates: dict[str, dt.datetime | pd.Timestamp] | None = None,
96
+ expiration_lookahead: pd.offsets.BaseOffset | dt.timedelta | None = None,
97
+ ):
98
+ self.expiration_dates = expiration_dates
99
+ self.expiration_lookahead = expiration_lookahead
100
+
101
+ def fit(self, X: pd.DataFrame, y=None) -> "SelectNonExpiring":
102
+ """Run the SelectNonExpiring transformer and get the appropriate assets.
103
+
104
+ Parameters
105
+ ----------
106
+ X : pd.DataFrame of shape (n_observations, n_assets)
107
+ Returns of the assets.
108
+
109
+ y : Ignored
110
+ Not used, present for API consistency by convention.
111
+
112
+ Returns
113
+ -------
114
+ self : SelectNonExpiring
115
+ Fitted estimator.
116
+ """
117
+ _ = self._validate_data(X, force_all_finite="allow-nan")
118
+
119
+ # NaNs are allowed by the validation above; the index must be a DatetimeIndex
120
+ if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
121
+ raise ValueError(
122
+ "X must be a DataFrame with an index of type DatetimeIndex"
123
+ )
124
+
125
+ if self.expiration_dates is None:
126
+ raise ValueError("`expiration_dates` must be provided")
127
+
128
+ if self.expiration_lookahead is None:
129
+ raise ValueError("`expiration_lookahead` must be provided")
130
+
131
+ # Calculate the cutoff date
132
+ end_date = X.index[-1]
133
+ cutoff_date = end_date + self.expiration_lookahead
134
+ self.to_keep_ = np.array(
135
+ [
136
+ self.expiration_dates.get(asset, pd.Timestamp.max) > cutoff_date
137
+ for asset in X.columns
138
+ ]
139
+ )
140
+
141
+ return self
142
+
143
+ def _get_support_mask(self):
144
+ skv.check_is_fitted(self)
145
+ return self.to_keep_
146
+
147
+ def _more_tags(self):
148
+ return {"allow_nan": True}
@@ -17,6 +17,7 @@ def prices_to_returns(
17
17
  nan_threshold: float = 1,
18
18
  join: Literal["left", "right", "inner", "outer", "cross"] = "outer",
19
19
  drop_inceptions_nan: bool = True,
20
+ fill_nan: bool = True,
20
21
  ) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
21
22
  r"""Transforms a DataFrame of prices to linear or logarithmic returns.
22
23
 
@@ -64,11 +65,15 @@ def prices_to_returns(
64
65
  this threshold. The default (`1.0`) is to keep all the observations.
65
66
 
66
67
  drop_inceptions_nan : bool, default=True
67
- If this is set to True, observations at the beginning are dropped if any of
68
+ If set to True, observations at the beginning are dropped if any of
68
69
  the asset values are missing, otherwise we keep the NaNs. This is useful when
69
70
  you work with a large universe of assets with different inception dates coupled
70
71
  with a pre-selection Transformer.
71
72
 
73
+ fill_nan : bool, default=True
74
+ If set to True, missing prices (NaNs) are forward filled using the previous
75
+ price. Otherwise, NaNs are kept.
76
+
72
77
  Returns
73
78
  -------
74
79
  X : DataFrame
@@ -106,7 +111,8 @@ def prices_to_returns(
106
111
  df.drop(to_drop, axis=0, inplace=True)
107
112
 
108
113
  # Forward fill missing values
109
- df.ffill(inplace=True)
114
+ if fill_nan:
115
+ df.ffill(inplace=True)
110
116
  # Drop rows according to drop_inceptions_nan
111
117
  # noinspection PyTypeChecker
112
118
  df.dropna(how="any" if drop_inceptions_nan else "all", inplace=True)
@@ -114,7 +120,7 @@ def prices_to_returns(
114
120
  df.dropna(axis=1, how="all", inplace=True)
115
121
 
116
122
  # returns
117
- all_returns = df.pct_change().iloc[1:]
123
+ all_returns = df.pct_change(fill_method=None).iloc[1:]
118
124
  if log_returns:
119
125
  all_returns = np.log1p(all_returns)
120
126
 
skfolio/utils/stats.py CHANGED
@@ -185,7 +185,7 @@ def is_cholesky_dec(x: np.ndarray) -> bool:
185
185
  try:
186
186
  np.linalg.cholesky(x)
187
187
  return True
188
- except np.linalg.linalg.LinAlgError:
188
+ except np.linalg.LinAlgError:
189
189
  return False
190
190
 
191
191
 
@@ -200,7 +200,7 @@ def is_positive_definite(x: np.ndarray) -> bool:
200
200
  Returns
201
201
  -------
202
202
  value : bool
203
- True if if the matrix is positive definite, False otherwise.
203
+ True if the matrix is positive definite, False otherwise.
204
204
  """
205
205
  return np.all(np.linalg.eigvals(x) > 0)
206
206
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skfolio
3
- Version: 0.4.3
3
+ Version: 0.5.1
4
4
  Summary: Portfolio optimization built on top of scikit-learn
5
5
  Author-email: Hugo Delatte <delatte.hugo@gmail.com>
6
6
  Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
@@ -30,7 +30,7 @@ skfolio/moments/covariance/_empirical_covariance.py,sha256=_7T1x4p-vdATQvQzQjQBM
30
30
  skfolio/moments/covariance/_ew_covariance.py,sha256=jzLE4zSEfLCToNBTIG5CMy1n9EYWo1IHJPifcyLVe1g,3673
31
31
  skfolio/moments/covariance/_gerber_covariance.py,sha256=3wSwZtji2cEr2rzZ6pi2knmuOSzTFpyb_4XJl_S3Yj0,5856
32
32
  skfolio/moments/covariance/_graphical_lasso_cv.py,sha256=_6WQ1sjYJRG8XDq8zb5YIPtDhpb8CmLhLBlfewBvqjM,6539
33
- skfolio/moments/covariance/_implied_covariance.py,sha256=6DiPWo7WVRA8EFvjYxBLBIrYaeRJWpr8yH5I64Sbbd0,17732
33
+ skfolio/moments/covariance/_implied_covariance.py,sha256=L8odXiyNTfrnyroZUZSr8KkHv9_c3OCpdoqrtLqkonQ,17732
34
34
  skfolio/moments/covariance/_ledoit_wolf.py,sha256=iV92TpAopOAgQwa4zk7NF1rYdXkgm3uXn5ZZpbcMss0,4875
35
35
  skfolio/moments/covariance/_oas.py,sha256=ru8BNz7vQU75ARCuUbtJstmR2fy2fiD9OXLDlztUm5g,3684
36
36
  skfolio/moments/covariance/_shrunk_covariance.py,sha256=OOUahkiSdU3vFOb8i0iHtn8WU0AHl7o9pf8pFkG6Lv4,3095
@@ -49,26 +49,30 @@ skfolio/optimization/cluster/hierarchical/_base.py,sha256=l8rJHCH_79FOPdDL2I0dmA
49
49
  skfolio/optimization/cluster/hierarchical/_herc.py,sha256=LPtUrvyW9G60OZhMWlZH_GHZHdX8mJHksrYGB-WPRVg,20358
50
50
  skfolio/optimization/cluster/hierarchical/_hrp.py,sha256=dn6EKiTJ1wkoFhPdst6vlXnSQvXSYsMtB2zaGNVPpyA,18115
51
51
  skfolio/optimization/convex/__init__.py,sha256=F6BPFikTo0B-7JCKazqLGEwM3RkgTNbFm5GAGkaq9Uo,570
52
- skfolio/optimization/convex/_base.py,sha256=2at6Ll4qHkN_1wvYjl-yXWTbiRJj8fhNS-bfAT88YSw,76055
52
+ skfolio/optimization/convex/_base.py,sha256=P1rSw1oJAZR_BuOxJeXJrYHlkFD0AwCOaBl3mj54E8U,76413
53
53
  skfolio/optimization/convex/_distributionally_robust.py,sha256=tw_UNSDfAXP02khE10hpmcdlz3DQXQD7ttDqFDSHV1E,17811
54
54
  skfolio/optimization/convex/_maximum_diversification.py,sha256=IVKVbK7bh4KPkhpNWLLerl-qx9Qcmf2cIIRotP8r8nI,19500
55
55
  skfolio/optimization/convex/_mean_risk.py,sha256=H4Ik6vvIETdAZnNCA4Jhk_OTirHJg26KQZ5iLsXgaHo,44176
56
- skfolio/optimization/convex/_risk_budgeting.py,sha256=ntPK57Ws-_U4QAiZjXFvKUYUELv9EBoJIWqofxx-0rY,23779
56
+ skfolio/optimization/convex/_risk_budgeting.py,sha256=VXm6vUeB-BDEn6KhWxg1-9UmjqpFR1E04SM4NLcNuBY,23510
57
57
  skfolio/optimization/ensemble/__init__.py,sha256=8TXxcxH2_gG3C1xtgQj9OHHr0Le8lhdejtlURL6T3ZY,158
58
58
  skfolio/optimization/ensemble/_base.py,sha256=GaNDQu6ivosYuwMrb-b0PhToCsNrmhSYyXkxeM8W4rU,3399
59
59
  skfolio/optimization/ensemble/_stacking.py,sha256=ZoICUnc_MwoXDQAR2kewCg-KIezSOIUdDV1fuf7vMyA,14168
60
60
  skfolio/optimization/naive/__init__.py,sha256=Dkr55R48urC-jfYN007NTbei16N91Na_EDYLVqzhGgQ,147
61
61
  skfolio/optimization/naive/_naive.py,sha256=AhEyYKEUAm-Fjn4p8SHwhp7yE9iF0tRyDZIjKYV4EeU,6390
62
62
  skfolio/population/__init__.py,sha256=rsPPMUv95aTK7vmpPeQwF8NzFuBwk6RDo5g4HNaPzNM,80
63
- skfolio/population/_population.py,sha256=WYT6yTVmarzMH3nj1-rQCvD-X2nH6q9bo928-lenUXs,30426
63
+ skfolio/population/_population.py,sha256=ej45tdk_CcMlNToCsx2VUk2YRktK3k4cRczGBpjlnDE,30427
64
64
  skfolio/portfolio/__init__.py,sha256=YYtcAPmA2zeCxFGTXegg2FXcA7py6CxOX7IMTdYuXl0,586
65
65
  skfolio/portfolio/_base.py,sha256=EFLsvHoxZmDvGPOKePr6hQGXU7y7TWsALvzYP9qt0fQ,39588
66
66
  skfolio/portfolio/_multi_period_portfolio.py,sha256=K2JfEwlPD9iGO58lOdk7WUbWuXZDWw2prPT5T7pOdto,24387
67
67
  skfolio/portfolio/_portfolio.py,sha256=gqvCKM6ZVfwZrgixiYdahgbQ1DRNW2LkGHkXOpjleb4,32753
68
- skfolio/pre_selection/__init__.py,sha256=VtUtDn-U-Mn_xR2k7yfld0Yb0rPhLakEAiBwUyi-4Z8,189
69
- skfolio/pre_selection/_pre_selection.py,sha256=w84T14nKmzkgzbw5CW_AIlci741lXYxKUwB5pBjhTTI,12163
68
+ skfolio/pre_selection/__init__.py,sha256=_H0jziIOq0nUETFQvjBP4AtKGzdh0EGGSXaECTcUhxY,482
69
+ skfolio/pre_selection/_drop_correlated.py,sha256=EDwRVqmkU-52VXQ-u350PYgjWCI5QnB8CfR1taLWffY,3818
70
+ skfolio/pre_selection/_select_complete.py,sha256=sE9TCitUA5KbEqPssl0qsCBD-oV_5Vx-b-kdU0hsFHI,3885
71
+ skfolio/pre_selection/_select_k_extremes.py,sha256=25FGievaDqlAHAxUmyznAd3LIq_7D3ajaSVD6E7luSI,3061
72
+ skfolio/pre_selection/_select_non_dominated.py,sha256=HLGNS14vgQlg5I5zj-b1QpgCaZROd0FALQSmyXGpK7o,5983
73
+ skfolio/pre_selection/_select_non_expiring.py,sha256=RAWnuW2u7y0ibsimJp5mRM9JQFOn0hHp-mWsp0FLPbs,4995
70
74
  skfolio/preprocessing/__init__.py,sha256=15A1bzfPsbfxxXgGP1gstf4R0E_347Wn18z5W5jH-hk,94
71
- skfolio/preprocessing/_returns.py,sha256=oo1Mm-UCHwq4ECjfmsRxWzzK1EPsuv-EEtnimvv_nXo,4345
75
+ skfolio/preprocessing/_returns.py,sha256=6mdNi7Dun5eNK4LdqKAxP4CCZEVfAEz40HXVrOiAaLA,4561
72
76
  skfolio/prior/__init__.py,sha256=jql8NTiWlykPKJUXTOPdqm531mP8Pul1QAR6hXTXA6c,446
73
77
  skfolio/prior/_base.py,sha256=u9GLCKJl-Txiem5rIO-qkH3VIyem3taD6T9kMzsYPRY,1941
74
78
  skfolio/prior/_black_litterman.py,sha256=W3HbpvkViEiD7AOgpdVmNYTlWKSGDgo9Y3BfSrbMIQ4,10347
@@ -82,10 +86,10 @@ skfolio/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
86
  skfolio/utils/bootstrap.py,sha256=3zY2kO_GQURKEcQMCasJOSByde9Mt2IAi3KJH0_a4mk,3550
83
87
  skfolio/utils/equations.py,sha256=MQ1w3VSM2n_j9bTIKAQA716aWKYyUqtw5yM2bU-9t-M,13745
84
88
  skfolio/utils/sorting.py,sha256=lSjMvH2L-sSj-06B3MlwBrH1rtjCeGEe4hG894W7TE0,3504
85
- skfolio/utils/stats.py,sha256=bzKlF2U7BN2WonwtuwG_cL_16Z3cTAxCAw5pZgbib54,17005
89
+ skfolio/utils/stats.py,sha256=mWMpJ_XBy400kx7GlwBvR4Fwo8ValOZ9J3VDLODDaHQ,16995
86
90
  skfolio/utils/tools.py,sha256=4KrmBR9jOLiI6j0hb27gsPC--OHXo4Sp1xl-6i-k9Tg,20925
87
- skfolio-0.4.3.dist-info/LICENSE,sha256=F6Gi-ZJX5BlVzYK8R9NcvAkAsKa7KO29xB1OScbrH6Q,1526
88
- skfolio-0.4.3.dist-info/METADATA,sha256=PUf5onO29CqsRRaMyrMP3y0RKw6MJ43TNQ_2hMks7n0,19611
89
- skfolio-0.4.3.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
90
- skfolio-0.4.3.dist-info/top_level.txt,sha256=NXEaoS9Ms7t32gxkb867nV0OKlU0KmssL7IJBVo0fJs,8
91
- skfolio-0.4.3.dist-info/RECORD,,
91
+ skfolio-0.5.1.dist-info/LICENSE,sha256=F6Gi-ZJX5BlVzYK8R9NcvAkAsKa7KO29xB1OScbrH6Q,1526
92
+ skfolio-0.5.1.dist-info/METADATA,sha256=ZV5d0XFTqEJzVJZsLvX2OopVxQu-_UCoM1R9wYWQlWE,19611
93
+ skfolio-0.5.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
94
+ skfolio-0.5.1.dist-info/top_level.txt,sha256=NXEaoS9Ms7t32gxkb867nV0OKlU0KmssL7IJBVo0fJs,8
95
+ skfolio-0.5.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.2.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,343 +0,0 @@
1
- """pre-selection estimators module"""
2
-
3
- # Copyright (c) 2023
4
- # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
- # License: BSD 3 clause
6
-
7
- import numpy as np
8
- import numpy.typing as npt
9
- import sklearn.base as skb
10
- import sklearn.feature_selection as skf
11
- import sklearn.utils.validation as skv
12
-
13
- import skfolio.typing as skt
14
- from skfolio.measures import RatioMeasure
15
- from skfolio.population import Population
16
- from skfolio.portfolio import Portfolio
17
-
18
-
19
class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for dropping highly correlated assets.

    Simply removing all correlation pairs above the threshold will remove more assets
    than necessary and a naive sequential removal is suboptimal and depends on the
    initial assets ordering.

    Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
    the threshold and corr(Y,Z) below.
    The first approach would remove X,Y,Z and the second approach would remove either
    Y and Z or X depending on the initial ordering.

    To avoid these shortcomings, we implement the below algorithm:

        * Step 1: select all correlation pairs above the threshold.
        * Step 2: sort all the selected correlation pairs from highest to lowest.
        * Step 3: for each pair, if none of the two assets has been removed, keep the
          asset with the lowest average correlation against the other assets.

    Parameters
    ----------
    threshold : float, default=0.95
        Correlation threshold. The default value is `0.95`.

    absolute : bool, default=False
        If this is set to True, the absolute value of the correlation is used for
        both the pair selection and the average correlation. This has the effect
        of also dropping strongly negatively correlated assets.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(self, threshold: float = 0.95, absolute: bool = False):
        self.threshold = threshold
        self.absolute = absolute

    def fit(self, X: npt.ArrayLike, y=None) -> "DropCorrelated":
        """Run the correlation transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DropCorrelated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")

        n_assets = X.shape[1]
        # `np.corrcoef` collapses to a scalar when there is a single asset, so we
        # force a 2D matrix to keep the axis-based operations below valid.
        corr = np.atleast_2d(np.corrcoef(X.T))
        if self.absolute:
            # Honor the `absolute` parameter: strongly negative correlations are
            # treated like strongly positive ones.
            corr = np.abs(corr)
        mean_corr = corr.mean(axis=0)

        # Each (rows[p], cols[p]) is one unordered pair of distinct assets.
        rows, cols = np.triu_indices(n_assets, 1)
        pair_corr = corr[rows, cols]

        # select all correlation pairs above the threshold
        selected_idx = np.argwhere(pair_corr > self.threshold).flatten()

        # sort all the selected correlation pairs from highest to lowest
        selected_idx = selected_idx[np.argsort(-pair_corr[selected_idx])]

        # for each pair, if none of the two assets has been removed, keep the asset
        # with the lowest average correlation with other assets
        to_remove = set()
        for idx in selected_idx:
            i, j = rows[idx], cols[idx]
            if i not in to_remove and j not in to_remove:
                if mean_corr[i] > mean_corr[j]:
                    to_remove.add(i)
                else:
                    to_remove.add(j)
        self.to_keep_ = ~np.isin(np.arange(n_assets), list(to_remove))
        return self

    def _get_support_mask(self):
        """Return the boolean mask of the assets kept by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
114
-
115
-
116
class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for selecting the `k` best or worst assets.

    Each asset is evaluated on its own (as a single-asset portfolio) and the `k`
    assets ranking first or last on the chosen measure are kept.

    Parameters
    ----------
    k : int, default=10
        Number of assets to select. If `k` is higher than the number of assets, all
        assets are selected.

    measure : Measure, default=RatioMeasure.SHARPE_RATIO
        The :ref:`measure <measures_ref>` used to sort the assets.
        The default is `RatioMeasure.SHARPE_RATIO`.

    highest : bool, default=True
        If this is set to True, the `k` assets with the highest `measure` are selected,
        otherwise it is the `k` lowest.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        k: int = 10,
        measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
        highest: bool = True,
    ):
        self.k = k
        self.measure = measure
        self.highest = highest

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
        """Rank the assets on `measure` and flag the `k` extremes to keep.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectKExtremes
            Fitted estimator.

        Raises
        ------
        ValueError
            If `k` is not strictly positive.
        """
        X = self._validate_data(X)
        n_selected = int(self.k)
        if n_selected <= 0:
            raise ValueError("`k` must be strictly positive")
        n_assets = X.shape[1]

        # One portfolio per asset, fully invested in that asset only.
        single_asset_portfolios = Population([])
        for asset_idx in range(n_assets):
            one_hot = np.zeros(n_assets)
            one_hot[asset_idx] = 1
            single_asset_portfolios.append(Portfolio(X=X, weights=one_hot))

        ranked = single_asset_portfolios.sort_measure(
            measure=self.measure, reverse=self.highest
        )
        # Each portfolio holds exactly one asset, so its first non-zero index
        # identifies that asset.
        kept_idx = [ptf.nonzero_assets_index[0] for ptf in ranked[:n_selected]]
        self.to_keep_ = np.isin(np.arange(n_assets), kept_idx)
        return self

    def _get_support_mask(self):
        """Return the boolean mask of the assets kept by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
198
-
199
-
200
class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for selecting non dominated assets.

    Pre-selection based on the Assets Preselection Process 2 [1]_.

    Good single asset (for example with high return and low risk) is likely to
    contribute to the final optimized portfolio. Each asset is considered as a portfolio
    and these assets are ranked using the non-domination sorting method. The selection
    is based on the ranks assigned to each asset based on their fitness until the number
    of selected assets reaches the user-defined number.

    Considering only the fitness of individual asset is insufficient because a pair of
    negatively correlated assets has the potential to reduce the risk. Therefore,
    negatively correlated pairs of assets are also considered.

    Parameters
    ----------
    min_n_assets : int, optional
        The minimum number of assets to select. Fronts are added in rank order and
        always in full, because all assets in the same front have the same rank;
        the selection stops once at least `min_n_assets` assets are selected.
        If `min_n_assets` is greater than or equal to the number of assets, all
        assets are kept.
        The default (`None`) is to select the first front.

    threshold : float, default=-0.5
        Asset pair with a correlation below this threshold are included in the
        non-domination sorting. The default value is `-0.5`.

    fitness_measures : list[Measure], optional
        A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
        The fitness is used to compare portfolios in terms of domination, compute the
        pareto fronts and run the portfolio selection using non-dominated sorting.
        The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
        Algorithms and Preselection Methods",
        B.Y. Qu and Q.Zhou (2017).
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        min_n_assets: int | None = None,
        threshold: float = -0.5,
        fitness_measures: list[skt.Measure] | None = None,
    ):
        self.min_n_assets = min_n_assets
        self.threshold = threshold
        self.fitness_measures = fitness_measures

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectNonDominated":
        """Run the Non Dominated transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectNonDominated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")
        n_assets = X.shape[1]

        # Nothing to filter when the requested minimum covers every asset.
        if self.min_n_assets is not None and self.min_n_assets >= n_assets:
            self.to_keep_ = np.full(n_assets, True)
            return self

        # Build a population of portfolio
        population = Population([])
        # Add single assets
        for i in range(n_assets):
            weights = np.zeros(n_assets)
            weights[i] = 1
            population.append(
                Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
            )

        # Add pairs with correlation below threshold with minimum variance
        # ptf_variance = sigma1^2 w1^2 + sigma2^2 w2^2 + 2 sigma12 w1 w2 (1)
        # with w1 + w2 = 1
        # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
        # respect to w1 and set to zero.
        # By solving the obtained equation, we get:
        # w1 = (sigma2^2 - sigma12) / (sigma1^2 + sigma2^2 - 2 sigma12)
        # w2 = 1 - w1
        corr = np.corrcoef(X.T)
        covariance = np.cov(X.T)
        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
            if corr[i, j] < self.threshold:
                cov = covariance[i, j]
                var1 = covariance[i, i]
                var2 = covariance[j, j]
                weights = np.zeros(n_assets)
                weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
                weights[j] = 1 - weights[i]
                population.append(
                    Portfolio(
                        X=X, weights=weights, fitness_measures=self.fitness_measures
                    )
                )

        fronts = population.non_denominated_sort(
            first_front_only=self.min_n_assets is None
        )
        new_assets_idx = set()
        for front in fronts:
            # Stop as soon as the minimum is reached; the previous front was added
            # in full because all of its members share the same rank.
            if (
                self.min_n_assets is not None
                and len(new_assets_idx) >= self.min_n_assets
            ):
                break
            for idx in front:
                new_assets_idx.update(population[idx].nonzero_assets_index)
        self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
        return self

    def _get_support_mask(self):
        """Return the boolean mask of the assets kept by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_