skfolio 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,343 +0,0 @@
1
- """pre-selection estimators module"""
2
-
3
- # Copyright (c) 2023
4
- # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
- # License: BSD 3 clause
6
-
7
- import numpy as np
8
- import numpy.typing as npt
9
- import sklearn.base as skb
10
- import sklearn.feature_selection as skf
11
- import sklearn.utils.validation as skv
12
-
13
- import skfolio.typing as skt
14
- from skfolio.measures import RatioMeasure
15
- from skfolio.population import Population
16
- from skfolio.portfolio import Portfolio
17
-
18
-
19
class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for dropping highly correlated assets.

    Simply removing all correlation pairs above the threshold will remove more assets
    than necessary and a naive sequential removal is suboptimal and depends on the
    initial assets ordering.

    Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
    the threshold and corr(Y,Z) below.
    The first approach would remove X,Y,Z and the second approach would remove either
    Y and Z or X depending on the initial ordering.

    To avoid these shortcomings, we implement the below algorithm:

        * Step 1: select all correlation pairs above the threshold.
        * Step 2: sort all the selected correlation pairs from highest to lowest.
        * Step 3: for each pair, if none of the two assets has been removed, keep the
          asset with the lowest average correlation against the other assets.

    Parameters
    ----------
    threshold : float, default=0.95
        Correlation threshold. The default value is `0.95`.

    absolute : bool, default=False
        If this is set to True, we take the absolute value of the correlation. This has
        for effect to also include negatively correlated assets. The absolute value
        is applied to the whole correlation matrix, so it is used both for the pair
        selection and for the average correlation of Step 3.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(self, threshold: float = 0.95, absolute: bool = False):
        self.threshold = threshold
        self.absolute = absolute

    def fit(self, X: npt.ArrayLike, y=None) -> "DropCorrelated":
        """Run the correlation transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DropCorrelated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")

        n_assets = X.shape[1]
        # `np.corrcoef` collapses to a 0-d value when there is a single asset;
        # `atleast_2d` keeps the matrix shape so the reductions below still work.
        corr = np.atleast_2d(np.corrcoef(X.T))
        if self.absolute:
            # Honor `absolute`: strongly negative correlations count as high too.
            corr = np.abs(corr)
        mean_corr = corr.mean(axis=0)

        rows, cols = np.triu_indices(n_assets, 1)
        # Correlation of every distinct asset pair (strict upper triangle),
        # computed once and reused for both the selection and the ordering.
        pair_corr = corr[rows, cols]

        # select all correlation pairs above the threshold
        selected_idx = np.flatnonzero(pair_corr > self.threshold)

        # sort all the selected correlation pairs from highest to lowest
        selected_idx = selected_idx[np.argsort(-pair_corr[selected_idx])]

        # for each pair, if none of the two assets has been removed, keep the asset with
        # the lowest average correlation with other assets
        to_keep = np.ones(n_assets, dtype=bool)
        for idx in selected_idx:
            i, j = rows[idx], cols[idx]
            if to_keep[i] and to_keep[j]:
                # On ties (or non-comparable values) the second asset is removed.
                to_keep[i if mean_corr[i] > mean_corr[j] else j] = False
        self.to_keep_ = to_keep
        return self

    def _get_support_mask(self):
        """Return the fitted boolean mask of the assets that are kept."""
        skv.check_is_fitted(self)
        return self.to_keep_
114
-
115
-
116
class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for selecting the `k` best or worst assets.

    Every asset is ranked on its own according to a given measure, and only the
    `k` top-ranked (or bottom-ranked) assets are kept.

    Parameters
    ----------
    k : int, default=10
        Number of assets to select. If `k` is higher than the number of assets, all
        assets are selected.

    measure : Measure, default=RatioMeasure.SHARPE_RATIO
        The :ref:`measure <measures_ref>` used to sort the assets.
        The default is `RatioMeasure.SHARPE_RATIO`.

    highest : bool, default=True
        If this is set to True, the `k` assets with the highest `measure` are selected,
        otherwise it is the `k` lowest.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        k: int = 10,
        measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
        highest: bool = True,
    ):
        self.k = k
        self.measure = measure
        self.highest = highest

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
        """Rank the assets individually and flag the `k` extremes to keep.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectKExtremes
            Fitted estimator.

        Raises
        ------
        ValueError
            If `k`, once converted to an integer, is not strictly positive.
        """
        X = self._validate_data(X)
        n_selected = int(self.k)
        if n_selected <= 0:
            raise ValueError("`k` must be strictly positive")
        n_assets = X.shape[1]

        # One portfolio per asset, fully invested in that single asset.
        single_asset_portfolios = Population([])
        for asset in range(n_assets):
            one_hot = np.zeros(n_assets)
            one_hot[asset] = 1
            single_asset_portfolios.append(Portfolio(X=X, weights=one_hot))

        ranked = single_asset_portfolios.sort_measure(
            measure=self.measure, reverse=self.highest
        )
        # Slicing past the end is harmless: an oversized `k` keeps every asset.
        winners = ranked[:n_selected]
        kept_idx = [ptf.nonzero_assets_index[0] for ptf in winners]
        self.to_keep_ = np.isin(np.arange(n_assets), kept_idx)
        return self

    def _get_support_mask(self):
        """Return the fitted boolean mask of the assets that are kept."""
        skv.check_is_fitted(self)
        return self.to_keep_
198
-
199
-
200
class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for selecting non dominated assets.

    Pre-selection based on the Assets Preselection Process 2 [1]_.

    Good single asset (for example with high return and low risk) is likely to
    contribute to the final optimized portfolio. Each asset is considered as a portfolio
    and these assets are ranked using the non-domination sorting method. The selection
    is based on the ranks assigned to each asset based on their fitness until the number
    of selected assets reaches the user-defined number.

    Considering only the fitness of individual asset is insufficient because a pair of
    negatively correlated assets has the potential to reduce the risk. Therefore,
    negatively correlated pairs of assets are also considered.

    Parameters
    ----------
    min_n_assets : int, optional
        The minimum number of assets to select. If `min_n_assets` is reached before the
        end of the current non-dominated front, we return the remaining assets of this
        front. This is because all assets in the same front have same rank.
        The default (`None`) is to select the first front.

    threshold : float, default=-0.5
        Asset pair with a correlation below this threshold are included in the
        non-domination sorting. The default value is `-0.5`.

    fitness_measures : list[Measure], optional
        A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
        The fitness is used to compare portfolios in terms of domination, compute the
        pareto fronts and run the portfolio selection using non-dominated sorting.
        The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
        Algorithms and Preselection Methods",
        B.Y. Qu and Q.Zhou (2017).
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        min_n_assets: int | None = None,
        threshold: float = -0.5,
        fitness_measures: list[skt.Measure] | None = None,
    ):
        self.min_n_assets = min_n_assets
        self.threshold = threshold
        self.fitness_measures = fitness_measures

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectNonDominated":
        """Run the Non Dominated transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectNonDominated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")
        n_assets = X.shape[1]

        # Nothing to filter when the requested minimum covers every asset.
        if self.min_n_assets is not None and self.min_n_assets >= n_assets:
            self.to_keep_ = np.full(n_assets, True)
            return self

        # Build a population of portfolio
        population = Population([])
        # Add single assets
        for i in range(n_assets):
            weights = np.zeros(n_assets)
            weights[i] = 1
            population.append(
                Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
            )

        # Add pairs with correlation below threshold with minimum variance
        # ptf_variance = sigma1^2 w1^2 + sigma2^2 w2^2 + 2 sigma12 w1 w2 (1)
        # with w1 + w2 = 1
        # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
        # respect to w1 and set to zero.
        # By solving the obtained equation, we get:
        # w1 = (sigma2^2 - sigma12) / (sigma1^2 + sigma2^2 - 2 sigma12)
        # w2 = 1 - w1

        corr = np.corrcoef(X.T)
        covariance = np.cov(X.T)
        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
            if corr[i, j] < self.threshold:
                cov = covariance[i, j]
                var1 = covariance[i, i]
                var2 = covariance[j, j]
                weights = np.zeros(n_assets)
                weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
                weights[j] = 1 - weights[i]
                population.append(
                    Portfolio(
                        X=X, weights=weights, fitness_measures=self.fitness_measures
                    )
                )

        fronts = population.non_denominated_sort(
            first_front_only=self.min_n_assets is None
        )
        new_assets_idx = set()
        for front in fronts:
            # Stop as soon as the requested minimum is reached (`>=`, not `>`):
            # pulling in one more front when the count already equals
            # `min_n_assets` would select more assets than asked for.
            if (
                self.min_n_assets is not None
                and len(new_assets_idx) >= self.min_n_assets
            ):
                break
            # A front is always consumed whole: its members share the same rank.
            for idx in front:
                new_assets_idx.update(population[idx].nonzero_assets_index)
        self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
        return self

    def _get_support_mask(self):
        """Return the fitted boolean mask of the assets that are kept."""
        skv.check_is_fitted(self)
        return self.to_keep_