skfolio 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/optimization/cluster/hierarchical/_base.py +12 -65
- skfolio/optimization/cluster/hierarchical/_herc.py +75 -26
- skfolio/optimization/cluster/hierarchical/_hrp.py +68 -12
- skfolio/population/_population.py +1 -1
- skfolio/pre_selection/__init__.py +12 -6
- skfolio/pre_selection/_drop_correlated.py +108 -0
- skfolio/pre_selection/_select_complete.py +116 -0
- skfolio/pre_selection/_select_k_extremes.py +100 -0
- skfolio/pre_selection/_select_non_dominated.py +161 -0
- skfolio/pre_selection/_select_non_expiring.py +148 -0
- skfolio/preprocessing/_returns.py +9 -3
- skfolio/utils/stats.py +87 -0
- {skfolio-0.4.2.dist-info → skfolio-0.5.0.dist-info}/METADATA +2 -2
- {skfolio-0.4.2.dist-info → skfolio-0.5.0.dist-info}/RECORD +17 -13
- {skfolio-0.4.2.dist-info → skfolio-0.5.0.dist-info}/WHEEL +1 -1
- skfolio/pre_selection/_pre_selection.py +0 -343
- {skfolio-0.4.2.dist-info → skfolio-0.5.0.dist-info}/LICENSE +0 -0
- {skfolio-0.4.2.dist-info → skfolio-0.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,100 @@
|
|
1
|
+
"""Pre-selection SelectKExtremes module"""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import numpy.typing as npt
|
9
|
+
import sklearn.base as skb
|
10
|
+
import sklearn.feature_selection as skf
|
11
|
+
import sklearn.utils.validation as skv
|
12
|
+
|
13
|
+
import skfolio.typing as skt
|
14
|
+
from skfolio.measures import RatioMeasure
|
15
|
+
from skfolio.population import Population
|
16
|
+
from skfolio.portfolio import Portfolio
|
17
|
+
|
18
|
+
|
19
|
+
class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer that retains the `k` best or the `k` worst assets.

    Every asset is scored on its own with a given measure and only the `k`
    top-ranked (or bottom-ranked) assets are kept.

    Parameters
    ----------
    k : int, default=10
        Number of assets to select. If `k` is higher than the number of assets, all
        assets are selected.

    measure : Measure, default=RatioMeasure.SHARPE_RATIO
        The :ref:`measure <measures_ref>` used to sort the assets.
        The default is `RatioMeasure.SHARPE_RATIO`.

    highest : bool, default=True
        If this is set to True, the `k` assets with the highest `measure` are selected,
        otherwise it is the `k` lowest.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        k: int = 10,
        measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
        highest: bool = True,
    ):
        # Parameters are stored untouched; validation is deferred to `fit`.
        self.k = k
        self.measure = measure
        self.highest = highest

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
        """Rank the assets with `measure` and flag the `k` extremes to keep.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectKExtremes
            Fitted estimator.

        Raises
        ------
        ValueError
            If `k`, once converted to `int`, is not strictly positive.
        """
        X = self._validate_data(X)

        n_selected = int(self.k)
        if n_selected <= 0:
            raise ValueError("`k` must be strictly positive")

        n_assets = X.shape[1]
        asset_positions = np.arange(n_assets)

        # One fully-invested single-asset portfolio per column of X, so that the
        # measure of each portfolio is the measure of the corresponding asset.
        single_asset_portfolios = Population([])
        for position in range(n_assets):
            unit_weights = (asset_positions == position).astype(float)
            single_asset_portfolios.append(Portfolio(X=X, weights=unit_weights))

        ranked = single_asset_portfolios.sort_measure(
            measure=self.measure, reverse=self.highest
        )
        # Slicing past the end is harmless: all assets are kept when k > n_assets.
        kept_positions = [
            portfolio.nonzero_assets_index[0] for portfolio in ranked[:n_selected]
        ]
        self.to_keep_ = np.isin(asset_positions, kept_positions)
        return self

    def _get_support_mask(self):
        """Return the boolean mask of kept assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
|
@@ -0,0 +1,161 @@
|
|
1
|
+
"""Pre-selection SelectNonDominated module"""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import numpy.typing as npt
|
9
|
+
import sklearn.base as skb
|
10
|
+
import sklearn.feature_selection as skf
|
11
|
+
import sklearn.utils.validation as skv
|
12
|
+
|
13
|
+
import skfolio.typing as skt
|
14
|
+
from skfolio.population import Population
|
15
|
+
from skfolio.portfolio import Portfolio
|
16
|
+
|
17
|
+
|
18
|
+
class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for selecting non dominated assets.

    Pre-selection based on the Assets Preselection Process 2 [1]_.

    Good single asset (for example with high return and low risk) is likely to
    contribute to the final optimized portfolio. Each asset is considered as a portfolio
    and these assets are ranked using the non-domination sorting method. The selection
    is based on the ranks assigned to each asset based on their fitness until the number
    of selected assets reaches the user-defined number.

    Considering only the fitness of individual asset is insufficient because a pair of
    negatively correlated assets has the potential to reduce the risk. Therefore,
    negatively correlated pairs of assets are also considered.

    Parameters
    ----------
    min_n_assets : int, optional
        The minimum number of assets to select. If `min_n_assets` is reached before the
        end of the current non-dominated front, we return the remaining assets of this
        front. This is because all assets in the same front have same rank.
        The default (`None`) is to select the first front.

    threshold : float, default=-0.5
        Asset pair with a correlation below this threshold are included in the
        non-domination sorting. The default value is `-0.5`.

    fitness_measures : list[Measure], optional
        A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
        The fitness is used to compare portfolios in terms of domination, compute the
        pareto fronts and run the portfolio selection using non-dominated sorting.
        The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1]  "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
        Algorithms and Preselection Methods",
        B.Y. Qu and Q.Zhou (2017).
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        min_n_assets: int | None = None,
        threshold: float = -0.5,
        fitness_measures: list[skt.Measure] | None = None,
    ):
        self.min_n_assets = min_n_assets
        self.threshold = threshold
        self.fitness_measures = fitness_measures

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectNonDominated":
        """Run the Non Dominated transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectNonDominated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")
        n_assets = X.shape[1]

        # Nothing to rank when the requested minimum already covers every asset.
        if self.min_n_assets is not None and self.min_n_assets >= n_assets:
            self.to_keep_ = np.full(n_assets, True)
            return self

        # Build a population of portfolio
        population = Population([])
        # Add single assets
        for i in range(n_assets):
            weights = np.zeros(n_assets)
            weights[i] = 1
            population.append(
                Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
            )

        # Add pairs with correlation below threshold with minimum variance
        # ptf_variance = sigma1^2 w1^2 + sigma2^2 w2^2 + 2 sigma12 w1 w2 (1)
        # with w1 + w2 = 1
        # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
        # respect to w1 and set to zero.
        # By solving the obtained equation, we get:
        # w1 = (sigma2^2 - sigma12) / (sigma1^2 + sigma2^2 - 2 sigma12)
        # w2 = 1 - w1

        corr = np.corrcoef(X.T)
        covariance = np.cov(X.T)
        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
            if corr[i, j] < self.threshold:
                cov = covariance[i, j]
                var1 = covariance[i, i]
                var2 = covariance[j, j]
                weights = np.zeros(n_assets)
                weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
                weights[j] = 1 - weights[i]
                population.append(
                    Portfolio(
                        X=X, weights=weights, fitness_measures=self.fitness_measures
                    )
                )

        fronts = population.non_denominated_sort(
            first_front_only=self.min_n_assets is None
        )
        new_assets_idx = set()
        for front in fronts:
            # Fronts are consumed whole (all members share the same rank); stop as
            # soon as the requested minimum is reached, including when it is met
            # exactly, so that no extra lower-ranked front is pulled in.
            if (
                self.min_n_assets is not None
                and len(new_assets_idx) >= self.min_n_assets
            ):
                break
            for idx in front:
                # A pair portfolio contributes both of its assets.
                new_assets_idx.update(population[idx].nonzero_assets_index)
        self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
        return self

    def _get_support_mask(self):
        """Return the boolean mask of kept assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
|
@@ -0,0 +1,148 @@
|
|
1
|
+
"""pre-selection estimators module"""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# Implementation derived from:
|
6
|
+
# Conway-Yu https://github.com/skfolio/skfolio/discussions/60
|
7
|
+
# License: BSD 3 clause
|
8
|
+
|
9
|
+
import datetime as dt
|
10
|
+
|
11
|
+
import numpy as np
|
12
|
+
import pandas as pd
|
13
|
+
import sklearn.base as skb
|
14
|
+
import sklearn.feature_selection as skf
|
15
|
+
import sklearn.utils.validation as skv
|
16
|
+
|
17
|
+
|
18
|
+
class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
    """
    Transformer to select assets that do not expire within a specified lookahead period
    after the end of the observation period.

    This transformer removes assets (columns) that have expiration dates within a
    given lookahead period from the end of the dataset, allowing only assets that
    remain active beyond this lookahead period to be selected.

    This is useful when an exit strategy is needed before asset expiration, such as
    for bonds or options with known end dates, or when applying WalkForward
    cross-validation. It ensures that assets expiring during the test period are
    excluded, so that only live assets are included in each training and test period.

    Parameters
    ----------
    expiration_dates : dict[str, dt.datetime | pd.Timestamp], optional
        Dictionary with asset names as keys and expiration dates as values.
        Used to check if each asset expires within the date offset.
        Assets with no expiration date will be retained by default.

    expiration_lookahead : pd.offsets.BaseOffset | dt.timedelta, optional
        The lookahead period after the end of the dataset within which assets with
        expiration dates will be removed.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    Notes
    -----
    This transformer only supports DataFrames with a DateTime index.

    Examples
    --------
    >>> import pandas as pd
    >>> import datetime as dt
    >>> from sklearn import set_config
    >>> set_config(transform_output="pandas")
    >>> X = pd.DataFrame(
    ...     {
    ...         'asset1': [1, 2, 3, 4],
    ...         'asset2': [2, 3, 4, 5],
    ...         'asset3': [3, 4, 5, 6],
    ...         'asset4': [4, 5, 6, 7]
    ...     }, index=pd.date_range("2023-01-01", periods=4, freq="D")
    ... )
    >>> expiration_dates = {
    ...     'asset1': pd.Timestamp("2023-01-10"),
    ...     'asset2': pd.Timestamp("2023-01-02"),
    ...     'asset3': pd.Timestamp("2023-01-06"),
    ...     'asset4': dt.datetime(2023, 5, 1)
    ... }
    >>> selector = SelectNonExpiring(
    ...     expiration_dates=expiration_dates,
    ...     expiration_lookahead=pd.DateOffset(days=5)
    ... )
    >>> selector.fit_transform(X)
                asset1  asset4
    2023-01-01       1       4
    2023-01-02       2       5
    2023-01-03       3       6
    2023-01-04       4       7
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        expiration_dates: dict[str, dt.datetime | pd.Timestamp] | None = None,
        expiration_lookahead: pd.offsets.BaseOffset | dt.timedelta | None = None,
    ):
        self.expiration_dates = expiration_dates
        self.expiration_lookahead = expiration_lookahead

    def fit(self, X: pd.DataFrame, y=None) -> "SelectNonExpiring":
        """Run the SelectNonExpiring transformer and get the appropriate assets.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_observations, n_assets)
            Returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectNonExpiring
            Fitted estimator.

        Raises
        ------
        ValueError
            If `X` has no `DatetimeIndex` index, or if `expiration_dates` or
            `expiration_lookahead` is not provided.
        """
        # Validate by allowing NaNs. The validated array is discarded because the
        # index and column labels of the original DataFrame are needed below.
        _ = self._validate_data(X, force_all_finite="allow-nan")

        if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
            raise ValueError(
                "X must be a DataFrame with an index of type DatetimeIndex"
            )

        if self.expiration_dates is None:
            raise ValueError("`expiration_dates` must be provided")

        if self.expiration_lookahead is None:
            raise ValueError("`expiration_lookahead` must be provided")

        # Calculate the cutoff date
        end_date = X.index[-1]
        cutoff_date = end_date + self.expiration_lookahead

        # Assets absent from `expiration_dates` are treated as never expiring.
        self.to_keep_ = np.array(
            [
                self.expiration_dates.get(asset, pd.Timestamp.max) > cutoff_date
                for asset in X.columns
            ]
        )

        return self

    def _get_support_mask(self):
        """Return the boolean mask of kept assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_

    def _more_tags(self):
        # Declares NaN support, consistent with the "allow-nan" validation in `fit`.
        return {"allow_nan": True}
|
@@ -17,6 +17,7 @@ def prices_to_returns(
|
|
17
17
|
nan_threshold: float = 1,
|
18
18
|
join: Literal["left", "right", "inner", "outer", "cross"] = "outer",
|
19
19
|
drop_inceptions_nan: bool = True,
|
20
|
+
fill_nan: bool = True,
|
20
21
|
) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
|
21
22
|
r"""Transforms a DataFrame of prices to linear or logarithmic returns.
|
22
23
|
|
@@ -64,11 +65,15 @@ def prices_to_returns(
|
|
64
65
|
this threshold. The default (`1.0`) is to keep all the observations.
|
65
66
|
|
66
67
|
drop_inceptions_nan : bool, default=True
|
67
|
-
If
|
68
|
+
If set to True, observations at the beginning are dropped if any of
|
68
69
|
the asset values are missing, otherwise we keep the NaNs. This is useful when
|
69
70
|
you work with a large universe of assets with different inception dates coupled
|
70
71
|
with a pre-selection Transformer.
|
71
72
|
|
73
|
+
fill_nan : bool, default=True
|
74
|
+
If set to True, missing prices (NaNs) are forward filled using the previous
|
75
|
+
price. Otherwise, NaNs are kept.
|
76
|
+
|
72
77
|
Returns
|
73
78
|
-------
|
74
79
|
X : DataFrame
|
@@ -106,7 +111,8 @@ def prices_to_returns(
|
|
106
111
|
df.drop(to_drop, axis=0, inplace=True)
|
107
112
|
|
108
113
|
# Forward fill missing values
|
109
|
-
|
114
|
+
if fill_nan:
|
115
|
+
df.ffill(inplace=True)
|
110
116
|
# Drop rows according to drop_inceptions_nan
|
111
117
|
# noinspection PyTypeChecker
|
112
118
|
df.dropna(how="any" if drop_inceptions_nan else "all", inplace=True)
|
@@ -114,7 +120,7 @@ def prices_to_returns(
|
|
114
120
|
df.dropna(axis=1, how="all", inplace=True)
|
115
121
|
|
116
122
|
# returns
|
117
|
-
all_returns = df.pct_change().iloc[1:]
|
123
|
+
all_returns = df.pct_change(fill_method=None).iloc[1:]
|
118
124
|
if log_returns:
|
119
125
|
all_returns = np.log1p(all_returns)
|
120
126
|
|
skfolio/utils/stats.py
CHANGED
@@ -10,9 +10,11 @@ import warnings
|
|
10
10
|
# Statsmodels, Copyright (C) 2006, Jonathan E. Taylor, Licensed under BSD 3 clause.
|
11
11
|
from enum import auto
|
12
12
|
|
13
|
+
import cvxpy as cp
|
13
14
|
import numpy as np
|
14
15
|
import scipy.cluster.hierarchy as sch
|
15
16
|
import scipy.optimize as sco
|
17
|
+
import scipy.sparse.linalg as scl
|
16
18
|
import scipy.spatial.distance as scd
|
17
19
|
import scipy.special as scs
|
18
20
|
from scipy.sparse import csr_matrix
|
@@ -34,6 +36,7 @@ __all__ = [
|
|
34
36
|
"compute_optimal_n_clusters",
|
35
37
|
"rand_weights",
|
36
38
|
"rand_weights_dirichlet",
|
39
|
+
"minimize_relative_weight_deviation",
|
37
40
|
]
|
38
41
|
|
39
42
|
|
@@ -488,3 +491,87 @@ def compute_optimal_n_clusters(distance: np.ndarray, linkage_matrix: np.ndarray)
|
|
488
491
|
# k=0 represents one cluster
|
489
492
|
k = np.argmax(gaps) + 2
|
490
493
|
return k
|
494
|
+
|
495
|
+
|
496
|
+
def minimize_relative_weight_deviation(
|
497
|
+
weights: np.ndarray,
|
498
|
+
min_weights: np.ndarray,
|
499
|
+
max_weights: np.ndarray,
|
500
|
+
solver: str = "CLARABEL",
|
501
|
+
solver_params: dict | None = None,
|
502
|
+
) -> np.ndarray:
|
503
|
+
r"""
|
504
|
+
Apply weight constraints to an initial array of weights by minimizing the relative
|
505
|
+
weight deviation of the final weights from the initial weights.
|
506
|
+
|
507
|
+
.. math::
|
508
|
+
\begin{cases}
|
509
|
+
\begin{aligned}
|
510
|
+
&\min_{w} & & \Vert \frac{w - w_{init}}{w_{init}} \Vert_{2}^{2} \\
|
511
|
+
&\text{s.t.} & & \sum_{i=1}^{N} w_{i} = 1 \\
|
512
|
+
& & & w_{min} \leq w_i \leq w_{max}, \quad \forall i
|
513
|
+
\end{aligned}
|
514
|
+
\end{cases}
|
515
|
+
|
516
|
+
Parameters
|
517
|
+
----------
|
518
|
+
weights : ndarray of shape (n_assets,)
|
519
|
+
Initial weights.
|
520
|
+
|
521
|
+
min_weights : ndarray of shape (n_assets,)
|
522
|
+
Minimum assets weights (weights lower bounds).
|
523
|
+
|
524
|
+
max_weights : ndarray of shape (n_assets,)
|
525
|
+
Maximum assets weights (weights upper bounds).
|
526
|
+
|
527
|
+
solver : str, default="CLARABEL"
|
528
|
+
The solver to use. The default is "CLARABEL" which is written in Rust and has
|
529
|
+
better numerical stability and performance than ECOS and SCS.
|
530
|
+
For more details about available solvers, check the CVXPY documentation:
|
531
|
+
https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver
|
532
|
+
|
533
|
+
solver_params : dict, optional
|
534
|
+
Solver parameters. For example, `solver_params=dict(verbose=True)`.
|
535
|
+
The default (`None`) is to use the CVXPY default.
|
536
|
+
For more details about solver arguments, check the CVXPY documentation:
|
537
|
+
https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options
|
538
|
+
"""
|
539
|
+
if not (weights.shape == min_weights.shape == max_weights.shape):
|
540
|
+
raise ValueError("`min_weights` and `max_weights` must have same size")
|
541
|
+
|
542
|
+
if np.any(weights < 0):
|
543
|
+
raise ValueError("Initial weights must be strictly positive")
|
544
|
+
|
545
|
+
if not np.isclose(np.sum(weights), 1.0):
|
546
|
+
raise ValueError("Initial weights must sum to one")
|
547
|
+
|
548
|
+
if np.any(max_weights < min_weights):
|
549
|
+
raise ValueError("`min_weights` must be lower or equal to `max_weights`")
|
550
|
+
|
551
|
+
if np.all((weights >= min_weights) & (weights <= max_weights)):
|
552
|
+
return weights
|
553
|
+
|
554
|
+
if solver_params is None:
|
555
|
+
solver_params = {}
|
556
|
+
|
557
|
+
n = len(weights)
|
558
|
+
w = cp.Variable(n)
|
559
|
+
|
560
|
+
objective = cp.Minimize(cp.norm(w / weights - 1))
|
561
|
+
constraints = [cp.sum(w) == 1, w >= min_weights, w <= max_weights]
|
562
|
+
problem = cp.Problem(objective, constraints)
|
563
|
+
|
564
|
+
try:
|
565
|
+
problem.solve(solver=solver, **solver_params)
|
566
|
+
|
567
|
+
if w.value is None:
|
568
|
+
raise cp.SolverError("No solution found")
|
569
|
+
|
570
|
+
except (cp.SolverError, scl.ArpackNoConvergence):
|
571
|
+
raise cp.SolverError(
|
572
|
+
f"Solver '{solver}' failed. Try another"
|
573
|
+
" solver, or solve with solver_params=dict(verbose=True) for more"
|
574
|
+
" information"
|
575
|
+
) from None
|
576
|
+
|
577
|
+
return w.value
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skfolio
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Portfolio optimization built on top of scikit-learn
|
5
5
|
Author-email: Hugo Delatte <delatte.hugo@gmail.com>
|
6
6
|
Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
|
@@ -599,7 +599,7 @@ K-fold Cross-Validation
|
|
599
599
|
# mmp is the predicted MultiPeriodPortfolio object composed of 5 Portfolios (1 per testing fold)
|
600
600
|
|
601
601
|
mmp.plot_cumulative_returns()
|
602
|
-
print(mmp.summary()
|
602
|
+
print(mmp.summary())
|
603
603
|
|
604
604
|
|
605
605
|
Combinatorial Purged Cross-Validation
|
@@ -45,9 +45,9 @@ skfolio/optimization/_base.py,sha256=LoRONJP70AwbFpdgqVS_g145pCx0JGkazjWvkQzT_iM
|
|
45
45
|
skfolio/optimization/cluster/__init__.py,sha256=M3xVdYhNKp4e9CB7hzb4yjTxkkNCHh7Mt_KGFFrkOgs,388
|
46
46
|
skfolio/optimization/cluster/_nco.py,sha256=J3pPd9XkrAcWaKPSW5vMdtaFpDshBvOdUudbDGQSoNI,16366
|
47
47
|
skfolio/optimization/cluster/hierarchical/__init__.py,sha256=YnfcPHvjwB6kcG4hoQqc0NqIJKaG7OjBtmXNbOxCq08,405
|
48
|
-
skfolio/optimization/cluster/hierarchical/_base.py,sha256=
|
49
|
-
skfolio/optimization/cluster/hierarchical/_herc.py,sha256=
|
50
|
-
skfolio/optimization/cluster/hierarchical/_hrp.py,sha256=
|
48
|
+
skfolio/optimization/cluster/hierarchical/_base.py,sha256=l8rJHCH_79FOPdDL2I0dmAWcVWnNkcXHtzt0U-L7BN8,16280
|
49
|
+
skfolio/optimization/cluster/hierarchical/_herc.py,sha256=LPtUrvyW9G60OZhMWlZH_GHZHdX8mJHksrYGB-WPRVg,20358
|
50
|
+
skfolio/optimization/cluster/hierarchical/_hrp.py,sha256=dn6EKiTJ1wkoFhPdst6vlXnSQvXSYsMtB2zaGNVPpyA,18115
|
51
51
|
skfolio/optimization/convex/__init__.py,sha256=F6BPFikTo0B-7JCKazqLGEwM3RkgTNbFm5GAGkaq9Uo,570
|
52
52
|
skfolio/optimization/convex/_base.py,sha256=2at6Ll4qHkN_1wvYjl-yXWTbiRJj8fhNS-bfAT88YSw,76055
|
53
53
|
skfolio/optimization/convex/_distributionally_robust.py,sha256=tw_UNSDfAXP02khE10hpmcdlz3DQXQD7ttDqFDSHV1E,17811
|
@@ -60,15 +60,19 @@ skfolio/optimization/ensemble/_stacking.py,sha256=ZoICUnc_MwoXDQAR2kewCg-KIezSOI
|
|
60
60
|
skfolio/optimization/naive/__init__.py,sha256=Dkr55R48urC-jfYN007NTbei16N91Na_EDYLVqzhGgQ,147
|
61
61
|
skfolio/optimization/naive/_naive.py,sha256=AhEyYKEUAm-Fjn4p8SHwhp7yE9iF0tRyDZIjKYV4EeU,6390
|
62
62
|
skfolio/population/__init__.py,sha256=rsPPMUv95aTK7vmpPeQwF8NzFuBwk6RDo5g4HNaPzNM,80
|
63
|
-
skfolio/population/_population.py,sha256=
|
63
|
+
skfolio/population/_population.py,sha256=ej45tdk_CcMlNToCsx2VUk2YRktK3k4cRczGBpjlnDE,30427
|
64
64
|
skfolio/portfolio/__init__.py,sha256=YYtcAPmA2zeCxFGTXegg2FXcA7py6CxOX7IMTdYuXl0,586
|
65
65
|
skfolio/portfolio/_base.py,sha256=EFLsvHoxZmDvGPOKePr6hQGXU7y7TWsALvzYP9qt0fQ,39588
|
66
66
|
skfolio/portfolio/_multi_period_portfolio.py,sha256=K2JfEwlPD9iGO58lOdk7WUbWuXZDWw2prPT5T7pOdto,24387
|
67
67
|
skfolio/portfolio/_portfolio.py,sha256=gqvCKM6ZVfwZrgixiYdahgbQ1DRNW2LkGHkXOpjleb4,32753
|
68
|
-
skfolio/pre_selection/__init__.py,sha256=
|
69
|
-
skfolio/pre_selection/
|
68
|
+
skfolio/pre_selection/__init__.py,sha256=_H0jziIOq0nUETFQvjBP4AtKGzdh0EGGSXaECTcUhxY,482
|
69
|
+
skfolio/pre_selection/_drop_correlated.py,sha256=EDwRVqmkU-52VXQ-u350PYgjWCI5QnB8CfR1taLWffY,3818
|
70
|
+
skfolio/pre_selection/_select_complete.py,sha256=sE9TCitUA5KbEqPssl0qsCBD-oV_5Vx-b-kdU0hsFHI,3885
|
71
|
+
skfolio/pre_selection/_select_k_extremes.py,sha256=25FGievaDqlAHAxUmyznAd3LIq_7D3ajaSVD6E7luSI,3061
|
72
|
+
skfolio/pre_selection/_select_non_dominated.py,sha256=HLGNS14vgQlg5I5zj-b1QpgCaZROd0FALQSmyXGpK7o,5983
|
73
|
+
skfolio/pre_selection/_select_non_expiring.py,sha256=RAWnuW2u7y0ibsimJp5mRM9JQFOn0hHp-mWsp0FLPbs,4995
|
70
74
|
skfolio/preprocessing/__init__.py,sha256=15A1bzfPsbfxxXgGP1gstf4R0E_347Wn18z5W5jH-hk,94
|
71
|
-
skfolio/preprocessing/_returns.py,sha256=
|
75
|
+
skfolio/preprocessing/_returns.py,sha256=6mdNi7Dun5eNK4LdqKAxP4CCZEVfAEz40HXVrOiAaLA,4561
|
72
76
|
skfolio/prior/__init__.py,sha256=jql8NTiWlykPKJUXTOPdqm531mP8Pul1QAR6hXTXA6c,446
|
73
77
|
skfolio/prior/_base.py,sha256=u9GLCKJl-Txiem5rIO-qkH3VIyem3taD6T9kMzsYPRY,1941
|
74
78
|
skfolio/prior/_black_litterman.py,sha256=W3HbpvkViEiD7AOgpdVmNYTlWKSGDgo9Y3BfSrbMIQ4,10347
|
@@ -82,10 +86,10 @@ skfolio/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
82
86
|
skfolio/utils/bootstrap.py,sha256=3zY2kO_GQURKEcQMCasJOSByde9Mt2IAi3KJH0_a4mk,3550
|
83
87
|
skfolio/utils/equations.py,sha256=MQ1w3VSM2n_j9bTIKAQA716aWKYyUqtw5yM2bU-9t-M,13745
|
84
88
|
skfolio/utils/sorting.py,sha256=lSjMvH2L-sSj-06B3MlwBrH1rtjCeGEe4hG894W7TE0,3504
|
85
|
-
skfolio/utils/stats.py,sha256=
|
89
|
+
skfolio/utils/stats.py,sha256=bzKlF2U7BN2WonwtuwG_cL_16Z3cTAxCAw5pZgbib54,17005
|
86
90
|
skfolio/utils/tools.py,sha256=4KrmBR9jOLiI6j0hb27gsPC--OHXo4Sp1xl-6i-k9Tg,20925
|
87
|
-
skfolio-0.
|
88
|
-
skfolio-0.
|
89
|
-
skfolio-0.
|
90
|
-
skfolio-0.
|
91
|
-
skfolio-0.
|
91
|
+
skfolio-0.5.0.dist-info/LICENSE,sha256=F6Gi-ZJX5BlVzYK8R9NcvAkAsKa7KO29xB1OScbrH6Q,1526
|
92
|
+
skfolio-0.5.0.dist-info/METADATA,sha256=yHEHbXE0miG8QngS1WprxyB9QrKnml44TPGScw8SqqM,19611
|
93
|
+
skfolio-0.5.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
94
|
+
skfolio-0.5.0.dist-info/top_level.txt,sha256=NXEaoS9Ms7t32gxkb867nV0OKlU0KmssL7IJBVo0fJs,8
|
95
|
+
skfolio-0.5.0.dist-info/RECORD,,
|