skfolio 0.4.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skfolio-0.4.2/src/skfolio.egg-info → skfolio-0.5.0}/PKG-INFO +2 -2
- {skfolio-0.4.2 → skfolio-0.5.0}/README.rst +1 -1
- {skfolio-0.4.2 → skfolio-0.5.0}/pyproject.toml +1 -1
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/cluster/hierarchical/_base.py +12 -65
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/cluster/hierarchical/_herc.py +75 -26
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/cluster/hierarchical/_hrp.py +68 -12
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/population/_population.py +1 -1
- skfolio-0.5.0/src/skfolio/pre_selection/__init__.py +13 -0
- skfolio-0.5.0/src/skfolio/pre_selection/_drop_correlated.py +108 -0
- skfolio-0.5.0/src/skfolio/pre_selection/_select_complete.py +116 -0
- skfolio-0.5.0/src/skfolio/pre_selection/_select_k_extremes.py +100 -0
- skfolio-0.5.0/src/skfolio/pre_selection/_select_non_dominated.py +161 -0
- skfolio-0.5.0/src/skfolio/pre_selection/_select_non_expiring.py +148 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/preprocessing/_returns.py +9 -3
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/utils/stats.py +87 -0
- {skfolio-0.4.2 → skfolio-0.5.0/src/skfolio.egg-info}/PKG-INFO +2 -2
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio.egg-info/SOURCES.txt +5 -1
- skfolio-0.4.2/src/skfolio/pre_selection/__init__.py +0 -7
- skfolio-0.4.2/src/skfolio/pre_selection/_pre_selection.py +0 -343
- {skfolio-0.4.2 → skfolio-0.5.0}/LICENSE +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/MANIFEST.in +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/setup.cfg +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/cluster/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/cluster/_hierarchical.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/datasets/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/datasets/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/datasets/data/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/datasets/data/factors_dataset.csv.gz +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/datasets/data/sp500_index.csv.gz +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/distance/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/distance/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/distance/_distance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/exceptions.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/measures/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/measures/_enums.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/measures/_measures.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/metrics/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/metrics/_scorer.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/model_selection/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/model_selection/_combinatorial.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/model_selection/_validation.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/model_selection/_walk_forward.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_denoise_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_detone_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_empirical_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_ew_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_gerber_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_graphical_lasso_cv.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_implied_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_ledoit_wolf.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_oas.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/covariance/_shrunk_covariance.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/expected_returns/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/expected_returns/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/expected_returns/_empirical_mu.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/expected_returns/_equilibrium_mu.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/expected_returns/_ew_mu.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/moments/expected_returns/_shrunk_mu.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/cluster/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/cluster/_nco.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/cluster/hierarchical/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/convex/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/convex/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/convex/_distributionally_robust.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/convex/_maximum_diversification.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/convex/_mean_risk.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/convex/_risk_budgeting.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/ensemble/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/ensemble/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/ensemble/_stacking.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/naive/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/optimization/naive/_naive.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/population/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/portfolio/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/portfolio/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/portfolio/_multi_period_portfolio.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/portfolio/_portfolio.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/preprocessing/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/prior/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/prior/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/prior/_black_litterman.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/prior/_empirical.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/prior/_factor_model.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/typing.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/uncertainty_set/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/uncertainty_set/_base.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/uncertainty_set/_bootstrap.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/uncertainty_set/_empirical.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/utils/__init__.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/utils/bootstrap.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/utils/equations.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/utils/sorting.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio/utils/tools.py +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio.egg-info/dependency_links.txt +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio.egg-info/requires.txt +0 -0
- {skfolio-0.4.2 → skfolio-0.5.0}/src/skfolio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skfolio
|
3
|
-
Version: 0.4.2
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Portfolio optimization built on top of scikit-learn
|
5
5
|
Author-email: Hugo Delatte <delatte.hugo@gmail.com>
|
6
6
|
Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
|
@@ -600,7 +600,7 @@ K-fold Cross-Validation
|
|
600
600
|
# mmp is the predicted MultiPeriodPortfolio object composed of 5 Portfolios (1 per testing fold)
|
601
601
|
|
602
602
|
mmp.plot_cumulative_returns()
|
603
|
-
print(mmp.summary()
|
603
|
+
print(mmp.summary())
|
604
604
|
|
605
605
|
|
606
606
|
Combinatorial Purged Cross-Validation
|
@@ -515,7 +515,7 @@ K-fold Cross-Validation
|
|
515
515
|
# mmp is the predicted MultiPeriodPortfolio object composed of 5 Portfolios (1 per testing fold)
|
516
516
|
|
517
517
|
mmp.plot_cumulative_returns()
|
518
|
-
print(mmp.summary()
|
518
|
+
print(mmp.summary())
|
519
519
|
|
520
520
|
|
521
521
|
Combinatorial Purged Cross-Validation
|
@@ -52,8 +52,6 @@ class BaseHierarchicalOptimization(BaseOptimization, ABC):
|
|
52
52
|
* ENTROPIC_RISK_MEASURE
|
53
53
|
* FOURTH_CENTRAL_MOMENT
|
54
54
|
* FOURTH_LOWER_PARTIAL_MOMENT
|
55
|
-
* SKEW
|
56
|
-
* KURTOSIS
|
57
55
|
|
58
56
|
The default is `RiskMeasure.VARIANCE`.
|
59
57
|
|
@@ -80,12 +78,12 @@ class BaseHierarchicalOptimization(BaseOptimization, ABC):
|
|
80
78
|
|
81
79
|
min_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
|
82
80
|
Minimum assets weights (weights lower bounds). Negative weights are not allowed.
|
83
|
-
If a float is provided, it is applied to each asset.
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
dictionary, assets values that are not provided are assigned a
|
88
|
-
of `0.0`. The default is 0.0 (no short selling).
|
81
|
+
If a float is provided, it is applied to each asset.
|
82
|
+
If a dictionary is provided, its (key/value) pair must be the
|
83
|
+
(asset name/asset minimum weight) and the input `X` of the `fit` method must be
|
84
|
+
a DataFrame with the assets names in columns.
|
85
|
+
When using a dictionary, assets values that are not provided are assigned a
|
86
|
+
minimum weight of `0.0`. The default is 0.0 (no short selling).
|
89
87
|
|
90
88
|
Example:
|
91
89
|
|
@@ -96,12 +94,12 @@ class BaseHierarchicalOptimization(BaseOptimization, ABC):
|
|
96
94
|
|
97
95
|
max_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=1.0
|
98
96
|
Maximum assets weights (weights upper bounds). Weights above 1.0 are not
|
99
|
-
allowed. If a float is provided, it is applied to each asset.
|
100
|
-
|
101
|
-
(
|
102
|
-
|
103
|
-
using a dictionary, assets values that are not provided are assigned a
|
104
|
-
weight of `1.0`. The default is 1.0 (each asset is below 100%).
|
97
|
+
allowed. If a float is provided, it is applied to each asset.
|
98
|
+
If a dictionary is provided, its (key/value) pair must be the
|
99
|
+
(asset name/asset maximum weight) and the input `X` of the `fit` method must be
|
100
|
+
a DataFrame with the assets names in columns.
|
101
|
+
When using a dictionary, assets values that are not provided are assigned a
|
102
|
+
maximum weight of `1.0`. The default is 1.0 (each asset is below 100%).
|
105
103
|
|
106
104
|
Example:
|
107
105
|
|
@@ -388,57 +386,6 @@ class BaseHierarchicalOptimization(BaseOptimization, ABC):
|
|
388
386
|
|
389
387
|
return min_weights, max_weights
|
390
388
|
|
391
|
-
@staticmethod
|
392
|
-
def _apply_weight_constraints_to_alpha(
|
393
|
-
alpha: float,
|
394
|
-
max_weights: np.ndarray,
|
395
|
-
min_weights: np.ndarray,
|
396
|
-
weights: np.ndarray,
|
397
|
-
left_cluster: np.ndarray,
|
398
|
-
right_cluster: np.ndarray,
|
399
|
-
) -> float:
|
400
|
-
"""Apply weight constraints to the alpha multiplication factor of the
|
401
|
-
Hierarchical Tree Clustering algorithm.
|
402
|
-
|
403
|
-
Parameters
|
404
|
-
----------
|
405
|
-
alpha : float
|
406
|
-
The alpha multiplication factor of the Hierarchical Tree Clustering
|
407
|
-
algorithm.
|
408
|
-
|
409
|
-
min_weights : ndarray of shape (n_assets,)
|
410
|
-
The weight lower bound 1D array.
|
411
|
-
|
412
|
-
max_weights : ndarray of shape (n_assets,)
|
413
|
-
The weight upper bound 1D array.
|
414
|
-
|
415
|
-
weights : np.ndarray of shape (n_assets,)
|
416
|
-
The assets weights.
|
417
|
-
|
418
|
-
left_cluster : ndarray of shape (n_left_cluster,)
|
419
|
-
Indices of the left cluster weights.
|
420
|
-
|
421
|
-
right_cluster : ndarray of shape (n_right_cluster,)
|
422
|
-
Indices of the right cluster weights.
|
423
|
-
|
424
|
-
Returns
|
425
|
-
-------
|
426
|
-
value : float
|
427
|
-
The transformed alpha incorporating the weight constraints.
|
428
|
-
"""
|
429
|
-
alpha = min(
|
430
|
-
np.sum(max_weights[left_cluster]) / weights[left_cluster[0]],
|
431
|
-
max(np.sum(min_weights[left_cluster]) / weights[left_cluster[0]], alpha),
|
432
|
-
)
|
433
|
-
alpha = 1 - min(
|
434
|
-
np.sum(max_weights[right_cluster]) / weights[right_cluster[0]],
|
435
|
-
max(
|
436
|
-
np.sum(min_weights[right_cluster]) / weights[right_cluster[0]],
|
437
|
-
1 - alpha,
|
438
|
-
),
|
439
|
-
)
|
440
|
-
return alpha
|
441
|
-
|
442
389
|
def get_metadata_routing(self):
|
443
390
|
# noinspection PyTypeChecker
|
444
391
|
router = (
|
@@ -3,8 +3,7 @@
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
5
|
# License: BSD 3 clause
|
6
|
-
#
|
7
|
-
# from Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
6
|
+
# The handling of weight constraints is a novel implementation; see the class docstring for more details.
|
8
7
|
|
9
8
|
import numpy as np
|
10
9
|
import numpy.typing as npt
|
@@ -20,6 +19,7 @@ from skfolio.optimization.cluster.hierarchical._base import (
|
|
20
19
|
BaseHierarchicalOptimization,
|
21
20
|
)
|
22
21
|
from skfolio.prior import BasePrior, EmpiricalPrior
|
22
|
+
from skfolio.utils.stats import minimize_relative_weight_deviation
|
23
23
|
from skfolio.utils.tools import check_estimator
|
24
24
|
|
25
25
|
|
@@ -45,6 +45,32 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
45
45
|
which is more stable and has better properties than the single-linkage
|
46
46
|
method [4]_.
|
47
47
|
|
48
|
+
Also, the initial paper does not provide an algorithm for handling weight
|
49
|
+
constraints, and no standard solution currently exists.
|
50
|
+
In contrast to HRP (Hierarchical Risk Parity), where weight constraints
|
51
|
+
can be applied to the split factor at each bisection step, HERC
|
52
|
+
(Hierarchical Equal Risk Contribution) cannot incorporate weight constraints
|
53
|
+
during the intermediate steps of the allocation. Therefore, in HERC, the
|
54
|
+
weight constraints must be enforced after the top-down allocation has been
|
55
|
+
completed.
|
56
|
+
In skfolio, we minimize the relative deviation of the final weights from
|
57
|
+
the initial weights. This is formulated as a convex optimization problem:
|
58
|
+
|
59
|
+
.. math::
|
60
|
+
\begin{cases}
|
61
|
+
\begin{aligned}
|
62
|
+
&\min_{w} & & \Vert \frac{w - w_{init}}{w_{init}} \Vert_{2}^{2} \\
|
63
|
+
&\text{s.t.} & & \sum_{i=1}^{N} w_{i} = 1 \\
|
64
|
+
& & & w_{min} \leq w_i \leq w_{max}, \quad \forall i
|
65
|
+
\end{aligned}
|
66
|
+
\end{cases}
|
67
|
+
|
68
|
+
The reason for minimizing the relative deviation (as opposed to the absolute
|
69
|
+
deviation) is that we want to limit the impact on the risk contribution of
|
70
|
+
each asset. Since HERC allocates inversely to risk, adjusting the weights
|
71
|
+
based on relative deviation ensures that the assets' risk contributions
|
72
|
+
remain proportionally consistent with the initial allocation.
|
73
|
+
|
48
74
|
Parameters
|
49
75
|
----------
|
50
76
|
risk_measure : RiskMeasure or ExtraRiskMeasure, default=RiskMeasure.VARIANCE
|
@@ -70,8 +96,6 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
70
96
|
* ENTROPIC_RISK_MEASURE
|
71
97
|
* FOURTH_CENTRAL_MOMENT
|
72
98
|
* FOURTH_LOWER_PARTIAL_MOMENT
|
73
|
-
* SKEW
|
74
|
-
* KURTOSIS
|
75
99
|
|
76
100
|
The default is `RiskMeasure.VARIANCE`.
|
77
101
|
|
@@ -98,12 +122,12 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
98
122
|
|
99
123
|
min_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
|
100
124
|
Minimum assets weights (weights lower bounds). Negative weights are not allowed.
|
101
|
-
If a float is provided, it is applied to each asset.
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
dictionary, assets values that are not provided are assigned a
|
106
|
-
of `0.0`. The default is 0.0 (no short selling).
|
125
|
+
If a float is provided, it is applied to each asset.
|
126
|
+
If a dictionary is provided, its (key/value) pair must be the
|
127
|
+
(asset name/asset minimum weight) and the input `X` of the `fit` method must be
|
128
|
+
a DataFrame with the assets names in columns.
|
129
|
+
When using a dictionary, assets values that are not provided are assigned a
|
130
|
+
minimum weight of `0.0`. The default is 0.0 (no short selling).
|
107
131
|
|
108
132
|
Example:
|
109
133
|
|
@@ -114,12 +138,12 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
114
138
|
|
115
139
|
max_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=1.0
|
116
140
|
Maximum assets weights (weights upper bounds). Weights above 1.0 are not
|
117
|
-
allowed. If a float is provided, it is applied to each asset.
|
118
|
-
|
119
|
-
(
|
120
|
-
|
121
|
-
using a dictionary, assets values that are not provided are assigned a
|
122
|
-
weight of `1.0`. The default is 1.0 (each asset is below 100%).
|
141
|
+
allowed. If a float is provided, it is applied to each asset.
|
142
|
+
If a dictionary is provided, its (key/value) pair must be the
|
143
|
+
(asset name/asset maximum weight) and the input `X` of the `fit` method must be
|
144
|
+
a DataFrame with the assets names in columns.
|
145
|
+
When using a dictionary, assets values that are not provided are assigned a
|
146
|
+
maximum weight of `1.0`. The default is 1.0 (each asset is below 100%).
|
123
147
|
|
124
148
|
Example:
|
125
149
|
|
@@ -208,6 +232,19 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
208
232
|
`management_fees`, `previous_weights` and `risk_free_rate` are copied from the
|
209
233
|
optimization model and passed to the portfolio.
|
210
234
|
|
235
|
+
solver : str, default="CLARABEL"
|
236
|
+
The solver used for the weights constraints optimization. The default is
|
237
|
+
"CLARABEL" which is written in Rust and has better numerical stability and
|
238
|
+
performance than ECOS and SCS.
|
239
|
+
For more details about available solvers, check the CVXPY documentation:
|
240
|
+
https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver
|
241
|
+
|
242
|
+
solver_params : dict, optional
|
243
|
+
Solver parameters. For example, `solver_params=dict(verbose=True)`.
|
244
|
+
The default (`None`) is to use the CVXPY default.
|
245
|
+
For more details about solver arguments, check the CVXPY documentation:
|
246
|
+
https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options
|
247
|
+
|
211
248
|
Attributes
|
212
249
|
----------
|
213
250
|
weights_ : ndarray of shape (n_assets,)
|
@@ -251,6 +288,8 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
251
288
|
hierarchical_clustering_estimator: HierarchicalClustering | None = None,
|
252
289
|
min_weights: skt.MultiInput | None = 0.0,
|
253
290
|
max_weights: skt.MultiInput | None = 1.0,
|
291
|
+
solver: str = "CLARABEL",
|
292
|
+
solver_params: dict | None = None,
|
254
293
|
transaction_costs: skt.MultiInput = 0.0,
|
255
294
|
management_fees: skt.MultiInput = 0.0,
|
256
295
|
previous_weights: skt.MultiInput | None = None,
|
@@ -268,6 +307,8 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
268
307
|
previous_weights=previous_weights,
|
269
308
|
portfolio_params=portfolio_params,
|
270
309
|
)
|
310
|
+
self.solver = solver
|
311
|
+
self.solver_params = solver_params
|
271
312
|
|
272
313
|
def fit(
|
273
314
|
self, X: npt.ArrayLike, y: None = None, **fit_params
|
@@ -301,6 +342,13 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
301
342
|
raise TypeError(
|
302
343
|
"`risk_measure` must be of type `RiskMeasure` or `ExtraRiskMeasure`"
|
303
344
|
)
|
345
|
+
|
346
|
+
if self.risk_measure in [ExtraRiskMeasure.SKEW, ExtraRiskMeasure.KURTOSIS]:
|
347
|
+
# Because Skew and Kurtosis can take negative values
|
348
|
+
raise ValueError(
|
349
|
+
f"risk_measure {self.risk_measure} currently not supported" f"in HERC"
|
350
|
+
)
|
351
|
+
|
304
352
|
self.prior_estimator_ = check_estimator(
|
305
353
|
self.prior_estimator,
|
306
354
|
default=EmpiricalPrior(),
|
@@ -393,21 +441,12 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
393
441
|
|
394
442
|
left_cluster = np.array(left_cluster)
|
395
443
|
right_cluster = np.array(right_cluster)
|
444
|
+
|
396
445
|
left_risk = np.sum(cluster_risks[left_cluster])
|
397
446
|
right_risk = np.sum(cluster_risks[right_cluster])
|
398
447
|
|
399
448
|
alpha = 1 - left_risk / (left_risk + right_risk)
|
400
449
|
|
401
|
-
# Weights constraints
|
402
|
-
alpha = self._apply_weight_constraints_to_alpha(
|
403
|
-
alpha=alpha,
|
404
|
-
weights=weights,
|
405
|
-
max_weights=max_weights,
|
406
|
-
min_weights=min_weights,
|
407
|
-
left_cluster=left_cluster,
|
408
|
-
right_cluster=right_cluster,
|
409
|
-
)
|
410
|
-
|
411
450
|
clusters_weights[left_cluster] *= alpha
|
412
451
|
clusters_weights[right_cluster] *= 1 - alpha
|
413
452
|
|
@@ -421,5 +460,15 @@ class HierarchicalEqualRiskContribution(BaseHierarchicalOptimization):
|
|
421
460
|
for i, cluster_ids in enumerate(clusters):
|
422
461
|
weights[cluster_ids] *= clusters_weights[i]
|
423
462
|
|
463
|
+
# Apply weights constraints
|
464
|
+
weights = minimize_relative_weight_deviation(
|
465
|
+
weights=weights,
|
466
|
+
min_weights=min_weights,
|
467
|
+
max_weights=max_weights,
|
468
|
+
solver=self.solver,
|
469
|
+
solver_params=self.solver_params,
|
470
|
+
)
|
471
|
+
|
424
472
|
self.weights_ = weights
|
473
|
+
|
425
474
|
return self
|
@@ -72,8 +72,6 @@ class HierarchicalRiskParity(BaseHierarchicalOptimization):
|
|
72
72
|
* ENTROPIC_RISK_MEASURE
|
73
73
|
* FOURTH_CENTRAL_MOMENT
|
74
74
|
* FOURTH_LOWER_PARTIAL_MOMENT
|
75
|
-
* SKEW
|
76
|
-
* KURTOSIS
|
77
75
|
|
78
76
|
The default is `RiskMeasure.VARIANCE`.
|
79
77
|
|
@@ -100,9 +98,9 @@ class HierarchicalRiskParity(BaseHierarchicalOptimization):
|
|
100
98
|
|
101
99
|
min_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
|
102
100
|
Minimum assets weights (weights lower bounds). Negative weights are not allowed.
|
103
|
-
If a float is provided, it is applied to each asset.
|
104
|
-
|
105
|
-
|
101
|
+
If a float is provided, it is applied to each asset.
|
102
|
+
If a dictionary is provided, its (key/value) pair must be the
|
103
|
+
(asset name/asset minimum weight) and the input `X` of the `fit`
|
106
104
|
methods must be a DataFrame with the assets names in columns. When using a
|
107
105
|
dictionary, assets values that are not provided are assigned a minimum weight
|
108
106
|
of `0.0`. The default is 0.0 (no short selling).
|
@@ -116,12 +114,12 @@ class HierarchicalRiskParity(BaseHierarchicalOptimization):
|
|
116
114
|
|
117
115
|
max_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=1.0
|
118
116
|
Maximum assets weights (weights upper bounds). Weights above 1.0 are not
|
119
|
-
allowed. If a float is provided, it is applied to each asset.
|
120
|
-
|
121
|
-
(
|
122
|
-
|
123
|
-
using a dictionary, assets values that are not provided are assigned a
|
124
|
-
weight of `1.0`. The default is 1.0 (each asset is below 100%).
|
117
|
+
allowed. If a float is provided, it is applied to each asset.
|
118
|
+
If a dictionary is provided, its (key/value) pair must be the
|
119
|
+
(asset name/asset maximum weight) and the input `X` of the `fit` method must
|
120
|
+
be a DataFrame with the assets names in columns.
|
121
|
+
When using a dictionary, assets values that are not provided are assigned a
|
122
|
+
maximum weight of `1.0`. The default is 1.0 (each asset is below 100%).
|
125
123
|
|
126
124
|
Example:
|
127
125
|
|
@@ -296,6 +294,13 @@ class HierarchicalRiskParity(BaseHierarchicalOptimization):
|
|
296
294
|
raise TypeError(
|
297
295
|
"`risk_measure` must be of type `RiskMeasure` or `ExtraRiskMeasure`"
|
298
296
|
)
|
297
|
+
|
298
|
+
if self.risk_measure in [ExtraRiskMeasure.SKEW, ExtraRiskMeasure.KURTOSIS]:
|
299
|
+
# Because Skew and Kurtosis can take negative values
|
300
|
+
raise ValueError(
|
301
|
+
f"risk_measure {self.risk_measure} currently not supported" f"in HRP"
|
302
|
+
)
|
303
|
+
|
299
304
|
self.prior_estimator_ = check_estimator(
|
300
305
|
self.prior_estimator,
|
301
306
|
default=EmpiricalPrior(),
|
@@ -365,7 +370,7 @@ class HierarchicalRiskParity(BaseHierarchicalOptimization):
|
|
365
370
|
left_cluster, right_cluster = clusters_ids
|
366
371
|
alpha = 1 - left_risk / (left_risk + right_risk)
|
367
372
|
# Weights constraints
|
368
|
-
alpha =
|
373
|
+
alpha = _apply_weight_constraints_to_split_factor(
|
369
374
|
alpha=alpha,
|
370
375
|
weights=weights,
|
371
376
|
max_weights=max_weights,
|
@@ -379,3 +384,54 @@ class HierarchicalRiskParity(BaseHierarchicalOptimization):
|
|
379
384
|
|
380
385
|
self.weights_ = weights
|
381
386
|
return self
|
387
|
+
|
388
|
+
|
389
|
+
def _apply_weight_constraints_to_split_factor(
|
390
|
+
alpha: float,
|
391
|
+
max_weights: np.ndarray,
|
392
|
+
min_weights: np.ndarray,
|
393
|
+
weights: np.ndarray,
|
394
|
+
left_cluster: np.ndarray,
|
395
|
+
right_cluster: np.ndarray,
|
396
|
+
) -> float:
|
397
|
+
"""
|
398
|
+
Apply weight constraints to the split factor alpha of the ,Hierarchical Tree
|
399
|
+
Clustering algorithm.
|
400
|
+
|
401
|
+
Parameters
|
402
|
+
----------
|
403
|
+
alpha : float
|
404
|
+
The split factor alpha of the Hierarchical Tree Clustering algorithm.
|
405
|
+
|
406
|
+
min_weights : ndarray of shape (n_assets,)
|
407
|
+
The weight lower bound 1D array.
|
408
|
+
|
409
|
+
max_weights : ndarray of shape (n_assets,)
|
410
|
+
The weight upper bound 1D array.
|
411
|
+
|
412
|
+
weights : np.ndarray of shape (n_assets,)
|
413
|
+
The assets weights.
|
414
|
+
|
415
|
+
left_cluster : ndarray of shape (n_left_cluster,)
|
416
|
+
Indices of the left cluster weights.
|
417
|
+
|
418
|
+
right_cluster : ndarray of shape (n_right_cluster,)
|
419
|
+
Indices of the right cluster weights.
|
420
|
+
|
421
|
+
Returns
|
422
|
+
-------
|
423
|
+
value : float
|
424
|
+
The transformed split factor alpha incorporating the weight constraints.
|
425
|
+
"""
|
426
|
+
alpha = min(
|
427
|
+
np.sum(max_weights[left_cluster]) / weights[left_cluster[0]],
|
428
|
+
max(np.sum(min_weights[left_cluster]) / weights[left_cluster[0]], alpha),
|
429
|
+
)
|
430
|
+
alpha = 1 - min(
|
431
|
+
np.sum(max_weights[right_cluster]) / weights[right_cluster[0]],
|
432
|
+
max(
|
433
|
+
np.sum(min_weights[right_cluster]) / weights[right_cluster[0]],
|
434
|
+
1 - alpha,
|
435
|
+
),
|
436
|
+
)
|
437
|
+
return alpha
|
@@ -653,7 +653,7 @@ class Population(list):
|
|
653
653
|
spacing: float | None = None,
|
654
654
|
display_sub_ptf_name: bool = True,
|
655
655
|
) -> go.Figure:
|
656
|
-
"""Plot the contribution of each asset to a given measure of the portfolios
|
656
|
+
r"""Plot the contribution of each asset to a given measure of the portfolios
|
657
657
|
in the population.
|
658
658
|
|
659
659
|
Parameters
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from skfolio.pre_selection._drop_correlated import DropCorrelated
|
2
|
+
from skfolio.pre_selection._select_complete import SelectComplete
|
3
|
+
from skfolio.pre_selection._select_k_extremes import SelectKExtremes
|
4
|
+
from skfolio.pre_selection._select_non_dominated import SelectNonDominated
|
5
|
+
from skfolio.pre_selection._select_non_expiring import SelectNonExpiring
|
6
|
+
|
7
|
+
__all__ = [
|
8
|
+
"DropCorrelated",
|
9
|
+
"SelectKExtremes",
|
10
|
+
"SelectNonDominated",
|
11
|
+
"SelectComplete",
|
12
|
+
"SelectNonExpiring",
|
13
|
+
]
|
@@ -0,0 +1,108 @@
|
|
1
|
+
"""Pre-selection DropCorrelated module"""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import numpy.typing as npt
|
9
|
+
import sklearn.base as skb
|
10
|
+
import sklearn.feature_selection as skf
|
11
|
+
import sklearn.utils.validation as skv
|
12
|
+
|
13
|
+
|
14
|
+
class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for dropping highly correlated assets.

    Simply removing all correlation pairs above the threshold will remove more assets
    than necessary and a naive sequential removal is suboptimal and depends on the
    initial assets ordering.

    Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
    the threshold and corr(Y,Z) below.
    The first approach would remove X,Y,Z and the second approach would remove either
    Y and Z or X depending on the initial ordering.

    To avoid these shortcomings, we implement the below algorithm:

        * Step 1: select all correlation pairs above the threshold.
        * Step 2: sort all the selected correlation pairs from highest to lowest.
        * Step 3: for each pair, if none of the two assets has been removed, keep the
          asset with the lowest average correlation against the other assets.

    Parameters
    ----------
    threshold : float, default=0.95
        Correlation threshold. Must be between -1 and 1. The default value is `0.95`.

    absolute : bool, default=False
        If this is set to True, we take the absolute value of the correlation. This
        has the effect of also including negatively correlated assets.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(self, threshold: float = 0.95, absolute: bool = False):
        self.threshold = threshold
        self.absolute = absolute

    def fit(self, X: npt.ArrayLike, y=None):
        """Run the correlation transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DropCorrelated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")

        n_assets = X.shape[1]
        # `np.corrcoef` collapses to a 0-d value when there is a single asset;
        # force a 2D matrix so the axis-based operations below stay valid.
        corr = np.atleast_2d(np.corrcoef(X.T))
        if self.absolute:
            # Honour the `absolute` parameter: strongly negative correlations
            # are then treated like strongly positive ones.
            corr = np.abs(corr)
        # The diagonal adds the same constant to every asset, so it does not
        # affect the ranking used below.
        mean_corr = corr.mean(axis=0)

        # Strict upper triangle: each pair of distinct assets appears once.
        rows, cols = np.triu_indices(n_assets, 1)
        pair_corr = corr[rows, cols]

        # select all correlation pairs above the threshold
        selected_idx = np.flatnonzero(pair_corr > self.threshold)

        # sort all the selected correlation pairs from highest to lowest
        selected_idx = selected_idx[np.argsort(-pair_corr[selected_idx])]

        # for each pair, if none of the two assets has been removed, keep the asset
        # with the lowest average correlation with other assets
        removed = np.zeros(n_assets, dtype=bool)
        for idx in selected_idx:
            i, j = rows[idx], cols[idx]
            if removed[i] or removed[j]:
                continue
            removed[i if mean_corr[i] > mean_corr[j] else j] = True

        self.to_keep_ = ~removed
        return self

    def _get_support_mask(self):
        """Return the boolean mask of the assets kept by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
|