skfolio 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/datasets/__init__.py +2 -0
- skfolio/datasets/_base.py +51 -0
- skfolio/distance/_distance.py +15 -4
- skfolio/model_selection/_combinatorial.py +2 -2
- skfolio/model_selection/_validation.py +70 -15
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +2 -0
- skfolio/moments/covariance/__init__.py +11 -11
- skfolio/moments/covariance/_base.py +10 -9
- skfolio/moments/covariance/_denoise_covariance.py +181 -0
- skfolio/moments/covariance/_detone_covariance.py +158 -0
- skfolio/moments/covariance/_empirical_covariance.py +100 -0
- skfolio/moments/covariance/_ew_covariance.py +109 -0
- skfolio/moments/covariance/_gerber_covariance.py +157 -0
- skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
- skfolio/moments/covariance/_implied_covariance.py +454 -0
- skfolio/moments/covariance/_ledoit_wolf.py +140 -0
- skfolio/moments/covariance/_oas.py +115 -0
- skfolio/moments/covariance/_shrunk_covariance.py +104 -0
- skfolio/moments/expected_returns/__init__.py +4 -7
- skfolio/moments/expected_returns/_empirical_mu.py +63 -0
- skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
- skfolio/moments/expected_returns/_ew_mu.py +69 -0
- skfolio/moments/expected_returns/{_expected_returns.py → _shrunk_mu.py} +22 -200
- skfolio/optimization/cluster/_nco.py +46 -8
- skfolio/optimization/cluster/hierarchical/_base.py +21 -1
- skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
- skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
- skfolio/optimization/convex/_base.py +10 -1
- skfolio/optimization/convex/_distributionally_robust.py +12 -2
- skfolio/optimization/convex/_maximum_diversification.py +9 -2
- skfolio/optimization/convex/_mean_risk.py +33 -6
- skfolio/optimization/convex/_risk_budgeting.py +5 -2
- skfolio/optimization/ensemble/_stacking.py +32 -9
- skfolio/optimization/naive/_naive.py +20 -2
- skfolio/population/_population.py +2 -0
- skfolio/prior/_base.py +1 -1
- skfolio/prior/_black_litterman.py +20 -2
- skfolio/prior/_empirical.py +38 -5
- skfolio/prior/_factor_model.py +44 -7
- skfolio/uncertainty_set/_base.py +30 -9
- skfolio/uncertainty_set/_bootstrap.py +26 -10
- skfolio/uncertainty_set/_empirical.py +25 -10
- skfolio/utils/stats.py +24 -3
- skfolio/utils/tools.py +213 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/METADATA +3 -2
- skfolio-0.3.0.dist-info/RECORD +91 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/WHEEL +1 -1
- skfolio/moments/covariance/_covariance.py +0 -1114
- skfolio-0.2.3.dist-info/RECORD +0 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/LICENSE +0 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,158 @@
|
|
1
|
+
"""Covariance Detoning Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import numpy.typing as npt
|
12
|
+
import sklearn.utils.metadata_routing as skm
|
13
|
+
|
14
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
15
|
+
from skfolio.moments.covariance._empirical_covariance import EmpiricalCovariance
|
16
|
+
from skfolio.utils.stats import corr_to_cov, cov_to_corr
|
17
|
+
from skfolio.utils.tools import check_estimator
|
18
|
+
|
19
|
+
|
20
|
+
class DetoneCovariance(BaseCovariance):
    """Covariance Detoning estimator.

    Financial covariance matrices usually incorporate a market component corresponding
    to the first eigenvectors [1]_.
    For some applications like clustering, removing the market component (loud tone)
    allows a greater portion of the covariance to be explained by components that
    affect specific subsets of the securities.

    Parameters
    ----------
    covariance_estimator : BaseCovariance, optional
        :ref:`Covariance estimator <covariance_estimator>` to estimate the covariance
        matrix prior to detoning.
        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.

    n_markets : int, default=1
        Number of eigenvectors related to the market.
        Must satisfy `0 <= n_markets < n_assets`; `0` leaves the correlation
        structure untouched.
        The default value is `1`.

    nearest : bool, default=True
        If this is set to True, the covariance is replaced by the nearest covariance
        matrix that is positive definite and with a Cholesky decomposition that can be
        computed. The variance is left unchanged.
        A covariance matrix that is not positive definite often occurs in high
        dimensional problems. It can be due to multicollinearity, floating-point
        inaccuracies, or when the number of observations is smaller than the number of
        assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
        The default is `True`.

    higham : bool, default=False
        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
        nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
        above zeros (1e-13). The default is `False` and uses the clipping method as the
        Higham & Nick algorithm can be slow for large datasets.

    higham_max_iteration : int, default=100
        Maximum number of iterations of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance.

    covariance_estimator_ : BaseCovariance
        Fitted `covariance_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Machine Learning for Asset Managers".
        Elements in Quantitative Finance.
        López de Prado (2020).
    """

    covariance_estimator_: BaseCovariance

    def __init__(
        self,
        covariance_estimator: BaseCovariance | None = None,
        n_markets: int = 1,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        self.covariance_estimator = covariance_estimator
        self.n_markets = n_markets

    def get_metadata_routing(self):
        """Return the metadata router forwarding `fit` metadata to the
        wrapped `covariance_estimator`.
        """
        # noinspection PyTypeChecker
        router = skm.MetadataRouter(owner=self.__class__.__name__).add(
            covariance_estimator=self.covariance_estimator,
            method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
        )
        return router

    def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "DetoneCovariance":
        """Fit the Covariance Detoning estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        **fit_params : dict
            Parameters to pass to the underlying estimators.
            Only available if `enable_metadata_routing=True`, which can be
            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
            See :ref:`Metadata Routing User Guide <metadata_routing>` for
            more details.

        Returns
        -------
        self : DetoneCovariance
            Fitted estimator.

        Raises
        ------
        ValueError
            If `n_markets` is negative or not strictly smaller than the number of
            assets.
        """
        routed_params = skm.process_routing(self, "fit", **fit_params)

        # fitting estimators
        self.covariance_estimator_ = check_estimator(
            self.covariance_estimator,
            default=EmpiricalCovariance(),
            check_type=BaseCovariance,
        )
        # noinspection PyArgumentList
        self.covariance_estimator_.fit(X, y, **routed_params.covariance_estimator.fit)

        # we validate and convert to numpy after all models have been fitted to keep
        # features names information.
        _ = self._validate_data(X)
        corr, std = cov_to_corr(self.covariance_estimator_.covariance_)

        # A negative value would be interpreted as "all but the last k" by the
        # slices below, and removing every component leaves a zero matrix that
        # cannot be re-normalised; fail loudly instead of returning garbage.
        n_assets = corr.shape[0]
        if not (0 <= self.n_markets < n_assets):
            raise ValueError(
                "`n_markets` must be non-negative and strictly smaller than the "
                f"number of assets ({n_assets}), got {self.n_markets}"
            )

        e_val, e_vec = np.linalg.eigh(corr)
        # Sort the eigen-pairs by decreasing eigenvalue so the market components
        # come first.
        indices = e_val.argsort()[::-1]
        e_val, e_vec = e_val[indices], e_vec[:, indices]
        # market eigenvalues and eigenvectors
        market_e_val, market_e_vec = e_val[: self.n_markets], e_vec[:, : self.n_markets]
        # market correlation
        market_corr = market_e_vec @ np.diag(market_e_val) @ market_e_vec.T
        # Removing the market correlation
        corr -= market_corr
        # Re-normalise the residual matrix before scaling it back with the
        # original standard deviations.
        corr, _ = cov_to_corr(corr)
        covariance = corr_to_cov(corr, std)
        self._set_covariance(covariance)
        return self
|
@@ -0,0 +1,100 @@
|
|
1
|
+
"""Empirical Covariance Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import numpy.typing as npt
|
12
|
+
|
13
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
14
|
+
|
15
|
+
|
16
|
+
class EmpiricalCovariance(BaseCovariance):
    """Sample (empirical) covariance estimator.

    Parameters
    ----------
    window_size : int, optional
        Number of trailing observations used for the fit. When `None` (the
        default), every observation is used.

    ddof : int, default=1
        Delta degrees of freedom: the normalization is by
        `(n_observations - ddof)`. `ddof=1` yields the unbiased estimate while
        `ddof=0` yields the simple average. The default value is `1`.

    nearest : bool, default=True
        When True, the covariance is replaced by the nearest covariance matrix
        that is positive definite and for which a Cholesky decomposition can be
        computed. The variance is left unchanged.
        Non positive definite covariance matrices are frequent in high
        dimensional problems; typical causes are multicollinearity,
        floating-point inaccuracies, or fewer observations than assets.
        See :func:`~skfolio.utils.stats.cov_nearest` for more details.
        The default is `True`.

    higham : bool, default=False
        When True, the Higham & Nick (2002) algorithm is used to find the nearest
        PD covariance; otherwise the eigenvalues are clipped to a threshold
        above zero (1e-13). The default is `False` (clipping) because the
        Higham & Nick algorithm can be slow for large datasets.

    higham_max_iteration : int, default=100
        Maximum number of iterations of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    def __init__(
        self,
        window_size: int | None = None,
        ddof: int = 1,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        self.window_size = window_size
        self.ddof = ddof

    def fit(self, X: npt.ArrayLike, y=None) -> "EmpiricalCovariance":
        """Estimate the empirical covariance from asset returns.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : EmpiricalCovariance
            Fitted estimator.
        """
        returns = self._validate_data(X)
        window = self.window_size
        if window is not None:
            # Restrict the sample to the most recent `window` observations.
            returns = returns[-int(window) :]
        # Transpose so that each row passed to `np.cov` is one asset.
        self._set_covariance(np.cov(returns.T, ddof=self.ddof))
        return self
|
@@ -0,0 +1,109 @@
|
|
1
|
+
"""Exponentially Weighted Covariance Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import numpy.typing as npt
|
11
|
+
import pandas as pd
|
12
|
+
|
13
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
14
|
+
|
15
|
+
|
16
|
+
class EWCovariance(BaseCovariance):
    r"""Exponentially Weighted Covariance estimator.

    Estimator of the covariance using the historical exponentially weighted returns.

    Parameters
    ----------
    window_size : int, optional
        Window size. The model is fitted on the last `window_size` observations.
        The default (`None`) is to use all the data.

    alpha : float, default=0.2
        Exponential smoothing factor, with :math:`0 < \alpha \leq 1`.
        The default value is `0.2`.

    nearest : bool, default=True
        If this is set to True, the covariance is replaced by the nearest covariance
        matrix that is positive definite and with a Cholesky decomposition that can be
        computed. The variance is left unchanged.
        A covariance matrix that is not positive definite often occurs in high
        dimensional problems. It can be due to multicollinearity, floating-point
        inaccuracies, or when the number of observations is smaller than the number of
        assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
        The default is `True`.

    higham : bool, default=False
        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
        nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
        above zeros (1e-13). The default is `False` and uses the clipping method as the
        Higham & Nick algorithm can be slow for large datasets.

    higham_max_iteration : int, default=100
        Maximum number of iterations of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    def __init__(
        self,
        window_size: int | None = None,
        alpha: float = 0.2,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        self.window_size = window_size
        self.alpha = alpha

    def fit(self, X: npt.ArrayLike, y=None) -> "EWCovariance":
        """Fit the Exponentially Weighted Covariance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : EWCovariance
            Fitted estimator.
        """
        X = self._validate_data(X)
        if self.window_size is not None:
            X = X[-int(self.window_size) :]
        n_observations = X.shape[0]
        # `ewm(...).cov()` yields one covariance matrix per observation, stacked
        # under a (observation, asset) MultiIndex; only the matrix attached to
        # the last observation is kept.
        ewm_covariances = pd.DataFrame(X).ewm(alpha=self.alpha).cov()
        covariance = ewm_covariances.loc[
            (n_observations - 1, slice(None)), :
        ].to_numpy()
        self._set_covariance(covariance)
        return self
|
@@ -0,0 +1,157 @@
|
|
1
|
+
"""Gerber Covariance Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import numpy.typing as npt
|
12
|
+
|
13
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
14
|
+
from skfolio.utils.stats import corr_to_cov
|
15
|
+
|
16
|
+
|
17
|
+
class GerberCovariance(BaseCovariance):
    """Gerber Covariance estimator.

    Robust co-movement measure which ignores fluctuations below a certain threshold
    while simultaneously limiting the effects of extreme movements.
    The Gerber statistic extends Kendall's Tau by counting the proportion of
    simultaneous co-movements in series when their amplitudes exceed data-dependent
    thresholds.

    Three variants have been published:

        * Gerber et al. (2015): tend to produce matrices that are non-PSD.
        * Gerber et al. (2019): alteration of the denominator of the above statistic.
        * Gerber et al. (2022): final alteration to ensure PSD matrix.

    The last two variants are implemented.

    Parameters
    ----------
    window_size : int, optional
        Window size. The model is fitted on the last `window_size` observations.
        The default (`None`) is to use all the data.

    threshold : float, default=0.5
        Gerber threshold. It must be strictly between 0 and 1.
        The default value is `0.5`.

    psd_variant : bool, default=True
        If this is set to True, the Gerber et al. (2022) variant is used to ensure a
        positive semi-definite matrix.
        Otherwise, the Gerber et al. (2019) variant is used.
        The default is `True`.

    nearest : bool, default=True
        If this is set to True, the covariance is replaced by the nearest covariance
        matrix that is positive definite and with a Cholesky decomposition that can be
        computed. The variance is left unchanged.
        A covariance matrix that is not positive definite often occurs in high
        dimensional problems. It can be due to multicollinearity, floating-point
        inaccuracies, or when the number of observations is smaller than the number of
        assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
        The default is `True`.

    higham : bool, default=False
        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
        nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
        above zeros (1e-13). The default is `False` and uses the clipping method as the
        Higham & Nick algorithm can be slow for large datasets.

    higham_max_iteration : int, default=100
        Maximum number of iterations of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "The gerber statistic: A robust co-movement measure for portfolio
        optimization".
        The Journal of Portfolio Management.
        Gerber, S., B. Javid, H. Markowitz, P. Sargen, and D. Starer (2022).

    .. [2] "The gerber statistic: A robust measure of correlation".
        Gerber, S., B. Javid, H. Markowitz, P. Sargen, and D. Starer (2019).

    .. [3] "Enhancing multi-asset portfolio construction under modern portfolio theory
        with a robust co-movement measure".
        Social Science Research network Working Paper Series.
        Gerber, S., H. Markowitz, and P. Pujara (2015).

    .. [4] "Deconstructing the Gerber Statistic".
        Flint & Polakow, 2023.
    """

    def __init__(
        self,
        window_size: int | None = None,
        threshold: float = 0.5,
        psd_variant: bool = True,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        self.window_size = window_size
        self.threshold = threshold
        self.psd_variant = psd_variant

    def fit(self, X: npt.ArrayLike, y=None) -> "GerberCovariance":
        """Fit the Gerber covariance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : GerberCovariance
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not strictly between 0 and 1.
        """
        X = self._validate_data(X)
        if self.window_size is not None:
            # Coerce to int like the other windowed covariance estimators so that
            # an integer-valued float window does not break the slice.
            X = X[-int(self.window_size) :]
        if not (1 > self.threshold > 0):
            raise ValueError("The threshold must be between 0 and 1")
        n_observations = X.shape[0]
        std = X.std(axis=0).reshape((-1, 1))
        # Per-asset indicators: up-move (u), down-move (d) and neutral (n), where
        # a move counts only if it exceeds `threshold` standard deviations.
        u = X >= std.T * self.threshold
        d = X <= -std.T * self.threshold
        n = np.invert(u) & np.invert(d)  # np.invert preferred to ~ for type hint
        n = n.astype(int)
        u = u.astype(int)
        d = d.astype(int)
        # Pairwise counts of observations where two assets move in the same
        # direction (concordant) or in opposite directions (discordant).
        concordant = u.T @ u + d.T @ d
        discordant = u.T @ d + d.T @ u
        h = concordant - discordant
        if self.psd_variant:
            # Normalise by the number of observations where the pair is not
            # jointly neutral.
            corr = h / (n_observations - n.T @ n)
        else:
            # Normalise by the geometric mean of the diagonal entries of `h`.
            h_sqrt = np.sqrt(np.diag(h)).reshape((-1, 1))
            corr = h / (h_sqrt @ h_sqrt.T)
        covariance = corr_to_cov(corr, std.reshape(-1))
        self._set_covariance(covariance)
        return self
|