skfolio 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- skfolio/datasets/__init__.py +2 -0
- skfolio/datasets/_base.py +51 -0
- skfolio/distance/_distance.py +15 -4
- skfolio/model_selection/_combinatorial.py +2 -2
- skfolio/model_selection/_validation.py +70 -15
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +2 -0
- skfolio/moments/covariance/__init__.py +11 -11
- skfolio/moments/covariance/_base.py +10 -9
- skfolio/moments/covariance/_denoise_covariance.py +181 -0
- skfolio/moments/covariance/_detone_covariance.py +158 -0
- skfolio/moments/covariance/_empirical_covariance.py +100 -0
- skfolio/moments/covariance/_ew_covariance.py +109 -0
- skfolio/moments/covariance/_gerber_covariance.py +157 -0
- skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
- skfolio/moments/covariance/_implied_covariance.py +462 -0
- skfolio/moments/covariance/_ledoit_wolf.py +140 -0
- skfolio/moments/covariance/_oas.py +115 -0
- skfolio/moments/covariance/_shrunk_covariance.py +104 -0
- skfolio/moments/expected_returns/__init__.py +4 -7
- skfolio/moments/expected_returns/_empirical_mu.py +63 -0
- skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
- skfolio/moments/expected_returns/_ew_mu.py +69 -0
- skfolio/moments/expected_returns/{_expected_returns.py → _shrunk_mu.py} +22 -200
- skfolio/optimization/cluster/_nco.py +46 -8
- skfolio/optimization/cluster/hierarchical/_base.py +21 -1
- skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
- skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
- skfolio/optimization/convex/_base.py +10 -1
- skfolio/optimization/convex/_distributionally_robust.py +12 -2
- skfolio/optimization/convex/_maximum_diversification.py +9 -2
- skfolio/optimization/convex/_mean_risk.py +33 -6
- skfolio/optimization/convex/_risk_budgeting.py +5 -2
- skfolio/optimization/ensemble/_stacking.py +32 -9
- skfolio/optimization/naive/_naive.py +20 -2
- skfolio/population/_population.py +2 -0
- skfolio/prior/_base.py +1 -1
- skfolio/prior/_black_litterman.py +20 -2
- skfolio/prior/_empirical.py +38 -5
- skfolio/prior/_factor_model.py +44 -7
- skfolio/uncertainty_set/_base.py +30 -9
- skfolio/uncertainty_set/_bootstrap.py +26 -10
- skfolio/uncertainty_set/_empirical.py +25 -10
- skfolio/utils/stats.py +24 -3
- skfolio/utils/tools.py +213 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/METADATA +3 -2
- skfolio-0.3.1.dist-info/RECORD +91 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/WHEEL +1 -1
- skfolio/moments/covariance/_covariance.py +0 -1114
- skfolio-0.2.3.dist-info/RECORD +0 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/LICENSE +0 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/top_level.txt +0 -0
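The headline change in this release is the deletion of the monolithic `skfolio/moments/covariance/_covariance.py` (1,114 lines, diffed in full below) in favor of one module per estimator (`_empirical_covariance.py`, `_gerber_covariance.py`, `_denoise_covariance.py`, ...), plus new estimators such as `_implied_covariance.py`. A minimal sketch of what the refactor means for user code, assuming the estimators remain re-exported from the public `skfolio.moments` namespace (so only deep imports of the removed private module should break):

```python
import numpy as np

# Deep import of the removed private module no longer resolves in 0.3.1:
# from skfolio.moments.covariance._covariance import EmpiricalCovariance  # ImportError

# Assuming the public re-export is unchanged, this path works in both versions:
from skfolio.moments import EmpiricalCovariance

rng = np.random.default_rng(0)
X = rng.normal(scale=0.01, size=(252, 10))  # one year of daily returns, 10 assets

model = EmpiricalCovariance()
model.fit(X)
print(model.covariance_.shape)  # (10, 10)
```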
skfolio/moments/covariance/_covariance.py (deleted)
@@ -1,1114 +0,0 @@
-"""Covariance Estimators."""
-
-# Copyright (c) 2023
-# Author: Hugo Delatte <delatte.hugo@gmail.com>
-# License: BSD 3 clause
-# Implementation derived from:
-# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
-# Grisel Licensed under BSD 3 clause.
-
-import numpy as np
-import numpy.typing as npt
-import pandas as pd
-import scipy.optimize as sco
-import sklearn.covariance as skc
-import sklearn.neighbors as skn
-
-from skfolio.moments.covariance._base import BaseCovariance
-from skfolio.utils.stats import corr_to_cov, cov_to_corr
-from skfolio.utils.tools import check_estimator
-
-
-class EmpiricalCovariance(BaseCovariance):
-    """Empirical covariance estimator.
-
-    Parameters
-    ----------
-    window_size : int, optional
-        Window size. The model is fitted on the last `window_size` observations.
-        The default (`None`) is to use all the data.
-
-    ddof : int, default=1
-        Normalization is by `(n_observations - ddof)`.
-        Note that `ddof=1` will return the unbiased estimate, and `ddof=0`
-        will return the simple average. The default value is `1`.
-
-    nearest : bool, default=False
-        If this is set to True, the covariance is replaced by the nearest covariance
-        matrix that is positive definite and with a Cholesky decomposition than can be
-        computed. The variance is left unchanged. A covariance matrix is in theory PSD.
-        However, due to floating-point inaccuracies, we can end up with a covariance
-        matrix that is slightly non-PSD or where Cholesky decomposition is failing.
-        This often occurs in high dimensional problems.
-        For more details, see :func:`~skfolio.units.stats.cov_nearest`.
-        The default is `False`.
-
-    higham : bool, default=False
-        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
-        nearest PSD covariance, otherwise the eigenvalues are clipped to a threshold
-        above zeros (1e-13). The default is `False` and use the clipping method as the
-        Higham & Nick algorithm can be slow for large datasets.
-
-    higham_max_iteration : int, default=100
-        Maximum number of iteration of the Higham & Nick (2002) algorithm.
-        The default value is `100`.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance matrix.
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of assets seen during `fit`. Defined only when `X`
-        has assets names that are all strings.
-    """
-
-    def __init__(
-        self,
-        window_size: int | None = None,
-        ddof: int = 1,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        self.window_size = window_size
-        self.ddof = ddof
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "EmpiricalCovariance":
-        """Fit the empirical covariance estimator.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : EmpiricalCovariance
-            Fitted estimator.
-        """
-        X = self._validate_data(X)
-        if self.window_size is not None:
-            X = X[-self.window_size :]
-        covariance = np.cov(X.T, ddof=self.ddof)
-        self._set_covariance(covariance)
-        return self
-
-
-class GerberCovariance(BaseCovariance):
-    """Gerber covariance estimator.
-
-    Robust co-movement measure which ignores fluctuations below a certain threshold
-    while simultaneously limiting the effects of extreme movements.
-    The Gerber statistic extends Kendall's Tau by counting the proportion of
-    simultaneous co-movements in series when their amplitudes exceed data-dependent
-    thresholds.
-
-    Three variant has been published:
-
-    * Gerber et al. (2015): tend to produce matrices that are non-PSD.
-    * Gerber et al. (2019): alteration of the denominator of the above statistic.
-    * Gerber et al. (2022): final alteration to ensure PSD matrix.
-
-    The last two variants are implemented.
-
-    Parameters
-    ----------
-    window_size : int, optional
-        Window size. The model is fitted on the last `window_size` observations.
-        The default (`None`) is to use all the data.
-
-    threshold : float, default=0.5
-        Gerber threshold. The default value is `0.5`.
-
-    psd_variant : bool, default=True
-        If this is set to True, the Gerber et al. (2022) variant is used to ensure a
-        positive semi-definite matrix.
-        Otherwise, the Gerber et al. (2019) variant is used.
-        The default is `True`.
-
-    nearest : bool, default=False
-        If this is set to True, the covariance is replaced by the nearest covariance
-        matrix that is positive definite and with a Cholesky decomposition than can be
-        computed. The variance is left unchanged. A covariance matrix is in theory PSD.
-        However, due to floating-point inaccuracies, we can end up with a covariance
-        matrix that is slightly non-PSD or where Cholesky decomposition is failing.
-        This often occurs in high dimensional problems.
-        For more details, see :func:`~skfolio.units.stats.cov_nearest`.
-        The default is `False`.
-
-    higham : bool, default=False
-        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
-        nearest PSD covariance, otherwise the eigenvalues are clipped to a threshold
-        above zeros (1e-13). The default is `False` and use the clipping method as the
-        Higham & Nick algorithm can be slow for large datasets.
-
-    higham_max_iteration : int, default=100
-        Maximum number of iteration of the Higham & Nick (2002) algorithm.
-        The default value is `100`.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of assets seen during `fit`. Defined only when `X`
-        has assets names that are all strings.
-
-    References
-    ----------
-    .. [1] "The gerber statistic: A robust co-movement measure for portfolio
-        optimization".
-        The Journal of Portfolio Management.
-        Gerber, S., B. Javid, H. Markowitz, P. Sargen, and D. Starer (2022).
-
-    .. [2] "The gerber statistic: A robust measure of correlation".
-        Gerber, S., B. Javid, H. Markowitz, P. Sargen, and D. Starer (2019).
-
-    .. [3] "Enhancing multi-asset portfolio construction under modern portfolio theory
-        with a robust co-movement measure".
-        Social Science Research network Working Paper Series.
-        Gerber, S., H. Markowitz, and P. Pujara (2015).
-
-    .. [4] "Deconstructing the Gerber Statistic".
-        Flint & Polakow, 2023.
-    """
-
-    def __init__(
-        self,
-        window_size: int | None = None,
-        threshold: float = 0.5,
-        psd_variant: bool = True,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        self.window_size = window_size
-        self.threshold = threshold
-        self.psd_variant = psd_variant
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "GerberCovariance":
-        """Fit the Gerber covariance estimator.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : GerberCovariance
-            Fitted estimator.
-        """
-        X = self._validate_data(X)
-        if self.window_size is not None:
-            X = X[-self.window_size :]
-        if not (1 > self.threshold > 0):
-            raise ValueError("The threshold must be between 0 and 1")
-        n_observations = X.shape[0]
-        std = X.std(axis=0).reshape((-1, 1))
-        u = X >= std.T * self.threshold
-        d = X <= -std.T * self.threshold
-        n = np.invert(u) & np.invert(d)  # np.invert preferred that ~ for type hint
-        n = n.astype(int)
-        u = u.astype(int)
-        d = d.astype(int)
-        concordant = u.T @ u + d.T @ d
-        discordant = u.T @ d + d.T @ u
-        h = concordant - discordant
-        if self.psd_variant:
-            corr = h / (n_observations - n.T @ n)
-        else:
-            h_sqrt = np.sqrt(np.diag(h)).reshape((-1, 1))
-            corr = h / (h_sqrt @ h_sqrt.T)
-        covariance = corr_to_cov(corr, std.reshape(-1))
-        self._set_covariance(covariance)
-        return self
-
-
-class DenoiseCovariance(BaseCovariance):
-    """Covariance Denoising estimator.
-
-    The goal of Covariance Denoising is to reduce the noise and enhance the signal of
-    the empirical covariance matrix [1]_.
-    It reduces the ill-conditioning of the traditional covariance estimate by
-    differentiating the eigenvalues associated with noise from the eigenvalues
-    associated with signal.
-    Denoising replaces the eigenvalues of the eigenvectors classified as random by
-    Marčenko-Pastur with a constant eigenvalue.
-
-    Parameters
-    ----------
-    covariance_estimator : BaseCovariance, optional
-        :ref:`Covariance estimator <covariance_estimator>` to estimate the covariance
-        matrix that will be denoised.
-        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.
-
-    nearest : bool, default=False
-        If this is set to True, the covariance is replaced by the nearest covariance
-        matrix that is positive definite and with a Cholesky decomposition than can be
-        computed. The variance is left unchanged. A covariance matrix is in theory PSD.
-        However, due to floating-point inaccuracies, we can end up with a covariance
-        matrix that is slightly non-PSD or where Cholesky decomposition is failing.
-        This often occurs in high dimensional problems.
-        For more details, see :func:`~skfolio.units.stats.cov_nearest`.
-        The default is `False`.
-
-    higham : bool, default=False
-        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
-        nearest PSD covariance, otherwise the eigenvalues are clipped to a threshold
-        above zeros (1e-13). The default is `False` and use the clipping method as the
-        Higham & Nick algorithm can be slow for large datasets.
-
-    higham_max_iteration : int, default=100
-        Maximum number of iteration of the Higham & Nick (2002) algorithm.
-        The default value is `100`.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    covariance_estimator_ : BaseCovariance
-        Fitted `covariance_estimator`.
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of assets seen during `fit`. Defined only when `X`
-        has assets names that are all strings.
-
-    References
-    ----------
-    .. [1] "Machine Learning for Asset Managers".
-        Elements in Quantitative Finance.
-        Lòpez de Prado (2020).
-    """
-
-    covariance_estimator_: BaseCovariance
-
-    def __init__(
-        self,
-        covariance_estimator: BaseCovariance | None = None,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        self.covariance_estimator = covariance_estimator
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "DenoiseCovariance":
-        """Fit the Covariance Denoising estimator.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : DenoiseCovariance
-            Fitted estimator.
-        """
-        # fitting estimators
-        self.covariance_estimator_ = check_estimator(
-            self.covariance_estimator,
-            default=EmpiricalCovariance(),
-            check_type=BaseCovariance,
-        )
-        self.covariance_estimator_.fit(X)
-
-        # we validate and convert to numpy after all models have been fitted to keep
-        # features names information.
-        X = self._validate_data(X)
-        n_observations, n_assets = X.shape
-        q = n_observations / n_assets
-        corr, std = cov_to_corr(self.covariance_estimator_.covariance_)
-        e_val, e_vec = np.linalg.eigh(corr)
-        indices = e_val.argsort()[::-1]
-        e_val, e_vec = e_val[indices], e_vec[:, indices]
-
-        def _marchenko(x_var):
-            e_min, e_max = (
-                x_var * (1 - (1.0 / q) ** 0.5) ** 2,
-                x_var * (1 + (1.0 / q) ** 0.5) ** 2,
-            )
-            e_val_lin = np.linspace(e_min, e_max, 1000)
-            pdf_0 = (
-                q
-                / (2 * np.pi * x_var * e_val_lin)
-                * ((e_max - e_val_lin) * (e_val_lin - e_min)) ** 0.5
-            )
-            kde = skn.KernelDensity(kernel="gaussian", bandwidth=0.01).fit(
-                e_val.reshape(-1, 1)
-            )
-            # noinspection PyUnresolvedReferences
-            pdf_1 = np.exp(kde.score_samples(pdf_0.reshape(-1, 1)))
-            return np.sum((pdf_1 - pdf_0) ** 2)
-
-        # noinspection PyTypeChecker
-        res = sco.minimize(_marchenko, x0=0.5, bounds=((1e-5, 1 - 1e-5),))
-
-        var = res["x"][0]
-        n_facts = e_val.shape[0] - e_val[::-1].searchsorted(
-            var * (1 + (1.0 / q) ** 0.5) ** 2
-        )
-        e_val_ = e_val.copy()
-        e_val_[n_facts:] = e_val_[n_facts:].sum() / float(e_val_.shape[0] - n_facts)
-        corr = e_vec @ np.diag(e_val_) @ e_vec.T
-        corr, _ = cov_to_corr(corr)
-        covariance = corr_to_cov(corr, std)
-        self._set_covariance(covariance)
-        return self
-
-
-class DetoneCovariance(BaseCovariance):
-    """Covariance Detoning estimator.
-
-    Financial covariance matrices usually incorporate a market component corresponding
-    to the first eigenvectors [1]_.
-    For some applications like clustering, removing the market component (loud tone)
-    allow a greater portion of the covariance to be explained by components that affect
-    specific subsets of the securities.
-
-    Parameters
-    ----------
-    covariance_estimator : BaseCovariance, optional
-        :ref:`Covariance estimator <covariance_estimator>` to estimate the covariance
-        matrix prior detoning.
-        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.
-
-    n_markets : int, default=1
-        Number of eigenvectors related to the market.
-        The default value is `1`.
-
-    nearest : bool, default=False
-        If this is set to True, the covariance is replaced by the nearest covariance
-        matrix that is positive definite and with a Cholesky decomposition than can be
-        computed. The variance is left unchanged. A covariance matrix is in theory PSD.
-        However, due to floating-point inaccuracies, we can end up with a covariance
-        matrix that is slightly non-PSD or where Cholesky decomposition is failing.
-        This often occurs in high dimensional problems.
-        For more details, see :func:`~skfolio.units.stats.cov_nearest`.
-        The default is `False`.
-
-    higham : bool, default=False
-        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
-        nearest PSD covariance, otherwise the eigenvalues are clipped to a threshold
-        above zeros (1e-13). The default is `False` and use the clipping method as the
-        Higham & Nick algorithm can be slow for large datasets.
-
-    higham_max_iteration : int, default=100
-        Maximum number of iteration of the Higham & Nick (2002) algorithm.
-        The default value is `100`.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    covariance_estimator_ : BaseCovariance
-        Fitted `covariance_estimator`.
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of assets seen during `fit`. Defined only when `X`
-        has assets names that are all strings.
-
-    References
-    ----------
-    .. [1] "Machine Learning for Asset Managers".
-        Elements in Quantitative Finance.
-        Lòpez de Prado (2020).
-    """
-
-    covariance_estimator_: BaseCovariance
-
-    def __init__(
-        self,
-        covariance_estimator: BaseCovariance | None = None,
-        n_markets: float = 1,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        self.covariance_estimator = covariance_estimator
-        self.n_markets = n_markets
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "DetoneCovariance":
-        """Fit the Covariance Detoning estimator.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : DetoneCovariance
-            Fitted estimator.
-        """
-        # fitting estimators
-        self.covariance_estimator_ = check_estimator(
-            self.covariance_estimator,
-            default=EmpiricalCovariance(),
-            check_type=BaseCovariance,
-        )
-        self.covariance_estimator_.fit(X)
-
-        # we validate and convert to numpy after all models have been fitted to keep
-        # features names information.
-        _ = self._validate_data(X)
-        corr, std = cov_to_corr(self.covariance_estimator_.covariance_)
-        e_val, e_vec = np.linalg.eigh(corr)
-        indices = e_val.argsort()[::-1]
-        e_val, e_vec = e_val[indices], e_vec[:, indices]
-        # market eigenvalues and eigenvectors
-        market_e_val, market_e_vec = e_val[: self.n_markets], e_vec[:, : self.n_markets]
-        # market correlation
-        market_corr = market_e_vec @ np.diag(market_e_val) @ market_e_vec.T
-        # Removing the market correlation
-        corr -= market_corr
-        corr, _ = cov_to_corr(corr)
-        covariance = corr_to_cov(corr, std)
-        self._set_covariance(covariance)
-        return self
-
-
-class EWCovariance(BaseCovariance):
-    r"""Exponentially Weighted Covariance estimator.
-
-    Estimator of the covariance using the historical exponentially weighted returns.
-
-    Parameters
-    ----------
-    window_size : int, optional
-        Window size. The model is fitted on the last `window_size` observations.
-        The default (`None`) is to use all the data.
-
-    alpha : float, default=0.2
-        Exponential smoothing factor. The default value is `0.2`.
-
-        :math:`0 < \alpha \leq 1`.
-
-    nearest : bool, default=False
-        If this is set to True, the covariance is replaced by the nearest covariance
-        matrix that is positive definite and with a Cholesky decomposition than can be
-        computed. The variance is left unchanged. A covariance matrix is in theory PSD.
-        However, due to floating-point inaccuracies, we can end up with a covariance
-        matrix that is slightly non-PSD or where Cholesky decomposition is failing.
-        This often occurs in high dimensional problems.
-        For more details, see :func:`~skfolio.units.stats.cov_nearest`.
-        The default is `False`.
-
-    higham : bool, default=False
-        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
-        nearest PSD covariance, otherwise the eigenvalues are clipped to a threshold
-        above zeros (1e-13). The default is `False` and use the clipping method as the
-        Higham & Nick algorithm can be slow for large datasets.
-
-    higham_max_iteration : int, default=100
-        Maximum number of iteration of the Higham & Nick (2002) algorithm.
-        The default value is `100`.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during `fit`. Defined only when `X`
-        has feature names that are all strings.
-    """
-
-    def __init__(
-        self,
-        window_size: int | None = None,
-        alpha: float = 0.2,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        self.window_size = window_size
-        self.alpha = alpha
-
-    def fit(self, X: npt.ArrayLike, y=None):
-        """Fit the Exponentially Weighted Covariance estimator.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : EWCovariance
-            Fitted estimator.
-        """
-        X = self._validate_data(X)
-        if self.window_size is not None:
-            X = X[-self.window_size :]
-        n_observations = X.shape[0]
-        covariance = (
-            pd.DataFrame(X)
-            .ewm(alpha=self.alpha)
-            .cov()
-            .loc[(n_observations - 1, slice(None)), :]
-            .to_numpy()
-        )
-        self._set_covariance(covariance)
-        return self
-
-
-class LedoitWolf(BaseCovariance, skc.LedoitWolf):
-    """LedoitWolf Estimator.
-
-    Ledoit-Wolf is a particular form of shrinkage, where the shrinkage
-    coefficient is computed using O. Ledoit and M. Wolf's formula as
-    described in [1]_.
-
-    Read more in `scikit-learn
-    <https://scikit-learn.org/stable/modules/generated/sklearn.covariance.ShrunkCovariance.html>`_.
-
-    Parameters
-    ----------
-    store_precision : bool, default=True
-        Specify if the estimated precision is stored.
-
-    assume_centered : bool, default=False
-        If True, data will not be centered before computation.
-        Useful when working with data whose mean is almost, but not exactly
-        zero.
-        If False (default), data will be centered before computation.
-
-    block_size : int, default=1000
-        Size of blocks into which the covariance matrix will be split
-        during its Ledoit-Wolf estimation. This is purely a memory
-        optimization and does not affect results.
-
-    nearest : bool, default=False
-        If this is set to True, the covariance is replaced by the nearest covariance
-        matrix that is positive definite and with a Cholesky decomposition than can be
-        computed. The variance is left unchanged. A covariance matrix is in theory PSD.
-        However, due to floating-point inaccuracies, we can end up with a covariance
-        matrix that is slightly non-PSD or where Cholesky decomposition is failing.
-        This often occurs in high dimensional problems.
-        For more details, see :func:`~skfolio.units.stats.cov_nearest`.
-        The default is `False`.
-
-    higham : bool, default=False
-        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
-        nearest PSD covariance, otherwise the eigenvalues are clipped to a threshold
-        above zeros (1e-13). The default is `False` and use the clipping method as the
-        Higham & Nick algorithm can be slow for large datasets.
-
-    higham_max_iteration : int, default=100
-        Maximum number of iteration of the Higham & Nick (2002) algorithm.
-        The default value is `100`.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    location_ : ndarray of shape (n_assets,)
-        Estimated location, i.e. the estimated mean.
-
-    precision_ : ndarray of shape (n_assets, n_assets)
-        Estimated pseudo inverse matrix.
-        (stored only if store_precision is True)
-
-    shrinkage_ : float
-        Coefficient in the convex combination used for the computation
-        of the shrunk estimate. Range is [0, 1].
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during `fit`. Defined only when `X`
-        has feature names that are all strings.
-
-    Notes
-    -----
-    The regularised covariance is:
-
-    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
-
-    where mu = trace(cov) / n_features
-    and shrinkage is given by the Ledoit and Wolf formula (see References)
-
-    References
-    ----------
-    .. [1] "A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices".
-        Ledoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2.
-        February 2004, pages 365-41.
-    """
-
-    def __init__(
-        self,
-        store_precision=True,
-        assume_centered=False,
-        block_size=1000,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        skc.LedoitWolf.__init__(
-            self,
-            store_precision=store_precision,
-            assume_centered=assume_centered,
-            block_size=block_size,
-        )
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "LedoitWolf":
-        """Fit the Ledoit-Wolf shrunk covariance model to X.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : LedoitWolf
-            Fitted estimator.
-        """
-        skc.LedoitWolf.fit(self, X)
-        self._set_covariance(self.covariance_)
-        return self
-
-
-class OAS(BaseCovariance, skc.OAS):
-    """Oracle Approximating Shrinkage Estimator as proposed in [1]_.
-
-    Read more in `scikit-learn
-    <https://scikit-learn.org/stable/modules/generated/sklearn.covariance.ShrunkCovariance.html>`_.
-
-    Parameters
-    ----------
-    store_precision : bool, default=True
-        Specify if the estimated precision is stored.
-
-    assume_centered : bool, default=False
-        If True, data will not be centered before computation.
-        Useful when working with data whose mean is almost, but not exactly
-        zero.
-        If False (default), data will be centered before computation.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    location_ : ndarray of shape (n_assets,)
-        Estimated location, i.e. the estimated mean.
-
-    precision_ : ndarray of shape (n_assets, n_assets)
-        Estimated pseudo inverse matrix.
-        (stored only if store_precision is True)
-
-    shrinkage_ : float
-        Coefficient in the convex combination used for the computation
-        of the shrunk estimate. Range is [0, 1].
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during `fit`. Defined only when `X`
-        has feature names that are all strings.
-
-    Notes
-    -----
-    The regularised covariance is:
-
-    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features),
-
-    where mu = trace(cov) / n_features and shrinkage is given by the OAS formula
-    (see [1]_).
-
-    The shrinkage formulation implemented here differs from Eq. 23 in [1]_. In
-    the original article, formula (23) states that 2/p (p being the number of
-    features) is multiplied by Trace(cov*cov) in both the numerator and
-    denominator, but this operation is omitted because for a large p, the value
-    of 2/p is so small that it doesn't affect the value of the estimator.
-
-    References
-    ----------
-    .. [1] "Shrinkage algorithms for MMSE covariance estimation".
-        Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O.
-        IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010.
-    """
-
-    def __init__(
-        self,
-        store_precision=True,
-        assume_centered=False,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        skc.OAS.__init__(
-            self,
-            store_precision=store_precision,
-            assume_centered=assume_centered,
-        )
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "OAS":
-        """Fit the Oracle Approximating Shrinkage covariance model to X.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : OAS
-            Fitted estimator.
-        """
-        skc.OAS.fit(self, X)
-        self._set_covariance(self.covariance_)
-        return self
-
-
-class ShrunkCovariance(BaseCovariance, skc.ShrunkCovariance):
-    """Covariance estimator with shrinkage.
-
-    Read more in `scikit-learn
-    <https://scikit-learn.org/stable/modules/generated/sklearn.covariance.ShrunkCovariance.html>`_.
-
-    Parameters
-    ----------
-    store_precision : bool, default=True
-        Specify if the estimated precision is stored.
-
-    assume_centered : bool, default=False
-        If True, data will not be centered before computation.
-        Useful when working with data whose mean is almost, but not exactly
-        zero.
-        If False (default), data will be centered before computation.
-
-    shrinkage : float, default=0.1
-        Coefficient in the convex combination used for the computation
-        of the shrunk estimate. Range is [0, 1].
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    location_ : ndarray of shape (n_assets,)
-        Estimated location, i.e. the estimated mean.
-
-    precision_ : ndarray of shape (n_assets, n_assets)
-        Estimated pseudo inverse matrix.
-        (stored only if store_precision is True)
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during `fit`. Defined only when `X`
-        has feature names that are all strings.
-
-    Notes
-    -----
-    The regularized covariance is given by:
-
-    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
-
-    where mu = trace(cov) / n_features
-    """
-
-    def __init__(
-        self,
-        store_precision=True,
-        assume_centered=False,
-        shrinkage=0.1,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        skc.ShrunkCovariance.__init__(
-            self,
-            store_precision=store_precision,
-            assume_centered=assume_centered,
-            shrinkage=shrinkage,
-        )
-
-    def fit(self, X: npt.ArrayLike, y=None) -> "ShrunkCovariance":
-        """Fit the shrunk covariance model to X.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : ShrunkCovariance
-            Fitted estimator.
-        """
-        skc.ShrunkCovariance.fit(self, X)
-        self._set_covariance(self.covariance_)
-        return self
-
-
-class GraphicalLassoCV(BaseCovariance, skc.GraphicalLassoCV):
-    """Sparse inverse covariance with cross-validated choice of the l1 penalty.
-
-    Read more in `scikit-learn
-    <https://scikit-learn.org/stable/auto_examples/covariance/plot_sparse_cov.html>`_.
-
-    Parameters
-    ----------
-    alphas : int or array-like of shape (n_alphas,), dtype=float, default=4
-        If an integer is given, it fixes the number of points on the
-        grids of alpha to be used. If a list is given, it gives the
-        grid to be used. See the notes in the class docstring for
-        more details. Range is [1, inf) for an integer.
-        Range is (0, inf] for an array-like of floats.
-
-    n_refinements : int, default=4
-        The number of times the grid is refined. Not used if explicit
-        values of alphas are passed. Range is [1, inf).
-
-    cv : int, cross-validation generator or iterable, default=None
-        Determines the cross-validation splitting strategy.
-        Possible inputs for cv are:
-
-        - None, to use the default 5-fold cross-validation,
-        - integer, to specify the number of folds.
-        - `CV splitter`,
-        - An iterable yielding (train, test) splits as arrays of indices.
-
-        For integer/None inputs :class:`KFold` is used.
-
-    tol : float, default=1e-4
-        The tolerance to declare convergence: if the dual gap goes below
-        this value, iterations are stopped. Range is (0, inf].
-
-    enet_tol : float, default=1e-4
-        The tolerance for the elastic net solver used to calculate the descent
-        direction. This parameter controls the accuracy of the search direction
-        for a given column update, not of the overall parameter estimate. Only
-        used for mode='cd'. Range is (0, inf].
-
-    max_iter : int, default=100
-        Maximum number of iterations.
-
-    mode : {'cd', 'lars'}, default='cd'
-        The Lasso solver to use: coordinate descent or LARS. Use LARS for
-        very sparse underlying graphs, where number of features is greater
-        than number of samples. Elsewhere prefer cd which is more numerically
-        stable.
-
-    n_jobs : int, default=None
-        Number of jobs to run in parallel.
-        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
-        `-1` means using all processors.
-
-    verbose : bool, default=False
-        If verbose is True, the objective function and duality gap are
-        printed at each iteration.
-
-    assume_centered : bool, default=False
-        If True, data are not centered before computation.
-        Useful when working with data whose mean is almost, but not exactly
-        zero.
-        If False, data are centered before computation.
-
-    Attributes
-    ----------
-    covariance_ : ndarray of shape (n_assets, n_assets)
-        Estimated covariance.
-
-    location_ : ndarray of shape (n_assets,)
-        Estimated location, i.e. the estimated mean.
-
-    precision_ : ndarray of shape (n_assets, n_assets)
-        Estimated pseudo inverse matrix.
-        (stored only if store_precision is True)
-
-    alpha_ : float
-        Penalization parameter selected.
-
-    cv_results_ : dict of ndarrays
-        A dict with keys:
-
-        alphas : ndarray of shape (n_alphas,)
-            All penalization parameters explored.
-
-        split(k)_test_score : ndarray of shape (n_alphas,)
-            Log-likelihood score on left-out data across (k)th fold.
-
-            .. versionadded:: 1.0
-
-        mean_test_score : ndarray of shape (n_alphas,)
-            Mean of scores over the folds.
-
-            .. versionadded:: 1.0
-
-        std_test_score : ndarray of shape (n_alphas,)
-            Standard deviation of scores over the folds.
-
-            .. versionadded:: 1.0
-
-    n_iter_ : int
-        Number of iterations run for the optimal alpha.
-
-    n_features_in_ : int
-        Number of assets seen during `fit`.
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during `fit`. Defined only when `X`
-        has feature names that are all strings.
-
-    Notes
-    -----
-    The search for the optimal penalization parameter (`alpha`) is done on an
-    iteratively refined grid: first the cross-validated scores on a grid are
-    computed, then a new refined grid is centered around the maximum, and so
-    on.
-
-    One of the challenges which is faced here is that the solvers can
-    fail to converge to a well-conditioned estimate. The corresponding
-    values of `alpha` then come out as missing values, but the optimum may
-    be close to these missing values.
-
-    In `fit`, once the best parameter `alpha` is found through
-    cross-validation, the model is fit again using the entire training set.
-    """
-
-    def __init__(
-        self,
-        alphas=4,
-        n_refinements=4,
-        cv=None,
-        tol=1e-4,
-        enet_tol=1e-4,
-        max_iter=100,
-        mode="cd",
-        n_jobs=None,
-        verbose=False,
-        assume_centered=False,
-        nearest: bool = False,
-        higham: bool = False,
-        higham_max_iteration: int = 100,
-    ):
-        super().__init__(
-            nearest=nearest,
-            higham=higham,
-            higham_max_iteration=higham_max_iteration,
-        )
-        skc.GraphicalLassoCV.__init__(
-            self,
-            alphas=alphas,
-            n_refinements=n_refinements,
-            cv=cv,
-            tol=tol,
-            enet_tol=enet_tol,
-            max_iter=max_iter,
-            mode=mode,
-            n_jobs=n_jobs,
-            verbose=verbose,
-            assume_centered=assume_centered,
-        )
-
-    def fit(self, X, y=None) -> "GraphicalLassoCV":
-        """Fit the GraphicalLasso covariance model to X.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_observations, n_assets)
-            Price returns of the assets.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        Returns
-        -------
-        self : GraphicalLassoCV
-            Fitted estimator.
-        """
-        skc.GraphicalLassoCV.fit(self, X)
-        self._set_covariance(self.covariance_)
-        return self
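Per the file list above, the logic removed here was redistributed rather than dropped (e.g. `_denoise_covariance.py` +181 lines). As a reading aid for the hunk above, here is a condensed, self-contained sketch of the Marčenko-Pastur denoising step at the heart of `DenoiseCovariance.fit`: eigenvalues below the fitted noise edge are replaced by a constant. The noise variance `var` is assumed to have already been fitted upstream (the `_marchenko`/`sco.minimize` KDE optimization in the removed code).

```python
import numpy as np


def denoise_corr(corr: np.ndarray, q: float, var: float) -> np.ndarray:
    """Replace the noise eigenvalues of a correlation matrix by their mean.

    corr : correlation matrix; q : n_observations / n_assets (q > 1);
    var  : noise variance, fitted upstream by the KDE optimization.
    """
    e_val, e_vec = np.linalg.eigh(corr)
    order = e_val.argsort()[::-1]  # sort eigenvalues in decreasing order
    e_val, e_vec = e_val[order].copy(), e_vec[:, order]
    # Marchenko-Pastur upper edge: eigenvalues at or below it are treated as noise
    e_max = var * (1 + (1.0 / q) ** 0.5) ** 2
    n_facts = e_val.shape[0] - e_val[::-1].searchsorted(e_max)
    if n_facts < e_val.shape[0]:
        # constant replacement eigenvalue: the average of the noise eigenvalues
        e_val[n_facts:] = e_val[n_facts:].mean()
    denoised = e_vec @ np.diag(e_val) @ e_vec.T
    d = np.sqrt(np.diag(denoised))  # rescale back to unit diagonal
    return denoised / np.outer(d, d)
```

The full estimator additionally converts between covariance and correlation with `cov_to_corr`/`corr_to_cov` so that the original variances are re-applied after denoising.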