skfolio 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/datasets/__init__.py +2 -0
- skfolio/datasets/_base.py +51 -0
- skfolio/distance/_distance.py +15 -4
- skfolio/model_selection/_combinatorial.py +2 -2
- skfolio/model_selection/_validation.py +70 -15
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +2 -0
- skfolio/moments/covariance/__init__.py +11 -11
- skfolio/moments/covariance/_base.py +10 -9
- skfolio/moments/covariance/_denoise_covariance.py +181 -0
- skfolio/moments/covariance/_detone_covariance.py +158 -0
- skfolio/moments/covariance/_empirical_covariance.py +100 -0
- skfolio/moments/covariance/_ew_covariance.py +109 -0
- skfolio/moments/covariance/_gerber_covariance.py +157 -0
- skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
- skfolio/moments/covariance/_implied_covariance.py +462 -0
- skfolio/moments/covariance/_ledoit_wolf.py +140 -0
- skfolio/moments/covariance/_oas.py +115 -0
- skfolio/moments/covariance/_shrunk_covariance.py +104 -0
- skfolio/moments/expected_returns/__init__.py +4 -7
- skfolio/moments/expected_returns/_empirical_mu.py +63 -0
- skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
- skfolio/moments/expected_returns/_ew_mu.py +69 -0
- skfolio/moments/expected_returns/{_expected_returns.py → _shrunk_mu.py} +22 -200
- skfolio/optimization/cluster/_nco.py +46 -8
- skfolio/optimization/cluster/hierarchical/_base.py +21 -1
- skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
- skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
- skfolio/optimization/convex/_base.py +10 -1
- skfolio/optimization/convex/_distributionally_robust.py +12 -2
- skfolio/optimization/convex/_maximum_diversification.py +9 -2
- skfolio/optimization/convex/_mean_risk.py +33 -6
- skfolio/optimization/convex/_risk_budgeting.py +5 -2
- skfolio/optimization/ensemble/_stacking.py +32 -9
- skfolio/optimization/naive/_naive.py +20 -2
- skfolio/population/_population.py +2 -0
- skfolio/prior/_base.py +1 -1
- skfolio/prior/_black_litterman.py +20 -2
- skfolio/prior/_empirical.py +38 -5
- skfolio/prior/_factor_model.py +44 -7
- skfolio/uncertainty_set/_base.py +30 -9
- skfolio/uncertainty_set/_bootstrap.py +26 -10
- skfolio/uncertainty_set/_empirical.py +25 -10
- skfolio/utils/stats.py +24 -3
- skfolio/utils/tools.py +213 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/METADATA +3 -2
- skfolio-0.3.1.dist-info/RECORD +91 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/WHEEL +1 -1
- skfolio/moments/covariance/_covariance.py +0 -1114
- skfolio-0.2.3.dist-info/RECORD +0 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/LICENSE +0 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,194 @@
|
|
1
|
+
"""Graphical Lasso CV Covariance Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import sklearn.covariance as skc
|
11
|
+
|
12
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
13
|
+
|
14
|
+
|
15
|
+
class GraphicalLassoCV(BaseCovariance, skc.GraphicalLassoCV):
    """Sparse inverse covariance with cross-validated choice of the l1 penalty.

    Thin wrapper combining scikit-learn's `GraphicalLassoCV` with the skfolio
    `BaseCovariance` interface: the estimation itself is delegated to
    scikit-learn and the resulting covariance is then handed to
    `BaseCovariance._set_covariance`.

    Read more in `scikit-learn
    <https://scikit-learn.org/stable/auto_examples/covariance/plot_sparse_cov.html>`_.

    Parameters
    ----------
    alphas : int or array-like of shape (n_alphas,), dtype=float, default=4
        If an integer is given, it fixes the number of points on the
        grids of alpha to be used. If a list is given, it gives the
        grid to be used. See the notes in the class docstring for
        more details. Range is [1, inf) for an integer.
        Range is (0, inf] for an array-like of floats.

    n_refinements : int, default=4
        The number of times the grid is refined. Not used if explicit
        values of alphas are passed. Range is [1, inf).

    cv : int, cross-validation generator or iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds.
        - `CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs :class:`KFold` is used.

    tol : float, default=1e-4
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped. Range is (0, inf].

    enet_tol : float, default=1e-4
        The tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search direction
        for a given column update, not of the overall parameter estimate. Only
        used for mode='cd'. Range is (0, inf].

    max_iter : int, default=100
        Maximum number of iterations.

    mode : {'cd', 'lars'}, default='cd'
        The Lasso solver to use: coordinate descent or LARS. Use LARS for
        very sparse underlying graphs, where number of features is greater
        than number of samples. Elsewhere prefer cd which is more numerically
        stable.

    n_jobs : int, default=None
        Number of jobs to run in parallel.
        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
        `-1` means using all processors.

    verbose : bool, default=False
        If verbose is True, the objective function and duality gap are
        printed at each iteration.

    assume_centered : bool, default=False
        If True, data are not centered before computation.
        Useful when working with data whose mean is almost, but not exactly
        zero.
        If False, data are centered before computation.

    nearest : bool, default=True
        Forwarded to `BaseCovariance`. If this is set to True, the covariance is
        replaced by the nearest covariance matrix that is positive definite and
        with a Cholesky decomposition that can be computed.
        For more details, see :func:`~skfolio.utils.stats.cov_nearest`.

    higham : bool, default=False
        Forwarded to `BaseCovariance`. If this is set to True, the Higham & Nick
        (2002) algorithm is used to find the nearest PD covariance, otherwise the
        eigenvalues are clipped to a threshold above zero.

    higham_max_iteration : int, default=100
        Forwarded to `BaseCovariance`. Maximum number of iterations of the
        Higham & Nick (2002) algorithm.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance.

    location_ : ndarray of shape (n_assets,)
        Estimated location, i.e. the estimated mean.

    precision_ : ndarray of shape (n_assets, n_assets)
        Estimated pseudo inverse matrix.

    alpha_ : float
        Penalization parameter selected.

    cv_results_ : dict of ndarrays
        A dict with keys:

        alphas : ndarray of shape (n_alphas,)
            All penalization parameters explored.

        split(k)_test_score : ndarray of shape (n_alphas,)
            Log-likelihood score on left-out data across (k)th fold.

            .. versionadded:: 1.0

        mean_test_score : ndarray of shape (n_alphas,)
            Mean of scores over the folds.

            .. versionadded:: 1.0

        std_test_score : ndarray of shape (n_alphas,)
            Standard deviation of scores over the folds.

            .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations run for the optimal alpha.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    Notes
    -----
    The search for the optimal penalization parameter (`alpha`) is done on an
    iteratively refined grid: first the cross-validated scores on a grid are
    computed, then a new refined grid is centered around the maximum, and so
    on.

    One of the challenges which is faced here is that the solvers can
    fail to converge to a well-conditioned estimate. The corresponding
    values of `alpha` then come out as missing values, but the optimum may
    be close to these missing values.

    In `fit`, once the best parameter `alpha` is found through
    cross-validation, the model is fit again using the entire training set.
    """

    def __init__(
        self,
        alphas=4,
        n_refinements=4,
        cv=None,
        tol=1e-4,
        enet_tol=1e-4,
        max_iter=100,
        mode="cd",
        n_jobs=None,
        verbose=False,
        assume_centered=False,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        # The two parents take disjoint sets of parameters, so each one is
        # initialised explicitly: first the skfolio base via `super()`, then the
        # scikit-learn estimator by a direct call.
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        skc.GraphicalLassoCV.__init__(
            self,
            alphas=alphas,
            n_refinements=n_refinements,
            cv=cv,
            tol=tol,
            enet_tol=enet_tol,
            max_iter=max_iter,
            mode=mode,
            n_jobs=n_jobs,
            verbose=verbose,
            assume_centered=assume_centered,
        )

    def fit(self, X, y=None, **fit_params) -> "GraphicalLassoCV":
        """Fit the GraphicalLasso covariance model to X.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        **fit_params : dict
            Additional keyword arguments forwarded unchanged to
            `sklearn.covariance.GraphicalLassoCV.fit`.

        Returns
        -------
        self : GraphicalLassoCV
            Fitted estimator.
        """
        # `y` is intentionally not forwarded: only `X` and `fit_params` reach the
        # scikit-learn implementation.
        skc.GraphicalLassoCV.fit(self, X, **fit_params)
        # Hand the scikit-learn estimate to the skfolio base class.
        # NOTE(review): `_set_covariance` is defined in `BaseCovariance` (not
        # visible here); presumably it applies the `nearest` adjustment - confirm.
        self._set_covariance(self.covariance_)
        return self
|
@@ -0,0 +1,462 @@
|
|
1
|
+
"""Implied Covariance Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import numpy.typing as npt
|
12
|
+
import sklearn as sk
|
13
|
+
import sklearn.base as skb
|
14
|
+
import sklearn.linear_model as skl
|
15
|
+
import sklearn.metrics as sks
|
16
|
+
import sklearn.utils.metadata_routing as skm
|
17
|
+
import sklearn.utils.validation as skv
|
18
|
+
|
19
|
+
import skfolio.typing as skt
|
20
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
21
|
+
from skfolio.moments.covariance._empirical_covariance import EmpiricalCovariance
|
22
|
+
from skfolio.utils.stats import corr_to_cov, cov_to_corr
|
23
|
+
from skfolio.utils.tools import (
|
24
|
+
check_estimator,
|
25
|
+
get_feature_names,
|
26
|
+
input_to_array,
|
27
|
+
safe_indexing,
|
28
|
+
)
|
29
|
+
|
30
|
+
|
31
|
+
class ImpliedCovariance(BaseCovariance):
    r"""Implied Covariance estimator.

    For each asset, the implied volatility time series is used to estimate the realised
    volatility using the non-overlapping log-transformed OLS model [6]_:

    .. math:: \ln(RV_{t}) = \alpha + \beta_{1} \ln(IV_{t-1}) + \beta_{2} \ln(RV_{t-1}) + \epsilon

    with :math:`\alpha`, :math:`\beta_{1}` and :math:`\beta_{2}` the intercept and
    coefficients to estimate, :math:`RV` the realised volatility, and :math:`IV` the
    implied volatility. The training set uses non-overlapping data of sample size
    `window_size` to avoid possible regression errors caused by auto-correlation.
    The logarithmic transformation of volatilities is used for its better finite sample
    properties and distribution, which is closer to normality, less skewed and
    leptokurtic [6]_.

    Alternatively, if `volatility_risk_premium_adj` is provided, the realised
    volatility is estimated using:

    .. math:: RV_{t} = \frac{IV_{t-1}}{VRPA}

    with :math:`VRPA` the volatility risk premium adjustment.

    The final step is the reconstruction of the covariance matrix from the correlation
    and estimated realised volatilities :math:`D`:

    .. math:: \Sigma = D \ Corr \ D

    With :math:`Corr`, the correlation matrix computed from the prior covariance
    estimator. The default is the `EmpiricalCovariance`. It can be changed to any
    covariance estimator using `prior_covariance_estimator`.

    Parameters
    ----------
    prior_covariance_estimator : BaseCovariance, optional
        :ref:`Covariance estimator <covariance_estimator>` to estimate the covariance
        matrix used for the correlation estimates prior to the volatilities update.
        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.

    annualized_factor : float, default=252
        Annualized factor (AF) used to convert the implied volatilities into the same
        frequency as the returns using :math:`\frac{IV}{\sqrt{AF}}`.
        The default is 252 which corresponds to **daily** returns and implied volatility
        expressed in **p.a.**

    window_size : int, default=20
        Window size used to construct the non-overlapping training set of realised
        volatilities and implied volatilities used in the regression.
        The default is 20 observations. Must be strictly greater than 2 when the
        regression model is used.

    linear_regressor : BaseEstimator, optional
        Estimator of the linear regression used to estimate the realised volatilities
        from the implied volatilities. The default is to use the scikit-learn OLS
        estimator `LinearRegression`.

    volatility_risk_premium_adj : float | dict[str, float] | array-like of shape (n_assets, ), optional
        If provided, instead of using the regression model, the realised volatilities
        are estimated using:

        .. math:: RV_{t} = \frac{IV_{t-1}}{VRPA}

        with :math:`VRPA` the volatility risk premium adjustment.

        If a float is provided, it is applied to each asset.
        If a dictionary is provided, its (key/value) pair must be the
        (asset name/asset :math:`VRPA`) and the input `X` of the `fit` method must be a
        DataFrame with the assets names in columns.
        Every value must be strictly positive.

    nearest : bool, default=True
        If this is set to True, the covariance is replaced by the nearest covariance
        matrix that is positive definite and with a Cholesky decomposition that can be
        computed. The variance is left unchanged.
        A covariance matrix that is not positive definite often occurs in high
        dimensional problems. It can be due to multicollinearity, floating-point
        inaccuracies, or when the number of observations is smaller than the number of
        assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
        The default is `True`.

    higham : bool, default=False
        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
        nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
        above zeros (1e-13). The default is `False` and use the clipping method as the
        Higham & Nick algorithm can be slow for large datasets.

    higham_max_iteration : int, default=100
        Maximum number of iteration of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance matrix.

    prior_covariance_estimator_ : BaseEstimator
        Fitted prior covariance estimator.

    pred_realised_vols_ : ndarray of shape (n_assets,)
        The predicted realised volatilities

    linear_regressors_ : list[BaseEstimator]
        The fitted linear regressions.
        Only set when the regression model is used.

    coefs_ : ndarray of shape (n_assets, 2)
        The coefficients of the log transformed regression model for each asset.
        Only set when the regression model is used.

    intercepts_ : ndarray of shape (n_assets,)
        The intercepts of the log transformed regression model for each asset.
        Only set when the regression model is used.

    r2_scores_ : ndarray of shape (n_assets,)
        The in-sample R2 score of the log transformed regression model for each
        asset, computed on the training samples.
        Only set when the regression model is used.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "New evidence on the implied-realized volatility relation".
        Christensen & Hansen (2002).

    .. [2] "The relation between implied and realized volatility".
        Christensen & Prabhala (2002).

    .. [3] "Can implied volatility predict returns on the carry trade?".
        Egbers & Swinkels (2015).

    .. [4] "Volatility and correlation forecasting".
        Egbers & Swinkels (2015).

    .. [5] "Volatility and correlation forecasting".
        Andersen, Bollerslev, Christoffersen & Diebold (2006).

    .. [6] "How Well Does Implied Volatility Predict Future Stock Index Returns and
        Volatility? : A Study of Option-Implied Volatility Derived from OMXS30 Index
        Options".
        Sara Vikberg & Julia Björkman (2020).
    """

    prior_covariance_estimator_: BaseCovariance
    pred_realised_vols_: np.ndarray
    linear_regressors_: list
    coefs_: np.ndarray
    intercepts_: np.ndarray
    r2_scores_: np.ndarray

    def __init__(
        self,
        prior_covariance_estimator: BaseCovariance | None = None,
        annualized_factor: float = 252.0,
        window_size: int = 20,
        linear_regressor: skb.BaseEstimator | None = None,
        volatility_risk_premium_adj: skt.MultiInput | None = None,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        self.prior_covariance_estimator = prior_covariance_estimator
        self.annualized_factor = annualized_factor
        self.linear_regressor = linear_regressor
        self.window_size = window_size
        self.volatility_risk_premium_adj = volatility_risk_premium_adj

    def get_metadata_routing(self):
        """Return the metadata router of this estimator.

        The router registers this estimator's own requests and routes the `fit`
        metadata to the `fit` method of `prior_covariance_estimator`.
        """
        # noinspection PyTypeChecker
        router = (
            skm.MetadataRouter(owner=self.__class__.__name__)
            .add_self_request(self)
            .add(
                prior_covariance_estimator=self.prior_covariance_estimator,
                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
            )
        )
        return router

    def fit(
        self, X: npt.ArrayLike, y=None, implied_vol: npt.ArrayLike = None, **fit_params
    ) -> "ImpliedCovariance":
        """Fit the implied covariance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        implied_vol : array-like of shape (n_observations, n_assets)
            Implied volatilities of the assets. The input is never modified in
            place. When both `X` and `implied_vol` carry asset names, the columns
            of `implied_vol` are selected and re-ordered to match `X`.

        **fit_params : dict
            Parameters to pass to the underlying estimators.
            Only available if `enable_metadata_routing=True`, which can be
            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
            See :ref:`Metadata Routing User Guide <metadata_routing>` for
            more details.

        Returns
        -------
        self : ImpliedCovariance
            Fitted estimator.

        Raises
        ------
        ValueError
            If assets of `X` are missing from `implied_vol`, if
            `volatility_risk_premium_adj` is incomplete or not strictly positive,
            or if `window_size` is `None` or lower than 3 when the regression
            model is used.
        """
        if implied_vol is not None:
            # noinspection PyTypeChecker
            fit_params["implied_vol"] = implied_vol

        routed_params = skm.process_routing(self, "fit", **fit_params)

        # `int(None)` would raise a TypeError before the explicit validation in
        # the regression branch could report a clear error, so `None` is kept.
        window_size = None if self.window_size is None else int(self.window_size)

        # fitting estimators
        self.prior_covariance_estimator_ = check_estimator(
            self.prior_covariance_estimator,
            default=EmpiricalCovariance(),
            check_type=BaseCovariance,
        )
        # noinspection PyArgumentList
        self.prior_covariance_estimator_.fit(
            X, y, **routed_params.prior_covariance_estimator.fit
        )

        corr, _ = cov_to_corr(self.prior_covariance_estimator_.covariance_)

        assets_names = get_feature_names(X)
        if assets_names is not None:
            vol_assets_names = get_feature_names(implied_vol)
            if vol_assets_names is not None:
                # `np.isin` is the supported replacement of the deprecated `np.in1d`
                missing_assets = assets_names[~np.isin(assets_names, vol_assets_names)]
                if len(missing_assets) > 0:
                    raise ValueError(
                        f"The following assets are missing from "
                        f"`implied_vol`: {missing_assets}"
                    )
                indices = [
                    np.argwhere(x == vol_assets_names)[0][0] for x in assets_names
                ]
                # Select same columns as returns (needed for Pipeline with preselection)
                # and re-order to follow returns ordering.
                implied_vol = safe_indexing(implied_vol, indices=indices, axis=1)

        X = self._validate_data(X)
        _, n_assets = X.shape
        implied_vol = check_implied_vol(implied_vol=implied_vol, X=X)
        # Out-of-place division: the validation above does not copy, so an in-place
        # `/=` would silently rescale the array owned by the caller and repeated
        # calls to `fit` would keep shrinking it.
        implied_vol = implied_vol / np.sqrt(self.annualized_factor)

        if self.volatility_risk_premium_adj is not None:
            if np.isscalar(self.volatility_risk_premium_adj):
                volatility_risk_premium_adj = self.volatility_risk_premium_adj
            else:
                volatility_risk_premium_adj = input_to_array(
                    items=self.volatility_risk_premium_adj,
                    n_assets=n_assets,
                    fill_value=np.nan,
                    dim=1,
                    assets_names=(
                        self.feature_names_in_
                        if hasattr(self, "feature_names_in_")
                        else None
                    ),
                    name="volatility_risk_premium_adj",
                )

            # NaN marks an asset for which no adjustment was provided
            if np.any(np.isnan(volatility_risk_premium_adj)):
                raise ValueError(
                    "volatility_risk_premium_adj must contain a value for each assets, "
                    f"received {self.volatility_risk_premium_adj}"
                )
            if np.any(volatility_risk_premium_adj <= 0):
                raise ValueError(
                    "volatility_risk_premium_adj must be strictly positive, "
                    f"received {self.volatility_risk_premium_adj}"
                )

            # RV(t) = IV(t-1) / VRPA, using the latest implied volatilities
            self.pred_realised_vols_ = implied_vol[-1] / volatility_risk_premium_adj
        else:
            if window_size is None or window_size < 3:
                raise ValueError(
                    f"window must be strictly greater than 2, "
                    f"received {self.window_size}"
                )
            _linear_regressor = check_estimator(
                self.linear_regressor,
                default=skl.LinearRegression(fit_intercept=True),
                check_type=skb.BaseEstimator,
            )
            # OLS of ln(RV(t)) = a + b1 ln(IV(t-1)) + b2 ln(RV(t-1)) + epsilon
            self._predict_realised_vols(
                linear_regressor=_linear_regressor,
                returns=X,
                implied_vol=implied_vol,
                window_size=window_size,
            )

        covariance = corr_to_cov(corr, self.pred_realised_vols_)

        self._set_covariance(covariance)
        return self

    def _predict_realised_vols(
        self,
        linear_regressor: skb.BaseEstimator,
        returns: np.ndarray,
        implied_vol: np.ndarray,
        window_size: int,
    ) -> None:
        """Fit one log-log regression per asset and predict its realised volatility.

        Sets `linear_regressors_`, `pred_realised_vols_`, `coefs_`, `intercepts_`
        and `r2_scores_`.

        Parameters
        ----------
        linear_regressor : BaseEstimator
            Unfitted regressor, cloned once per asset.

        returns : ndarray of shape (n_observations, n_assets)
            Price returns of the assets.

        implied_vol : ndarray of shape (n_observations, n_assets)
            Implied volatilities expressed in the same frequency as the returns.

        window_size : int
            Size of the non-overlapping windows.

        Raises
        ------
        ValueError
            If fewer than 3 non-overlapping windows are available, or if a
            realised or implied volatility sample is not strictly positive.
        """
        n_observations, n_assets = returns.shape

        n_folds = n_observations // window_size
        if n_folds < 3:
            raise ValueError(
                f"Not enough observations to compute the volatility regression "
                f"coefficients. The window size of {window_size} on {n_observations} "
                f"observations produces {n_folds} non-overlapping folds. "
                f"The minimum number of fold is 3. You can either increase the number "
                f"of observation in your training set or decrease the window size."
            )

        realised_vol = _compute_realised_vol(
            returns=returns, window_size=window_size, ddof=1
        )

        implied_vol = _compute_implied_vol(
            implied_vol=implied_vol, window_size=window_size
        )

        if realised_vol.shape != implied_vol.shape:
            raise ValueError(
                "`realised_vol` and `implied_vol` must have the same shape"
            )

        # Internal invariant: one realised-volatility sample per window
        assert realised_vol.shape[0] == n_folds

        # A zero volatility would become -inf after the log transform and make the
        # regression fail with an unrelated "contains infinity" error.
        if np.any(realised_vol <= 0) or np.any(implied_vol <= 0):
            raise ValueError(
                "The log-transformed regression requires strictly positive realised "
                "and implied volatilities in every window"
            )

        rv = np.log(realised_vol)
        iv = np.log(implied_vol)

        self.linear_regressors_ = []
        self.pred_realised_vols_ = np.zeros(n_assets)
        self.coefs_ = np.zeros((n_assets, 2))
        self.intercepts_ = np.zeros(n_assets)
        self.r2_scores_ = np.zeros(n_assets)
        for i in range(n_assets):
            model = sk.clone(linear_regressor)
            # Regressors of window k: [ln(IV(k)), ln(RV(k))]; target: ln(RV(k+1))
            X = np.hstack((iv[:, [i]], rv[:, [i]]))
            X_train = X[:-1]
            # The last window has no target yet: it is used for the prediction
            X_pred = X[[-1]]
            y_train = rv[1:, i]

            model.fit(X=X_train, y=y_train)
            self.coefs_[i, :] = model.coef_
            self.intercepts_[i] = model.intercept_
            self.r2_scores_[i] = sks.r2_score(y_train, model.predict(X_train))
            rv_pred = model.predict(X_pred)
            # Back-transform from log-volatility to volatility
            self.pred_realised_vols_[i] = np.exp(rv_pred[0])
            self.linear_regressors_.append(model)
|
387
|
+
|
388
|
+
|
389
|
+
def _compute_realised_vol(
    returns: np.ndarray, window_size: int, ddof: int = 1
) -> np.ndarray:
    """Build the realised volatility samples used by the regression model.

    The observations are cut into consecutive, non-overlapping windows of
    `window_size` rows, aligned on the most recent observation: when the number
    of observations is not a multiple of `window_size`, the oldest rows are
    discarded. The standard deviation of each window is returned.

    Parameters
    ----------
    returns : ndarray of shape (n_observations, n_assets)
        Price returns of the assets.

    window_size : int
        Number of observations per window.

    ddof : int, default=1
        Delta degrees of freedom passed to the standard deviation.

    Returns
    -------
    realised_vol : ndarray of shape (n_observations // window_size, n_assets)
        Standard deviation of the returns over each window.
    """
    n_observations, n_assets = returns.shape
    # Number of full windows, and number of leading rows that do not fit in one
    n_windows, n_dropped = divmod(n_observations, window_size)
    windows = np.reshape(
        returns[n_dropped:, :], (n_windows, window_size, n_assets)
    )
    return np.std(windows, axis=1, ddof=ddof)
|
404
|
+
|
405
|
+
|
406
|
+
def _compute_implied_vol(implied_vol: np.ndarray, window_size: int) -> np.ndarray:
    """Build the implied volatility samples used by the regression model.

    Keeps one row per non-overlapping window of `window_size` observations: the
    last row of each window, with the windows aligned on the most recent
    observation.

    Parameters
    ----------
    implied_vol : ndarray of shape (n_observations, n_assets)
        Implied volatilities of the assets.

    window_size : int
        Number of observations per window.

    Returns
    -------
    implied_vol : ndarray of shape (n_observations // window_size, n_assets)
        Implied volatilities observed at the end of each window (a copy).
    """
    n_observations, _n_assets = implied_vol.shape
    n_windows = n_observations // window_size
    # Distance of each window end from the last row, oldest window first
    steps_back = window_size * np.arange(n_windows - 1, -1, -1)
    return implied_vol[n_observations - 1 - steps_back]
|
415
|
+
|
416
|
+
|
417
|
+
def check_implied_vol(implied_vol: npt.ArrayLike, X: npt.ArrayLike) -> np.ndarray:
    """Validate implied volatilities.

    Parameters
    ----------
    implied_vol : array-like of shape (n_observations, n_assets)
        Implied volatilities of the assets.

    X : array-like of shape (n_observations, n_assets)
        Price returns of the assets. Only its shape is used.

    Returns
    -------
    implied_vol : ndarray of shape (n_observations, n_assets)
        Validated implied volatilities. The conversion is done without forcing a
        copy, so the result may share memory with the input.

    Raises
    ------
    ValueError
        If `implied_vol` is None, is not a 2D array with the same shape as `X`,
        or contains negative values.
    """
    # noinspection PyUnresolvedReferences
    n_observations, n_assets = X.shape

    if implied_vol is None:
        raise ValueError("`implied_vol` cannot be None")

    implied_vol = skv.check_array(
        implied_vol,
        accept_sparse=False,
        # 1D input is let through here so that the dedicated error below is raised
        ensure_2d=False,
        dtype=[np.float64, np.float32],
        order="C",
        copy=False,
        input_name="implied_vol",
    )
    if implied_vol.ndim != 2:
        raise ValueError(
            "`implied_vol` must be a 2D array of shape (n_observations, n_assets)"
        )

    if implied_vol.shape != (n_observations, n_assets):
        raise ValueError(
            f"implied_vol.shape == {implied_vol.shape}, "
            f"expected {(n_observations, n_assets)}"
        )

    # Check the values themselves: checking the (always positive) shape tuple
    # would let negative volatilities through.
    skv.check_non_negative(implied_vol, "`implied_vol`")
    # noinspection PyTypeChecker
    return implied_vol
|