skfolio 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. skfolio/datasets/__init__.py +2 -0
  2. skfolio/datasets/_base.py +51 -0
  3. skfolio/distance/_distance.py +15 -4
  4. skfolio/model_selection/_combinatorial.py +2 -2
  5. skfolio/model_selection/_validation.py +70 -15
  6. skfolio/model_selection/_walk_forward.py +3 -3
  7. skfolio/moments/__init__.py +2 -0
  8. skfolio/moments/covariance/__init__.py +11 -11
  9. skfolio/moments/covariance/_base.py +10 -9
  10. skfolio/moments/covariance/_denoise_covariance.py +181 -0
  11. skfolio/moments/covariance/_detone_covariance.py +158 -0
  12. skfolio/moments/covariance/_empirical_covariance.py +100 -0
  13. skfolio/moments/covariance/_ew_covariance.py +109 -0
  14. skfolio/moments/covariance/_gerber_covariance.py +157 -0
  15. skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
  16. skfolio/moments/covariance/_implied_covariance.py +454 -0
  17. skfolio/moments/covariance/_ledoit_wolf.py +140 -0
  18. skfolio/moments/covariance/_oas.py +115 -0
  19. skfolio/moments/covariance/_shrunk_covariance.py +104 -0
  20. skfolio/moments/expected_returns/__init__.py +4 -7
  21. skfolio/moments/expected_returns/_empirical_mu.py +63 -0
  22. skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
  23. skfolio/moments/expected_returns/_ew_mu.py +69 -0
  24. skfolio/moments/expected_returns/{_expected_returns.py → _shrunk_mu.py} +22 -200
  25. skfolio/optimization/cluster/_nco.py +46 -8
  26. skfolio/optimization/cluster/hierarchical/_base.py +21 -1
  27. skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
  28. skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
  29. skfolio/optimization/convex/_base.py +10 -1
  30. skfolio/optimization/convex/_distributionally_robust.py +12 -2
  31. skfolio/optimization/convex/_maximum_diversification.py +9 -2
  32. skfolio/optimization/convex/_mean_risk.py +33 -6
  33. skfolio/optimization/convex/_risk_budgeting.py +5 -2
  34. skfolio/optimization/ensemble/_stacking.py +32 -9
  35. skfolio/optimization/naive/_naive.py +20 -2
  36. skfolio/population/_population.py +2 -0
  37. skfolio/prior/_base.py +1 -1
  38. skfolio/prior/_black_litterman.py +20 -2
  39. skfolio/prior/_empirical.py +38 -5
  40. skfolio/prior/_factor_model.py +44 -7
  41. skfolio/uncertainty_set/_base.py +30 -9
  42. skfolio/uncertainty_set/_bootstrap.py +26 -10
  43. skfolio/uncertainty_set/_empirical.py +25 -10
  44. skfolio/utils/stats.py +24 -3
  45. skfolio/utils/tools.py +213 -79
  46. {skfolio-0.2.2.dist-info → skfolio-0.3.0.dist-info}/METADATA +4 -3
  47. skfolio-0.3.0.dist-info/RECORD +91 -0
  48. {skfolio-0.2.2.dist-info → skfolio-0.3.0.dist-info}/WHEEL +1 -1
  49. skfolio/moments/covariance/_covariance.py +0 -1114
  50. skfolio-0.2.2.dist-info/RECORD +0 -79
  51. {skfolio-0.2.2.dist-info → skfolio-0.3.0.dist-info}/LICENSE +0 -0
  52. {skfolio-0.2.2.dist-info → skfolio-0.3.0.dist-info}/top_level.txt +0 -0
skfolio/moments/covariance/_graphical_lasso_cv.py
@@ -0,0 +1,194 @@
+ """Graphical Lasso CV Covariance Estimators."""
+
+ # Copyright (c) 2023
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
+ # License: BSD 3 clause
+ # Implementation derived from:
+ # scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
+ # Grisel Licensed under BSD 3 clause.
+
+ import sklearn.covariance as skc
+
+ from skfolio.moments.covariance._base import BaseCovariance
+
+
+ class GraphicalLassoCV(BaseCovariance, skc.GraphicalLassoCV):
+     """Sparse inverse covariance with cross-validated choice of the l1 penalty.
+
+     Read more in `scikit-learn
+     <https://scikit-learn.org/stable/auto_examples/covariance/plot_sparse_cov.html>`_.
+
+     Parameters
+     ----------
+     alphas : int or array-like of shape (n_alphas,), dtype=float, default=4
+         If an integer is given, it fixes the number of points on the
+         grids of alpha to be used. If a list is given, it gives the
+         grid to be used. See the notes in the class docstring for
+         more details. Range is [1, inf) for an integer.
+         Range is (0, inf] for an array-like of floats.
+
+     n_refinements : int, default=4
+         The number of times the grid is refined. Not used if explicit
+         values of alphas are passed. Range is [1, inf).
+
+     cv : int, cross-validation generator or iterable, default=None
+         Determines the cross-validation splitting strategy.
+         Possible inputs for cv are:
+
+         - None, to use the default 5-fold cross-validation,
+         - integer, to specify the number of folds.
+         - `CV splitter`,
+         - An iterable yielding (train, test) splits as arrays of indices.
+
+         For integer/None inputs :class:`KFold` is used.
+
+     tol : float, default=1e-4
+         The tolerance to declare convergence: if the dual gap goes below
+         this value, iterations are stopped. Range is (0, inf].
+
+     enet_tol : float, default=1e-4
+         The tolerance for the elastic net solver used to calculate the descent
+         direction. This parameter controls the accuracy of the search direction
+         for a given column update, not of the overall parameter estimate. Only
+         used for mode='cd'. Range is (0, inf].
+
+     max_iter : int, default=100
+         Maximum number of iterations.
+
+     mode : {'cd', 'lars'}, default='cd'
+         The Lasso solver to use: coordinate descent or LARS. Use LARS for
+         very sparse underlying graphs, where number of features is greater
+         than number of samples. Elsewhere prefer cd which is more numerically
+         stable.
+
+     n_jobs : int, default=None
+         Number of jobs to run in parallel.
+         `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
+         `-1` means using all processors.
+
+     verbose : bool, default=False
+         If verbose is True, the objective function and duality gap are
+         printed at each iteration.
+
+     assume_centered : bool, default=False
+         If True, data are not centered before computation.
+         Useful when working with data whose mean is almost, but not exactly
+         zero.
+         If False, data are centered before computation.
+
+     Attributes
+     ----------
+     covariance_ : ndarray of shape (n_assets, n_assets)
+         Estimated covariance.
+
+     location_ : ndarray of shape (n_assets,)
+         Estimated location, i.e. the estimated mean.
+
+     precision_ : ndarray of shape (n_assets, n_assets)
+         Estimated pseudo inverse matrix.
+         (stored only if store_precision is True)
+
+     alpha_ : float
+         Penalization parameter selected.
+
+     cv_results_ : dict of ndarrays
+         A dict with keys:
+
+         alphas : ndarray of shape (n_alphas,)
+             All penalization parameters explored.
+
+         split(k)_test_score : ndarray of shape (n_alphas,)
+             Log-likelihood score on left-out data across (k)th fold.
+
+             .. versionadded:: 1.0
+
+         mean_test_score : ndarray of shape (n_alphas,)
+             Mean of scores over the folds.
+
+             .. versionadded:: 1.0
+
+         std_test_score : ndarray of shape (n_alphas,)
+             Standard deviation of scores over the folds.
+
+             .. versionadded:: 1.0
+
+     n_iter_ : int
+         Number of iterations run for the optimal alpha.
+
+     n_features_in_ : int
+         Number of assets seen during `fit`.
+
+     feature_names_in_ : ndarray of shape (`n_features_in_`,)
+         Names of features seen during `fit`. Defined only when `X`
+         has feature names that are all strings.
+
+     Notes
+     -----
+     The search for the optimal penalization parameter (`alpha`) is done on an
+     iteratively refined grid: first the cross-validated scores on a grid are
+     computed, then a new refined grid is centered around the maximum, and so
+     on.
+
+     One of the challenges which is faced here is that the solvers can
+     fail to converge to a well-conditioned estimate. The corresponding
+     values of `alpha` then come out as missing values, but the optimum may
+     be close to these missing values.
+
+     In `fit`, once the best parameter `alpha` is found through
+     cross-validation, the model is fit again using the entire training set.
+     """
+
+     def __init__(
+         self,
+         alphas=4,
+         n_refinements=4,
+         cv=None,
+         tol=1e-4,
+         enet_tol=1e-4,
+         max_iter=100,
+         mode="cd",
+         n_jobs=None,
+         verbose=False,
+         assume_centered=False,
+         nearest: bool = True,
+         higham: bool = False,
+         higham_max_iteration: int = 100,
+     ):
+         super().__init__(
+             nearest=nearest,
+             higham=higham,
+             higham_max_iteration=higham_max_iteration,
+         )
+         skc.GraphicalLassoCV.__init__(
+             self,
+             alphas=alphas,
+             n_refinements=n_refinements,
+             cv=cv,
+             tol=tol,
+             enet_tol=enet_tol,
+             max_iter=max_iter,
+             mode=mode,
+             n_jobs=n_jobs,
+             verbose=verbose,
+             assume_centered=assume_centered,
+         )
+
+     def fit(self, X, y=None, **fit_params) -> "GraphicalLassoCV":
+         """Fit the GraphicalLasso covariance model to X.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_assets)
+             Price returns of the assets.
+
+         y : Ignored
+             Not used, present for API consistency by convention.
+
+         Returns
+         -------
+         self : GraphicalLassoCV
+             Fitted estimator.
+         """
+         skc.GraphicalLassoCV.fit(self, X, **fit_params)
+         self._set_covariance(self.covariance_)
+         return self
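A minimal usage sketch of the new estimator follows (not part of the released files; synthetic returns, illustrative parameter values). It assumes skfolio 0.3.0 with `GraphicalLassoCV` re-exported from `skfolio.moments`, as the updated `skfolio/moments/covariance/__init__.py` in this diff suggests:

import numpy as np

from skfolio.moments import GraphicalLassoCV

# Synthetic daily returns: 500 observations of 10 assets (illustration only).
rng = np.random.default_rng(0)
X = rng.normal(scale=0.01, size=(500, 10))

model = GraphicalLassoCV(n_refinements=4, cv=5)
model.fit(X)

print(model.alpha_)             # l1 penalty selected by cross-validation
print(model.covariance_.shape)  # (10, 10) covariance consumed by skfolio
print(model.precision_.shape)   # sparse inverse covariance from scikit-learn

In a skfolio pipeline such a covariance estimator would typically be nested inside a prior (for example as the `covariance_estimator` of `EmpiricalPrior`) rather than used standalone.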
skfolio/moments/covariance/_implied_covariance.py
@@ -0,0 +1,454 @@
+ """Implied Covariance Estimators."""
+
+ # Copyright (c) 2023
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
+ # License: BSD 3 clause
+ # Implementation derived from:
+ # scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
+ # Grisel Licensed under BSD 3 clause.
+
+ import numpy as np
+ import numpy.typing as npt
+ import sklearn as sk
+ import sklearn.base as skb
+ import sklearn.linear_model as skl
+ import sklearn.metrics as sks
+ import sklearn.utils.metadata_routing as skm
+ import sklearn.utils.validation as skv
+
+ import skfolio.typing as skt
+ from skfolio.moments.covariance._base import BaseCovariance
+ from skfolio.moments.covariance._empirical_covariance import EmpiricalCovariance
+ from skfolio.utils.stats import corr_to_cov, cov_to_corr
+ from skfolio.utils.tools import (
+     check_estimator,
+     get_feature_names,
+     input_to_array,
+     safe_indexing,
+ )
+
+
+ class ImpliedCovariance(BaseCovariance):
+     r"""Implied Covariance estimator.
+
+     For each asset, the implied volatility time series is used to estimate the realised
+     volatility using the non-overlapping log-transformed OLS model [6]_:
+
+     .. math:: \ln(RV_{t}) = \alpha + \beta_{1} \ln(IV_{t-1}) + \beta_{2} \ln(RV_{t-1}) + \epsilon
+
+     with :math:`\alpha`, :math:`\beta_{1}` and :math:`\beta_{2}` the intercept and
+     coefficients to estimate, :math:`RV` the realised volatility, and :math:`IV` the
+     implied volatility. The training set uses non-overlapping data of sample size
+     `window_size` to avoid possible regression errors caused by auto-correlation.
+     The logarithmic transformation of volatilities is used for its better finite sample
+     properties and distribution, which is closer to normality, less skewed and
+     leptokurtic [6]_.
+
+     Alternatively, if `volatility_risk_premium_adj` is provided, the realised
+     volatility is estimated using:
+
+     .. math:: RV_{t} = \frac{IV_{t-1}}{VRPA}
+
+     with :math:`VRPA` the volatility risk premium adjustment.
+
+     The covariance estimator is then used to compute the correlation matrix.
+     The final step is the reconstruction of the covariance matrix from the correlation
+     and estimated realised volatilities :math:`D`:
+
+     .. math:: \Sigma = D \ Corr \ D
+
+     Parameters
+     ----------
+     covariance_estimator : BaseCovariance, optional
+         :ref:`Covariance estimator <covariance_estimator>` to estimate the covariance
+         matrix used for the correlation estimates.
+         The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.
+
+     annualized_factor : float, default=252
+         Annualized factor (AF) used to convert the implied volatilities into the same
+         frequency as the returns using :math:`\frac{IV}{\sqrt{AF}}`.
+         The default is 252, which corresponds to **daily** returns and implied
+         volatility expressed **per annum**.
+
+     window_size : int, default=20
+         Window size used to construct the non-overlapping training set of realised
+         volatilities and implied volatilities used in the regression.
+         The default is 20 observations.
+
+     linear_regressor : BaseEstimator, optional
+         Estimator of the linear regression used to estimate the realised volatilities
+         from the implied volatilities. The default is to use the scikit-learn OLS
+         estimator `LinearRegression`.
+
+     volatility_risk_premium_adj : float | dict[str, float] | array-like of shape (n_assets,), optional
+         If provided, instead of using the regression model, the realised volatilities
+         are estimated using:
+
+         .. math:: RV_{t} = \frac{IV_{t-1}}{VRPA}
+
+         with :math:`VRPA` the volatility risk premium adjustment.
+
+         If a float is provided, it is applied to each asset.
+         If a dictionary is provided, its (key/value) pair must be the
+         (asset name/asset :math:`VRPA`) and the input `X` of the `fit` method must be a
+         DataFrame with the asset names in columns.
+
+     nearest : bool, default=True
+         If this is set to True, the covariance is replaced by the nearest covariance
+         matrix that is positive definite and for which a Cholesky decomposition can be
+         computed. The variance is left unchanged.
+         A covariance matrix that is not positive definite often occurs in high
+         dimensional problems. It can be due to multicollinearity, floating-point
+         inaccuracies, or when the number of observations is smaller than the number of
+         assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
+         The default is `True`.
+
+     higham : bool, default=False
+         If this is set to True, the Higham (2002) algorithm is used to find the
+         nearest positive-definite covariance, otherwise the eigenvalues are clipped to
+         a threshold above zero (1e-13). The default is `False`, which uses the clipping
+         method, as the Higham algorithm can be slow for large datasets.
+
+     higham_max_iteration : int, default=100
+         Maximum number of iterations of the Higham (2002) algorithm.
+         The default value is `100`.
+
+     Attributes
+     ----------
+     covariance_ : ndarray of shape (n_assets, n_assets)
+         Estimated covariance matrix.
+
+     pred_realised_vols_ : ndarray of shape (n_assets,)
+         The predicted realised volatilities.
+
+     linear_regressors_ : list[BaseEstimator]
+         The fitted linear regressions.
+
+     coefs_ : ndarray of shape (n_assets, 2)
+         The coefficients of the log-transformed regression model for each asset.
+
+     intercepts_ : ndarray of shape (n_assets,)
+         The intercepts of the log-transformed regression model for each asset.
+
+     n_features_in_ : int
+         Number of assets seen during `fit`.
+
+     feature_names_in_ : ndarray of shape (`n_features_in_`,)
+         Names of assets seen during `fit`. Defined only when `X`
+         has asset names that are all strings.
+
+     References
+     ----------
+     .. [1] "New evidence on the implied-realized volatility relation".
+         Christensen & Hansen (2002).
+
+     .. [2] "The relation between implied and realized volatility".
+         Christensen & Prabhala (2002).
+
+     .. [3] "Can implied volatility predict returns on the carry trade?".
+         Egbers & Swinkels (2015).
+
+     .. [4] "Volatility and correlation forecasting".
+         Egbers & Swinkels (2015).
+
+     .. [5] "Volatility and correlation forecasting".
+         Andersen, Bollerslev, Christoffersen & Diebold (2006).
+
+     .. [6] "How Well Does Implied Volatility Predict Future Stock Index Returns and
+         Volatility? : A Study of Option-Implied Volatility Derived from OMXS30 Index
+         Options".
+         Sara Vikberg & Julia Björkman (2020).
+     """
+
+     covariance_estimator_: BaseCovariance
+     pred_realised_vols_: np.ndarray
+     linear_regressors_: list
+     coefs_: np.ndarray
+     intercepts_: np.ndarray
+     r2_scores_: np.ndarray
+
+     def __init__(
+         self,
+         covariance_estimator: BaseCovariance | None = None,
+         annualized_factor: float = 252.0,
+         window_size: int = 20,
+         linear_regressor: skb.BaseEstimator | None = None,
+         volatility_risk_premium_adj: skt.MultiInput | None = None,
+         nearest: bool = True,
+         higham: bool = False,
+         higham_max_iteration: int = 100,
+     ):
+         super().__init__(
+             nearest=nearest,
+             higham=higham,
+             higham_max_iteration=higham_max_iteration,
+         )
+         self.covariance_estimator = covariance_estimator
+         self.annualized_factor = annualized_factor
+         self.linear_regressor = linear_regressor
+         self.window_size = window_size
+         self.volatility_risk_premium_adj = volatility_risk_premium_adj
+
+     def get_metadata_routing(self):
+         # noinspection PyTypeChecker
+         router = (
+             skm.MetadataRouter(owner=self.__class__.__name__)
+             .add_self_request(self)
+             .add(
+                 covariance_estimator=self.covariance_estimator,
+                 method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+             )
+         )
+         return router
+
+     def fit(
+         self, X: npt.ArrayLike, y=None, implied_vol: npt.ArrayLike = None, **fit_params
+     ) -> "ImpliedCovariance":
+         """Fit the implied covariance estimator.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_assets)
+             Price returns of the assets.
+
+         y : Ignored
+             Not used, present for API consistency by convention.
+
+         implied_vol : array-like of shape (n_observations, n_assets)
+             Implied volatilities of the assets.
+
+         **fit_params : dict
+             Parameters to pass to the underlying estimators.
+             Only available if `enable_metadata_routing=True`, which can be
+             set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+             See :ref:`Metadata Routing User Guide <metadata_routing>` for
+             more details.
+
+         Returns
+         -------
+         self : ImpliedCovariance
+             Fitted estimator.
+         """
+         if implied_vol is not None:
+             # noinspection PyTypeChecker
+             fit_params["implied_vol"] = implied_vol
+
+         routed_params = skm.process_routing(self, "fit", **fit_params)
+
+         window_size = int(self.window_size)
+         # fitting estimators
+         self.covariance_estimator_ = check_estimator(
+             self.covariance_estimator,
+             default=EmpiricalCovariance(),
+             check_type=BaseCovariance,
+         )
+         # noinspection PyArgumentList
+         self.covariance_estimator_.fit(X, y, **routed_params.covariance_estimator.fit)
+
+         corr, _ = cov_to_corr(self.covariance_estimator_.covariance_)
+
+         assets_names = get_feature_names(X)
+         if assets_names is not None:
+             vol_assets_names = get_feature_names(implied_vol)
+             if vol_assets_names is not None:
+                 missing_assets = assets_names[~np.in1d(assets_names, vol_assets_names)]
+                 if len(missing_assets) > 0:
+                     raise ValueError(
+                         f"The following assets are missing from "
+                         f"`implied_vol`: {missing_assets}"
+                     )
+                 indices = [
+                     np.argwhere(x == vol_assets_names)[0][0] for x in assets_names
+                 ]
+                 # Select same columns as returns (needed for Pipeline with preselection)
+                 # and re-order to follow returns ordering.
+                 implied_vol = safe_indexing(implied_vol, indices=indices, axis=1)
+
+         X = self._validate_data(X)
+         _, n_assets = X.shape
+         implied_vol = check_implied_vol(implied_vol=implied_vol, X=X)
+         implied_vol /= np.sqrt(self.annualized_factor)
+
+         if self.volatility_risk_premium_adj is not None:
+             if np.isscalar(self.volatility_risk_premium_adj):
+                 volatility_risk_premium_adj = self.volatility_risk_premium_adj
+             else:
+                 volatility_risk_premium_adj = input_to_array(
+                     items=self.volatility_risk_premium_adj,
+                     n_assets=n_assets,
+                     fill_value=np.nan,
+                     dim=1,
+                     assets_names=(
+                         self.feature_names_in_
+                         if hasattr(self, "feature_names_in_")
+                         else None
+                     ),
+                     name="volatility_risk_premium_adj",
+                 )
+
+             if np.any(np.isnan(volatility_risk_premium_adj)):
+                 raise ValueError(
+                     "volatility_risk_premium_adj must contain a value for each asset, "
+                     f"received {self.volatility_risk_premium_adj}"
+                 )
+             if np.any(volatility_risk_premium_adj <= 0):
+                 raise ValueError(
+                     "volatility_risk_premium_adj must be strictly positive, "
+                     f"received {self.volatility_risk_premium_adj}"
+                 )
+
+             self.pred_realised_vols_ = implied_vol[-1] / volatility_risk_premium_adj
+         else:
+             if window_size is None or window_size < 3:
+                 raise ValueError(
+                     f"window_size must be strictly greater than 2, "
+                     f"received {self.window_size}"
+                 )
+             _linear_regressor = check_estimator(
+                 self.linear_regressor,
+                 default=skl.LinearRegression(fit_intercept=True),
+                 check_type=skb.BaseEstimator,
+             )
+             # OLS of ln(RV(t)) = a + b1 ln(IV(t-1)) + b2 ln(RV(t-1)) + epsilon
+             self._predict_realised_vols(
+                 linear_regressor=_linear_regressor,
+                 returns=X,
+                 implied_vol=implied_vol,
+                 window_size=window_size,
+             )
+
+         covariance = corr_to_cov(corr, self.pred_realised_vols_)
+
+         self._set_covariance(covariance)
+         return self
+
+     def _predict_realised_vols(
+         self,
+         linear_regressor: skb.BaseEstimator,
+         returns: np.ndarray,
+         implied_vol: np.ndarray,
+         window_size: int,
+     ) -> None:
+         n_observations, n_assets = returns.shape
+
+         n_folds = n_observations // window_size
+         if n_folds < 3:
+             raise ValueError(
+                 f"Not enough observations to compute the volatility regression "
+                 f"coefficients. The window size of {window_size} on {n_observations} "
+                 f"observations produces {n_folds} non-overlapping folds. "
+                 f"The minimum number of folds is 3. You can either increase the number "
+                 f"of observations in your training set or decrease the window size."
+             )
+
+         realised_vol = _compute_realised_vol(
+             returns=returns, window_size=window_size, ddof=1
+         )
+
+         implied_vol = _compute_implied_vol(
+             implied_vol=implied_vol, window_size=window_size
+         )
+
+         if realised_vol.shape != implied_vol.shape:
+             raise ValueError("`realised_vol` and `implied_vol` must have same shape")
+
+         assert realised_vol.shape[0] == n_folds
+
+         rv = np.log(realised_vol)
+         iv = np.log(implied_vol)
+
+         self.linear_regressors_ = []
+         self.pred_realised_vols_ = np.zeros(n_assets)
+         self.coefs_ = np.zeros((n_assets, 2))
+         self.intercepts_ = np.zeros(n_assets)
+         self.r2_scores_ = np.zeros(n_assets)
+         for i in range(n_assets):
+             model = sk.clone(linear_regressor)
+             X = np.hstack((iv[:, [i]], rv[:, [i]]))
+             X_train = X[:-1]
+             X_pred = X[[-1]]
+             y_train = rv[1:, i]
+
+             model.fit(X=X_train, y=y_train)
+             self.coefs_[i, :] = model.coef_
+             self.intercepts_[i] = model.intercept_
+             self.r2_scores_[i] = sks.r2_score(y_train, model.predict(X_train))
+             rv_pred = model.predict(X_pred)
+             self.pred_realised_vols_[i] = np.exp(rv_pred[0])
+             self.linear_regressors_.append(model)
+
+
+ def _compute_realised_vol(
+     returns: np.ndarray, window_size: int, ddof: int = 1
+ ) -> np.ndarray:
+     """Create the realised volatilities samples for the regression model."""
+     n_observations, n_assets = returns.shape
+     chunks = n_observations // window_size
+
+     return np.std(
+         np.reshape(
+             returns[n_observations - chunks * window_size :, :],
+             (chunks, window_size, n_assets),
+         ),
+         ddof=ddof,
+         axis=1,
+     )
+
+
+ def _compute_implied_vol(implied_vol: np.ndarray, window_size: int) -> np.ndarray:
+     """Create the implied volatilities samples for the regression model."""
+     n_observations, _ = implied_vol.shape
+     chunks = n_observations // window_size
+     return implied_vol[
+         np.arange(
+             n_observations - (chunks - 1) * window_size - 1, n_observations, window_size
+         )
+     ]
+
+
+ def check_implied_vol(implied_vol: npt.ArrayLike, X: npt.ArrayLike) -> np.ndarray:
+     """Validate implied volatilities.
+
+     Parameters
+     ----------
+     implied_vol : array-like of shape (n_observations, n_assets)
+         Implied volatilities of the assets.
+
+     X : array-like of shape (n_observations, n_assets)
+         Price returns of the assets.
+
+     Returns
+     -------
+     implied_vol : ndarray of shape (n_observations, n_assets)
+         Validated implied volatilities.
+     """
+     # noinspection PyUnresolvedReferences
+     n_observations, n_assets = X.shape
+
+     if implied_vol is None:
+         raise ValueError("`implied_vol` cannot be None")
+     else:
+         implied_vol = skv.check_array(
+             implied_vol,
+             accept_sparse=False,
+             ensure_2d=False,
+             dtype=[np.float64, np.float32],
+             order="C",
+             copy=False,
+             input_name="implied_vol",
+         )
+     if implied_vol.ndim != 2:
+         raise ValueError(
+             "`implied_vol` must be a 2D array of shape (n_observations, n_assets)"
+         )
+
+     if implied_vol.shape != (n_observations, n_assets):
+         raise ValueError(
+             f"implied_vol.shape == {implied_vol.shape}, "
+             f"expected {(n_observations, n_assets)}"
+         )
+
+     skv.check_non_negative(implied_vol, "`implied_vol`")
+     # noinspection PyTypeChecker
+     return implied_vol
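A minimal usage sketch of `ImpliedCovariance` follows (not part of the released files; synthetic returns and implied volatilities, illustrative values only). It assumes skfolio 0.3.0 with the class re-exported from `skfolio.moments` and a scikit-learn version with metadata routing; as the `fit` docstring above notes, `implied_vol` travels through scikit-learn's metadata-routing machinery, so routing is enabled and the metadata explicitly requested before fitting:

import numpy as np
import sklearn

from skfolio.moments import ImpliedCovariance

# `implied_vol` is passed via scikit-learn metadata routing, so enable routing
# and request the metadata on the estimator's `fit`.
sklearn.set_config(enable_metadata_routing=True)

rng = np.random.default_rng(0)
n_observations, n_assets = 500, 5
X = rng.normal(scale=0.01, size=(n_observations, n_assets))          # daily returns
implied_vol = 0.15 + 0.10 * rng.random((n_observations, n_assets))   # IVs per annum

# Regression path: ln(RV_t) = a + b1 ln(IV_{t-1}) + b2 ln(RV_{t-1}) fitted on
# non-overlapping 20-observation windows.
model = ImpliedCovariance(window_size=20).set_fit_request(implied_vol=True)
model.fit(X, implied_vol=implied_vol)
print(model.coefs_.shape)         # (n_assets, 2): one (b1, b2) pair per asset
print(model.pred_realised_vols_)  # predicted realised vols used to rescale the correlation
print(model.covariance_.shape)    # (n_assets, n_assets)

# Volatility-risk-premium path: RV_t = IV_{t-1} / VRPA, no regression is fitted.
model_vrpa = ImpliedCovariance(volatility_risk_premium_adj=1.2).set_fit_request(
    implied_vol=True
)
model_vrpa.fit(X, implied_vol=implied_vol)

The same estimator can also be nested inside a prior (for example as the `covariance_estimator` of `EmpiricalPrior`), in which case `implied_vol` is intended to be routed down from the outer `fit` call; the routing changes to `skfolio/prior/_empirical.py` listed above are part of this diff.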