skfolio 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +29 -0
- skfolio/cluster/__init__.py +8 -0
- skfolio/cluster/_hierarchical.py +387 -0
- skfolio/datasets/__init__.py +20 -0
- skfolio/datasets/_base.py +389 -0
- skfolio/datasets/data/__init__.py +0 -0
- skfolio/datasets/data/factors_dataset.csv.gz +0 -0
- skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
- skfolio/datasets/data/sp500_index.csv.gz +0 -0
- skfolio/distance/__init__.py +26 -0
- skfolio/distance/_base.py +55 -0
- skfolio/distance/_distance.py +574 -0
- skfolio/exceptions.py +30 -0
- skfolio/measures/__init__.py +76 -0
- skfolio/measures/_enums.py +355 -0
- skfolio/measures/_measures.py +607 -0
- skfolio/metrics/__init__.py +3 -0
- skfolio/metrics/_scorer.py +121 -0
- skfolio/model_selection/__init__.py +18 -0
- skfolio/model_selection/_combinatorial.py +407 -0
- skfolio/model_selection/_validation.py +194 -0
- skfolio/model_selection/_walk_forward.py +221 -0
- skfolio/moments/__init__.py +41 -0
- skfolio/moments/covariance/__init__.py +29 -0
- skfolio/moments/covariance/_base.py +101 -0
- skfolio/moments/covariance/_covariance.py +1108 -0
- skfolio/moments/expected_returns/__init__.py +21 -0
- skfolio/moments/expected_returns/_base.py +31 -0
- skfolio/moments/expected_returns/_expected_returns.py +415 -0
- skfolio/optimization/__init__.py +36 -0
- skfolio/optimization/_base.py +147 -0
- skfolio/optimization/cluster/__init__.py +13 -0
- skfolio/optimization/cluster/_nco.py +348 -0
- skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
- skfolio/optimization/cluster/hierarchical/_base.py +440 -0
- skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
- skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
- skfolio/optimization/convex/__init__.py +16 -0
- skfolio/optimization/convex/_base.py +1944 -0
- skfolio/optimization/convex/_distributionally_robust.py +392 -0
- skfolio/optimization/convex/_maximum_diversification.py +417 -0
- skfolio/optimization/convex/_mean_risk.py +974 -0
- skfolio/optimization/convex/_risk_budgeting.py +560 -0
- skfolio/optimization/ensemble/__init__.py +6 -0
- skfolio/optimization/ensemble/_base.py +87 -0
- skfolio/optimization/ensemble/_stacking.py +326 -0
- skfolio/optimization/naive/__init__.py +3 -0
- skfolio/optimization/naive/_naive.py +173 -0
- skfolio/population/__init__.py +3 -0
- skfolio/population/_population.py +883 -0
- skfolio/portfolio/__init__.py +13 -0
- skfolio/portfolio/_base.py +1096 -0
- skfolio/portfolio/_multi_period_portfolio.py +610 -0
- skfolio/portfolio/_portfolio.py +842 -0
- skfolio/pre_selection/__init__.py +7 -0
- skfolio/pre_selection/_pre_selection.py +342 -0
- skfolio/preprocessing/__init__.py +3 -0
- skfolio/preprocessing/_returns.py +114 -0
- skfolio/prior/__init__.py +18 -0
- skfolio/prior/_base.py +63 -0
- skfolio/prior/_black_litterman.py +238 -0
- skfolio/prior/_empirical.py +163 -0
- skfolio/prior/_factor_model.py +268 -0
- skfolio/typing.py +50 -0
- skfolio/uncertainty_set/__init__.py +23 -0
- skfolio/uncertainty_set/_base.py +108 -0
- skfolio/uncertainty_set/_bootstrap.py +281 -0
- skfolio/uncertainty_set/_empirical.py +237 -0
- skfolio/utils/__init__.py +0 -0
- skfolio/utils/bootstrap.py +115 -0
- skfolio/utils/equations.py +350 -0
- skfolio/utils/sorting.py +117 -0
- skfolio/utils/stats.py +466 -0
- skfolio/utils/tools.py +567 -0
- skfolio-0.0.1.dist-info/LICENSE +29 -0
- skfolio-0.0.1.dist-info/METADATA +568 -0
- skfolio-0.0.1.dist-info/RECORD +79 -0
- skfolio-0.0.1.dist-info/WHEEL +5 -0
- skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,238 @@
|
|
1
|
+
"""Black & Litterman Prior Model estimator."""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
import numpy.typing as npt
|
8
|
+
|
9
|
+
from skfolio.moments import EquilibriumMu
|
10
|
+
from skfolio.prior._base import BasePrior, PriorModel
|
11
|
+
from skfolio.prior._empirical import EmpiricalPrior
|
12
|
+
from skfolio.utils.equations import equations_to_matrix
|
13
|
+
from skfolio.utils.tools import check_estimator, input_to_array
|
14
|
+
|
15
|
+
|
16
|
+
class BlackLitterman(BasePrior):
    """Black & Litterman Prior Model estimator.

    The Black & Litterman model [1]_ takes a Bayesian approach by using a prior estimate
    of the assets expected returns and covariance matrix which are updated using the
    analyst views to get a posterior estimate.

    Parameters
    ----------
    views : array-like of strings of shape (n_views,)
        The analyst views about the assets expected returns.
        The views must match the following patterns:

            * Absolute view: "asset_i = a"
            * Relative view: "asset_i - asset_j = b"

        With "asset_i" and "asset_j" the assets names and "a" and "b" the analyst views
        about the assets expected returns expressed in the same frequency as the
        returns `X`.

        Examples:

            * "SPX = 0.00015" --> the SPX will have a daily expected return of 0.015%
            * "SX5E - TLT = 0.00039" --> the SX5E will outperform the TLT by a daily expected return of 0.039%
            * "SX5E - SPX = -0.0002" --> the SX5E will underperform the SPX by a daily expected return of 0.02%
            * "Equity = 0.00010" --> the sum of Equity assets will have a daily expected return of 0.01%
            * "Europe - US = 0.0004" --> the sum of European assets will outperform the sum of US assets by a daily expected return of 0.04%

    groups : dict[str, list[str]] or array-like of strings of shape (n_groups, n_assets), optional
        The assets groups to be referenced in `views`.
        If a dictionary is provided, its (key/value) pair must be the
        (asset name/asset groups) and the input `X` of the `fit` methods must be a
        DataFrame with the assets names in columns.

        Examples:

            * groups = {"SX5E": ["Equity", "Europe"], "SPX": ["Equity", "US"], "TLT": ["Bond", "US"]}
            * groups = [["Equity", "Equity", "Bond"], ["Europe", "US", "US"]]

    prior_estimator : BasePrior, optional
        The assets' :ref:`prior model estimator <prior>`. It is used to estimate
        the :class:`~skfolio.prior.PriorModel` containing the estimation of the assets
        expected returns, covariance matrix, returns and Cholesky decomposition.
        The default (`None`) is to use `EmpiricalPrior(mu_estimator=EquilibriumMu())`.

    tau : float, default=0.05
        Tau controls the degree of uncertainty given to the analyst views. A low value
        means high uncertainty and will put less weight on the analyst views compared to
        the prior returns. The default value is `0.05`.
        Other common values used in the literature are `1.0` or the inverse of the
        number of observations.

    view_confidences : array-like of floats of shape (n_views,), optional
        Instead of using a diagonal uncertainty matrix (Omega) proportional to the prior
        covariance matrix, you can provide the vector of view confidences (between 0
        and 1) as described by the Idzorek's method [2]_.
        A confidence of exactly 0 is replaced internally by `1e-16` to avoid a
        division by zero; the provided array is never modified.

    risk_free_rate : float, default=0.0
        The risk-free rate.

    Attributes
    ----------
    prior_model_ : PriorModel
        The :class:`~skfolio.prior.PriorModel`.

    groups_ : ndarray of shape(n_groups, n_assets)
        Assets names and groups converted to a 2D array.

    views_ : ndarray of shape (n_views,)
        The analyst views converted to a ndarray of floats.

    picking_matrix_ : ndarray of shape (n_views, n_assets)
        Picking matrix computed from the views and assets names/groups.

    prior_estimator_ : BasePrior
        Fitted `prior_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Combining investor views with market equilibrium",
        The Journal of Fixed Income,
        Fischer Black and Robert Litterman, 1991.

    .. [2] "A step-by-step guide to the Black-Litterman model : Incorporating
        user-specified confidence",
        Forecasting Expected Returns in the Financial Markets,
        Idzorek T, 2007.
    """

    groups_: np.ndarray
    views_: np.ndarray
    picking_matrix_: np.ndarray
    prior_estimator_: BasePrior

    def __init__(
        self,
        views: npt.ArrayLike,
        groups: dict[str, list[str]] | npt.ArrayLike | None = None,
        prior_estimator: BasePrior | None = None,
        tau: float = 0.05,
        view_confidences: npt.ArrayLike | None = None,
        risk_free_rate: float = 0,
    ):
        self.views = views
        self.groups = groups
        self.prior_estimator = prior_estimator
        self.tau = tau
        self.view_confidences = view_confidences
        self.risk_free_rate = risk_free_rate

    def fit(self, X: npt.ArrayLike, y=None) -> "BlackLitterman":
        """Fit the Black & Litterman estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : BlackLitterman
            Fitted estimator.

        Raises
        ------
        ValueError
            If `views` is not 1D, if neither `groups` nor asset names (through
            `X`) are available, or if `view_confidences` has a shape different
            from `(n_views,)` or contains values outside `[0, 1]`.
        """
        self.prior_estimator_ = check_estimator(
            self.prior_estimator,
            default=EmpiricalPrior(mu_estimator=EquilibriumMu()),
            check_type=BasePrior,
        )
        # fitting prior estimator
        self.prior_estimator_.fit(X)

        prior_mu = self.prior_estimator_.prior_model_.mu
        prior_covariance = self.prior_estimator_.prior_model_.covariance
        prior_returns = self.prior_estimator_.prior_model_.returns

        # we validate after all models have been fitted to keep features names
        # information.
        self._validate_data(X)

        n_assets = prior_returns.shape[1]
        views = np.asarray(self.views)
        if views.ndim != 1:
            raise ValueError(f"`views` must be a 1D array, got a {views.ndim}D array.")
        if self.groups is None:
            # Without explicit groups, the asset names themselves form the
            # single row of groups that the views can reference.
            if not hasattr(self, "feature_names_in_"):
                raise ValueError(
                    "You must provide either `groups`"
                    " or `X` as a DataFrame with asset names in columns"
                )
            self.groups_ = np.asarray([self.feature_names_in_])
        else:
            self.groups_ = input_to_array(
                items=self.groups,
                n_assets=n_assets,
                fill_value="",
                dim=2,
                assets_names=(
                    self.feature_names_in_
                    if hasattr(self, "feature_names_in_")
                    else None
                ),
                name="groups",
            )
        self.picking_matrix_, self.views_ = equations_to_matrix(
            groups=self.groups_,
            equations=views,
            sum_to_one=True,
            raise_if_group_missing=True,
            names=("groups", "views"),
        )

        if self.view_confidences is None:
            # Omega = diag(tau * P @ Sigma @ P.T)
            omega = np.diag(
                np.diag(
                    self.tau
                    * self.picking_matrix_
                    @ prior_covariance
                    @ self.picking_matrix_.T
                )
            )
        else:
            # Idzorek's method using Jay Walters closed form solution
            # Work on a float copy: the zero patch below must neither write
            # into the user-provided `view_confidences` nor be truncated to 0
            # by an integer dtype.
            view_confidences = np.array(self.view_confidences, dtype=float)
            n_views = self.picking_matrix_.shape[0]
            if view_confidences.shape != (n_views,):
                raise ValueError(
                    "`view_confidences` must be a 1D array of shape "
                    f"{(n_views,)}, got {view_confidences.shape} instead."
                )
            if np.any(view_confidences < 0) or np.any(view_confidences > 1):
                raise ValueError(
                    "all values of view_confidences must be between 0 and 1"
                )
            # A zero confidence would divide by zero in `alphas`.
            view_confidences[view_confidences == 0] = 1e-16
            alphas = 1 / view_confidences - 1
            # Omega = diag(alpha_i * tau * (P @ Sigma @ P.T)_ii)
            omega = np.diag(
                np.diag(
                    self.tau
                    * alphas[:, np.newaxis]
                    * self.picking_matrix_
                    @ prior_covariance
                    @ self.picking_matrix_.T
                )
            )

        # solving linear system instead of matrix inversion
        _v = self.tau * prior_covariance @ self.picking_matrix_.T
        _a = self.picking_matrix_ @ _v + omega
        _b = self.views_ - self.picking_matrix_ @ prior_mu
        posterior_mu = prior_mu + _v @ np.linalg.solve(_a, _b) + self.risk_free_rate
        posterior_covariance = (
            prior_covariance
            + self.tau * prior_covariance
            - _v @ np.linalg.solve(_a, _v.T)
        )
        self.prior_model_ = PriorModel(
            mu=posterior_mu, covariance=posterior_covariance, returns=prior_returns
        )
        return self
|
@@ -0,0 +1,163 @@
|
|
1
|
+
"""Empirical Prior Model estimator."""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
import numpy as np
|
6
|
+
import numpy.typing as npt
|
7
|
+
|
8
|
+
from skfolio.moments import BaseCovariance, BaseMu, EmpiricalCovariance, EmpiricalMu
|
9
|
+
from skfolio.prior._base import BasePrior, PriorModel
|
10
|
+
from skfolio.utils.tools import check_estimator
|
11
|
+
|
12
|
+
|
13
|
+
class EmpiricalPrior(BasePrior):
    """Empirical Prior estimator.

    The Empirical Prior estimates the :class:`~skfolio.prior.PriorModel` by fitting a
    `mu_estimator` and a `covariance_estimator` separately.

    Parameters
    ----------
    mu_estimator : BaseMu, optional
        The assets :ref:`expected returns estimator <mu_estimator>`.
        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalMu`.

    covariance_estimator : BaseCovariance , optional
        The assets :ref:`covariance matrix estimator <covariance_estimator>`.
        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.

    is_log_normal : bool, default=False
        If this is set to True, the moments are estimated on the logarithmic returns
        as opposed to the linear returns. Then the moments estimations of the
        logarithmic returns are projected to the investment horizon and transformed
        to obtain the moments estimation of the linear returns at the investment
        horizon. If True, `investment_horizon` must be provided. The input `X` must be
        **linear returns**. They will be converted into logarithmic returns only for the
        moments estimation.

        .. seealso::

            :ref:`data preparation <data_preparation>`

    investment_horizon : float, optional
        The investment horizon used for the moments estimation of the linear returns
        when `is_log_normal` is `True`.

    Attributes
    ----------
    prior_model_ : PriorModel
        The assets :class:`~skfolio.prior.PriorModel`.

    mu_estimator_ : BaseMu
        Fitted `mu_estimator`.

    covariance_estimator_ : BaseCovariance
        Fitted `covariance_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Linear vs. Compounded Returns – Common Pitfalls in Portfolio Management".
        GARP Risk Professional.
        Attilio Meucci (2010).
    """

    mu_estimator_: BaseMu
    covariance_estimator_: BaseCovariance

    def __init__(
        self,
        mu_estimator: BaseMu | None = None,
        covariance_estimator: BaseCovariance | None = None,
        is_log_normal: bool = False,
        investment_horizon: float | None = None,
    ):
        self.mu_estimator = mu_estimator
        self.covariance_estimator = covariance_estimator
        self.is_log_normal = is_log_normal
        self.investment_horizon = investment_horizon

    def fit(self, X: npt.ArrayLike, y=None) -> "EmpiricalPrior":
        """Fit the Empirical Prior estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : EmpiricalPrior
            Fitted estimator.

        Raises
        ------
        ValueError
            If `investment_horizon` is provided while `is_log_normal` is
            `False`, or missing while `is_log_normal` is `True`.
        """
        self.mu_estimator_ = check_estimator(
            self.mu_estimator,
            default=EmpiricalMu(),
            check_type=BaseMu,
        )
        self.covariance_estimator_ = check_estimator(
            self.covariance_estimator,
            default=EmpiricalCovariance(),
            check_type=BaseCovariance,
        )
        # fitting estimators
        if not self.is_log_normal:
            if self.investment_horizon is not None:
                raise ValueError(
                    "`investment_horizon` must be `None` when "
                    "`is_log_normal` is `False`"
                )
            # Expected returns
            self.mu_estimator_.fit(X)
            mu = self.mu_estimator_.mu_

            # Covariance
            self.covariance_estimator_.fit(X)
            covariance = self.covariance_estimator_.covariance_
        else:
            if self.investment_horizon is None:
                raise ValueError(
                    "`investment_horizon` must be provided when "
                    "`is_log_normal` is `True`"
                )
            # Convert linear returns to log returns
            X_log = np.log(1 + X)

            # Estimates the moments on the log returns
            self.mu_estimator_.fit(X_log)
            self.covariance_estimator_.fit(X_log)

            # Using the property of aggregation across time we scale this distribution
            # to the investment horizon by the "square-root rule".
            # New arrays are created on purpose: an in-place `*=` would also
            # rescale the `mu_` and `covariance_` stored on the fitted
            # estimators.
            log_mu = self.mu_estimator_.mu_ * self.investment_horizon
            log_covariance = (
                self.covariance_estimator_.covariance_ * self.investment_horizon
            )

            # We convert it into a distribution of linear returns over the investment
            # horizon. With 1 + R = exp(Y) and Y ~ N(log_mu, log_covariance):
            #   E[1 + R_i]      = exp(log_mu_i + 0.5 * log_covariance_ii)
            #   Cov(R_i, R_j)   = E[1 + R_i] * E[1 + R_j] * (exp(log_covariance_ij) - 1)
            #   E[R_i]          = E[1 + R_i] - 1
            expected_growth = np.exp(log_mu + 0.5 * np.diag(log_covariance))
            covariance = np.outer(expected_growth, expected_growth) * (
                np.exp(log_covariance) - 1
            )
            # The covariance needs the gross expectation; the prior's `mu` is
            # the expected *linear* return, hence the minus one.
            mu = expected_growth - 1

        # we validate and convert to numpy after all models have been fitted to keep
        # features names information.
        X = self._validate_data(X)
        self.prior_model_ = PriorModel(
            mu=mu,
            covariance=covariance,
            returns=X,
        )
        return self
|
@@ -0,0 +1,268 @@
|
|
1
|
+
"""Factor Model estimator"""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
from abc import ABC, abstractmethod
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
import sklearn.base as skb
|
11
|
+
import sklearn.linear_model as skl
|
12
|
+
import sklearn.multioutput as skm
|
13
|
+
|
14
|
+
from skfolio.prior._base import BasePrior, PriorModel
|
15
|
+
from skfolio.prior._empirical import EmpiricalPrior
|
16
|
+
from skfolio.utils.stats import cov_nearest
|
17
|
+
from skfolio.utils.tools import check_estimator
|
18
|
+
|
19
|
+
|
20
|
+
class BaseLoadingMatrix(skb.BaseEstimator, ABC):
    """Abstract base from which every Loading Matrix estimator derives.

    Notes
    -----
    Concrete estimators should declare every parameter that can be set at
    the class level as an explicit keyword argument of their ``__init__``
    (no ``*args`` or ``**kwargs``).
    """

    loading_matrix_: np.ndarray
    intercepts_: np.ndarray

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike):
        """Fit the loading matrix estimator; must be overridden by subclasses.

        Parameters
        ----------
        X : array-like
            Price returns of the assets.

        y : array-like
            Price returns of the factors.
        """
|
36
|
+
|
37
|
+
|
38
|
+
class LoadingMatrixRegression(BaseLoadingMatrix):
    """Loading Matrix Regression estimator.

    Estimate the loading matrix by fitting one linear regressor per asset.

    Parameters
    ----------
    linear_regressor : BaseEstimator, optional
        Linear regressor used to fit the factors on each asset separately.
        The default (`None`) is to use `LassoCV(fit_intercept=False)`.

    n_jobs : int, optional
        The number of jobs to run in parallel.

        When individual estimators are fast to train or predict,
        using ``n_jobs > 1`` can result in slower performance due
        to the parallelism overhead.

        The value `-1` means using all processors.
        The default (`None`) means 1 unless in a `joblib.parallel_backend` context.

    Attributes
    ----------
    loading_matrix_ : ndarray of shape (n_assets, n_factors)
        The loading matrix.

    intercepts_ : ndarray of shape (n_assets,)
        The intercepts.

    multi_output_regressor_ : MultiOutputRegressor
        Fitted `sklearn.multioutput.MultiOutputRegressor`
    """

    multi_output_regressor_: skm.MultiOutputRegressor

    def __init__(
        self,
        linear_regressor: skb.BaseEstimator | None = None,
        n_jobs: int | None = None,
    ):
        self.linear_regressor = linear_regressor
        self.n_jobs = n_jobs

    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike) -> "LoadingMatrixRegression":
        """Fit the Loading Matrix Regression Estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : array-like of shape (n_observations, n_factors)
            Price returns of the factors.

        Returns
        -------
        self : LoadingMatrixRegression
            Fitted estimator.
        """
        _linear_regressor = check_estimator(
            self.linear_regressor,
            default=skl.LassoCV(fit_intercept=False),
            check_type=skb.BaseEstimator,
        )

        self.multi_output_regressor_ = skm.MultiOutputRegressor(
            _linear_regressor, n_jobs=self.n_jobs
        )
        # The roles are swapped on purpose: the factors `y` are the regressors
        # and each asset column of `X` is one regression target.
        self.multi_output_regressor_.fit(X=y, y=X)

        # Iterate the fitted per-target regressors directly instead of
        # indexing by `X.shape[1]`, so `X` does not need a `.shape` attribute.
        # noinspection PyUnresolvedReferences
        fitted_regressors = self.multi_output_regressor_.estimators_
        self.loading_matrix_ = np.array(
            [regressor.coef_ for regressor in fitted_regressors]
        )
        self.intercepts_ = np.array(
            [regressor.intercept_ for regressor in fitted_regressors]
        )
        return self
|
118
|
+
|
119
|
+
|
120
|
+
class FactorModel(BasePrior):
    """Factor Model estimator.

    The purpose of Factor Models is to impose a structure on financial variables and
    their covariance matrix by explaining them through a small number of common factors.
    This can help to overcome estimation error by reducing the number of parameters,
    i.e. the dimensionality of the estimation problem, making portfolio optimization
    more robust against noise in the data. Factor Models also provide a decomposition of
    financial risk to systematic and security specific components.

    Parameters
    ----------
    loading_matrix_estimator : BaseLoadingMatrix, optional
        Estimator of the loading matrix (betas) of the factors.
        The default (`None`) is to use :class:`LoadingMatrixRegression` which fits the
        factors using `LassoCV` on each asset separately.

    factor_prior_estimator : BasePrior, optional
        The factors :ref:`prior estimator <prior>`.
        It is used to estimate the :class:`~skfolio.prior.PriorModel` containing the
        factors expected returns and covariance matrix.
        The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.

    residual_variance : bool, default=True
        If this is set to True, the diagonal term of the residuals covariance
        (residuals variance) is added to the factor model covariance.

    higham : bool, default=False
        If this is set to True, we use the Higham & Nick (2002) algorithm to find the
        nearest covariance matrix that is positive semi-definite. It is more accurate
        but slower than the default clipping method. For more information
        see :func:`~skfolio.utils.stats.cov_nearest`.

    max_iteration : int, default=100
        Only used when `higham` is set to True. Maximum number of iterations of the
        Higham & Nick (2002) algorithm.

    Attributes
    ----------
    prior_model_ : PriorModel
        The :class:`~skfolio.prior.PriorModel`.

    factor_prior_estimator_ : BasePrior
        Fitted `factor_prior_estimator`.

    loading_matrix_estimator_ : BaseLoadingMatrix
        Fitted `loading_matrix_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    factor_prior_estimator_: BasePrior
    loading_matrix_estimator_: BaseLoadingMatrix

    def __init__(
        self,
        loading_matrix_estimator: BaseLoadingMatrix | None = None,
        factor_prior_estimator: BasePrior | None = None,
        residual_variance: bool = True,
        higham: bool = False,
        max_iteration: int = 100,
    ):
        self.loading_matrix_estimator = loading_matrix_estimator
        self.factor_prior_estimator = factor_prior_estimator
        self.residual_variance = residual_variance
        self.higham = higham
        self.max_iteration = max_iteration

    # noinspection PyMethodOverriding, PyPep8Naming
    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike) -> "FactorModel":
        """Fit the Factor Model estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : array-like of shape (n_observations, n_factors)
            Factors' returns.

        Returns
        -------
        self : FactorModel
            Fitted estimator.

        Raises
        ------
        ValueError
            If the fitted loading matrix is not of shape `(n_assets, n_factors)`
            or the fitted intercepts are not of shape `(n_assets,)`.
        """
        self.factor_prior_estimator_ = check_estimator(
            self.factor_prior_estimator,
            default=EmpiricalPrior(),
            check_type=BasePrior,
        )
        self.loading_matrix_estimator_ = check_estimator(
            self.loading_matrix_estimator,
            default=LoadingMatrixRegression(),
            check_type=BaseLoadingMatrix,
        )

        # Fitting prior estimator
        self.factor_prior_estimator_.fit(y)
        factor_mu = self.factor_prior_estimator_.prior_model_.mu
        factor_covariance = self.factor_prior_estimator_.prior_model_.covariance

        # Fitting loading matrix estimator
        self.loading_matrix_estimator_.fit(X, y)
        loading_matrix = self.loading_matrix_estimator_.loading_matrix_
        intercepts = self.loading_matrix_estimator_.intercepts_

        # we validate and convert to numpy after all models have been fitted to keep
        # features names information.
        X, y = self._validate_data(X, y, multi_output=True)
        n_assets = X.shape[1]
        n_factors = y.shape[1]

        if loading_matrix.shape != (n_assets, n_factors):
            raise ValueError(
                "`loading_matrix_estimator.loading_matrix_` must be a 2D array of"
                f" shape {(n_assets, n_factors)}, got"
                f" {loading_matrix.shape} instead."
            )

        if intercepts.shape != (n_assets,):
            raise ValueError(
                "`loading_matrix_estimator.intercepts_` must be a 1D array of "
                f"shape {(n_assets,)}, got {intercepts.shape} instead."
            )

        # Assets moments implied by the factors: mu = B @ mu_f + alpha and
        # Sigma = B @ Sigma_f @ B.T, with B the loading matrix.
        mu = loading_matrix @ factor_mu + intercepts
        covariance = loading_matrix @ factor_covariance @ loading_matrix.T
        returns = y @ loading_matrix.T + intercepts
        # Factor of the covariance: (B @ L) @ (B @ L).T == B @ Sigma_f @ B.T
        # where L is the Cholesky factor of Sigma_f.
        cholesky = loading_matrix @ np.linalg.cholesky(factor_covariance)

        if self.residual_variance:
            # Add the per-asset variance of the regression residuals on the
            # diagonal and extend the factor accordingly.
            err = X - returns
            err_cov = np.diag(np.var(err, ddof=1, axis=0))
            covariance += err_cov
            cholesky = np.hstack((cholesky, np.sqrt(err_cov)))

        covariance = cov_nearest(
            covariance, higham=self.higham, higham_max_iteration=self.max_iteration
        )

        self.prior_model_ = PriorModel(
            mu=mu, covariance=covariance, returns=returns, cholesky=cholesky
        )
        return self
|