riskfolio_lib-7.1.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- riskfolio/__init__.py +14 -0
- riskfolio/external/__init__.py +10 -0
- riskfolio/external/cppfunctions.py +300 -0
- riskfolio/external/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- riskfolio/src/AuxFunctions.py +1488 -0
- riskfolio/src/ConstraintsFunctions.py +2210 -0
- riskfolio/src/DBHT.py +1089 -0
- riskfolio/src/GerberStatistic.py +240 -0
- riskfolio/src/HCPortfolio.py +1102 -0
- riskfolio/src/OwaWeights.py +433 -0
- riskfolio/src/ParamsEstimation.py +1943 -0
- riskfolio/src/PlotFunctions.py +5052 -0
- riskfolio/src/Portfolio.py +6100 -0
- riskfolio/src/Reports.py +692 -0
- riskfolio/src/RiskFunctions.py +3195 -0
- riskfolio/src/__init__.py +20 -0
- riskfolio/version.py +4 -0
- riskfolio_lib-7.1.0.dist-info/LICENSE.txt +27 -0
- riskfolio_lib-7.1.0.dist-info/METADATA +377 -0
- riskfolio_lib-7.1.0.dist-info/RECORD +22 -0
- riskfolio_lib-7.1.0.dist-info/WHEEL +6 -0
- riskfolio_lib-7.1.0.dist-info/top_level.txt +1 -0
riskfolio/src/ParamsEstimation.py

@@ -0,0 +1,1943 @@
""""""  #
"""
Copyright (c) 2020-2025, Dany Cajas
All rights reserved.
This work is licensed under BSD 3-Clause "New" or "Revised" License.
License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
"""

import numpy as np
import pandas as pd
import statsmodels.api as sm
import scipy.stats as st
import sklearn.covariance as skcov
import arch.bootstrap as bs

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from numpy.linalg import inv
from itertools import product

import riskfolio.src.AuxFunctions as af
import riskfolio.src.DBHT as db
import riskfolio.src.GerberStatistic as gs
import riskfolio.external.cppfunctions as cf


__all__ = [
    "mean_vector",
    "covar_matrix",
    "cokurt_matrix",
    "forward_regression",
    "backward_regression",
    "PCR",
    "loadings_matrix",
    "risk_factors",
    "black_litterman",
    "augmented_black_litterman",
    "black_litterman_bayesian",
    "bootstrapping",
    "normal_simulation",
]


def mean_vector(X, method="hist", d=0.94, target="b1"):
    r"""
    Calculate the expected returns vector using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    method : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'. Possible values are:

        - 'hist': use the historical estimator.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.

    d : scalar
        The smoothing factor of ewma methods.
        The default is 0.94.
    target : str, optional
        The target mean vector. The default value is 'b1'.
        Possible values are:

        - 'b1': grand mean.
        - 'b2': volatility weighted grand mean.
        - 'b3': mean square error of sample mean.

    Returns
    -------
    mu : DataFrame of shape (1, n_assets)
        The estimation of expected returns.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        mu = np.array(X.mean(), ndmin=2)
    elif method == "ewma1":
        mu = np.array(X.ewm(alpha=1 - d).mean().iloc[-1, :], ndmin=2)
    elif method == "ewma2":
        mu = np.array(X.ewm(alpha=1 - d, adjust=False).mean().iloc[-1, :], ndmin=2)
    elif method in ["JS", "BS", "BOP"]:
        T, n = np.array(X, ndmin=2).shape
        ones = np.ones((n, 1))
        mu = np.array(X.mean(), ndmin=2).reshape(-1, 1)
        Sigma = np.cov(X, rowvar=False)
        Sigma_inv = np.linalg.inv(Sigma)
        eigvals = np.linalg.eigvals(Sigma)

        # Calculate the target vector
        if target == "b1":
            b = ones.T @ mu / n * ones
        elif target == "b2":
            b = ones.T @ Sigma_inv @ mu / (ones.T @ Sigma_inv @ ones) * ones
        elif target == "b3":
            b = np.trace(Sigma) / T * ones

        # Calculate the estimators
        if method == "JS":
            alpha_1 = (
                1
                / T
                * (n * np.mean(eigvals) - 2 * np.max(eigvals))
                / ((mu - b).T @ (mu - b))
            )
            mu = (1 - alpha_1) * mu + alpha_1 * b
        elif method == "BS":
            alpha_1 = (n + 2) / ((n + 2) + T * (mu - b).T @ Sigma_inv @ (mu - b))
            mu = (1 - alpha_1) * mu + alpha_1 * b
        elif method == "BOP":
            alpha_1 = (mu.T @ Sigma_inv @ mu - n / (T - n)) * b.T @ Sigma_inv @ b - (
                mu.T @ Sigma_inv @ b
            ) ** 2
            alpha_1 /= (mu.T @ Sigma_inv @ mu) * (b.T @ Sigma_inv @ b) - (
                mu.T @ Sigma_inv @ b
            ) ** 2
            beta_1 = (1 - alpha_1) * (mu.T @ Sigma_inv @ b) / (mu.T @ Sigma_inv @ mu)
            mu = alpha_1 * mu + beta_1 * b
        mu = mu.T

    mu = pd.DataFrame(np.array(mu, ndmin=2), columns=assets)

    return mu
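
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# Assuming `data` is a hypothetical (n_samples x n_assets) returns DataFrame:
#
#   import riskfolio.src.ParamsEstimation as pe
#   mu_hist = pe.mean_vector(data)                           # sample means
#   mu_js = pe.mean_vector(data, method="JS", target="b1")   # James-Stein shrinkage
#
# Both calls return a (1 x n_assets) DataFrame keyed by asset names.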


def covar_matrix(
    X,
    method="hist",
    d=0.94,
    alpha=0.1,
    bWidth=0.01,
    detone=False,
    mkt_comp=1,
    threshold=0.5,
):
    r"""
    Calculate the covariance matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    method : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'semi': use the semi lower covariance matrix.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using the fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using the spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using the shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.

    d : scalar
        The smoothing factor of ewma methods. The default is 0.94.
    alpha : scalar
        The shrinkage factor of the 'shrunk' and 'shrink' methods. The default is 0.1.
    bWidth : float
        The bandwidth of the kernel for the 'fixed', 'spectral' and 'shrink' methods.
    detone : bool, optional
        Whether to remove the first mkt_comp principal components of the
        correlation matrix for the 'fixed', 'spectral' and 'shrink' methods.
        The detoned correlation matrix is singular, so it cannot be inverted.
    mkt_comp : int, optional
        Number of first components that will be removed using the detone method.
    threshold : float
        Threshold for the 'gerber1' and 'gerber2' methods, between 0 and 1.

    Returns
    -------
    cov : DataFrame
        The estimation of the covariance matrix.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        cov = np.cov(X, rowvar=False)
    elif method == "semi":
        T, N = X.shape
        mu = X.mean().to_numpy().reshape(1, -1)
        a = X - np.repeat(mu, T, axis=0)
        a = np.minimum(a, np.zeros_like(a))
        cov = 1 / (T - 1) * a.T @ a
    elif method == "ewma1":
        cov = X.ewm(alpha=1 - d).cov()
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ewma2":
        cov = X.ewm(alpha=1 - d, adjust=False).cov()
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ledoit":
        lw = skcov.LedoitWolf()
        lw.fit(X)
        cov = lw.covariance_
    elif method == "oas":
        oas = skcov.OAS()
        oas.fit(X)
        cov = oas.covariance_
    elif method == "shrunk":
        sc = skcov.ShrunkCovariance(shrinkage=alpha)
        sc.fit(X)
        cov = sc.covariance_
    elif method == "gl":
        gl = skcov.GraphicalLassoCV()
        gl.fit(X)
        cov = gl.covariance_
    elif method == "jlogo":
        S = np.cov(X, rowvar=False)
        R = np.corrcoef(X, rowvar=False)
        D = np.sqrt(np.clip((1 - R) / 2, a_min=0.0, a_max=1.0))
        (_, _, separators, cliques, _) = db.PMFG_T2s(1 - D**2, nargout=4)
        cov = db.j_LoGo(S, separators, cliques)
        cov = np.linalg.inv(cov)
    elif method in ["fixed", "spectral", "shrink"]:
        cov = np.cov(X, rowvar=False)
        T, N = X.shape
        q = T / N
        cov = af.denoiseCov(
            cov,
            q,
            kind=method,
            bWidth=bWidth,
            detone=detone,
            mkt_comp=int(mkt_comp),
            alpha=alpha,
        )
    elif method == "gerber1":
        cov = gs.gerber_cov_stat1(X, threshold=threshold)
    elif method == "gerber2":
        cov = gs.gerber_cov_stat2(X, threshold=threshold)

    cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)

    return cov
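
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# With the same hypothetical `data` returns DataFrame:
#
#   import riskfolio.src.ParamsEstimation as pe
#   cov_lw = pe.covar_matrix(data, method="ledoit")              # Ledoit-Wolf shrinkage
#   cov_dn = pe.covar_matrix(data, method="fixed", bWidth=0.01)  # denoised estimate
#
# Each call returns an (n_assets x n_assets) DataFrame indexed by asset names.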


def cokurt_matrix(
    X,
    method="hist",
    alpha=0.1,
    bWidth=0.01,
    detone=False,
    mkt_comp=1,
):
    r"""
    Calculate the cokurtosis square matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    method : str, optional
        The method used to estimate the cokurtosis square matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'semi': use the semi lower cokurtosis square matrix.
        - 'fixed': denoise using the fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using the spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using the shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.

    alpha : scalar
        The shrinkage factor of the 'shrink' method. The default is 0.1.
    bWidth : float
        The bandwidth of the kernel for the 'fixed', 'spectral' and 'shrink' methods.
    detone : bool, optional
        Whether to remove the first mkt_comp principal components of the
        correlation matrix for the 'fixed', 'spectral' and 'shrink' methods.
        The detoned correlation matrix is singular, so it cannot be inverted.
    mkt_comp : int, optional
        Number of first components that will be removed using the detone method.

    Returns
    -------
    kurt : DataFrame
        The estimation of the cokurtosis square matrix.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()
    cols = list(product(assets, assets))
    cols = [str(y) + " - " + str(x) for x, y in cols]

    if method == "hist":
        kurt = cf.cokurtosis_matrix(X)
    elif method == "semi":
        kurt = cf.semi_cokurtosis_matrix(X)
    elif method in ["fixed", "spectral", "shrink"]:
        kurt = cf.cokurtosis_matrix(X)
        T, N = X.shape
        q = T / N
        kurt = af.denoiseCov(
            kurt,
            q,
            kind=method,
            bWidth=bWidth,
            detone=detone,
            mkt_comp=mkt_comp,
            alpha=alpha,
        )

    kurt = pd.DataFrame(np.array(kurt, ndmin=2), columns=cols, index=cols)

    return kurt
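
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# The result is an (n_assets^2 x n_assets^2) DataFrame whose row and column
# labels are 'asset_i - asset_j' pairs:
#
#   import riskfolio.src.ParamsEstimation as pe
#   kurt = pe.cokurt_matrix(data, method="semi")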


def forward_regression(X, y, criterion="pvalue", threshold=0.05, verbose=False):
    r"""
    Select the variables that estimate the best model using stepwise
    forward regression. If none of the variables has a p-value lower
    than the threshold, the algorithm selects the variable with the
    lowest p-value.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    y : Series of shape (n_samples, 1)
        Asset returns column DataFrame or Series, where n_samples is the number
        of samples.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on the lowest Akaike Information Criterion.
        - 'SIC': select the features based on the lowest Schwarz Information Criterion.
        - 'R2': select the features based on the highest R squared.
        - 'R2_A': select the features based on the highest adjusted R squared.

    threshold : scalar, optional
        The maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    verbose : bool, optional
        Enable verbose output. The default is False.

    Returns
    -------
    value : list
        A list of the variables that produce the best model.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
        raise ValueError("y must be a column DataFrame")

    if isinstance(y, pd.DataFrame):
        if y.shape[0] > 1 and y.shape[1] > 1:
            raise ValueError("y must be a column DataFrame")

    included = []
    aic = 1e10
    sic = 1e10
    r2 = -1e10
    r2_a = -1e10
    pvalues = None

    if criterion == "pvalue":
        value = 0
        while value <= threshold:
            excluded = list(set(X.columns) - set(included))
            best_pvalue = 999999
            new_feature = None
            for i in excluded:
                factors = included + [i]
                X1 = X[factors]
                X1 = sm.add_constant(X1)
                results = sm.OLS(y, X1).fit()
                new_pvalues = results.pvalues
                new_pvalues = new_pvalues[new_pvalues.index != "const"]
                cond_1 = new_pvalues.max()
                if best_pvalue > new_pvalues[i] and cond_1 <= threshold:
                    best_pvalue = results.pvalues[i]
                    new_feature = i
                    pvalues = new_pvalues.copy()

            if pvalues is not None:
                value = pvalues[pvalues.index != "const"].max()

            if new_feature is None:
                break
            else:
                included.append(new_feature)

            if verbose:
                print("Add {} with p-value {:.6}".format(new_feature, best_pvalue))

        # Fallback for when no variable has a p-value lower than the threshold
        if len(included) == 0:
            excluded = list(set(X.columns) - set(included))
            best_pvalue = 999999
            new_feature = None
            for i in excluded:
                factors = included + [i]
                X1 = X[factors]
                X1 = sm.add_constant(X1)
                results = sm.OLS(y, X1).fit()
                new_pvalues = results.pvalues
                new_pvalues = new_pvalues[new_pvalues.index != "const"]
                if best_pvalue > new_pvalues[i]:
                    best_pvalue = results.pvalues[i]
                    new_feature = i
                    pvalues = new_pvalues.copy()

            value = pvalues[pvalues.index != "const"].max()

            included.append(new_feature)

            if verbose:
                print(
                    "Add {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
                )

    else:
        excluded = X.columns.tolist()
        flag = False
        n = len(excluded)

        for j in range(n):
            value = {}
            n_ini = len(excluded)
            for i in excluded:
                factors = included.copy()
                factors.append(i)
                X1 = X[factors]
                X1 = sm.add_constant(X1)
                results = sm.OLS(y, X1).fit()

                if criterion == "AIC":
                    value[i] = results.aic
                elif criterion == "SIC":
                    value[i] = results.bic
                elif criterion == "R2":
                    value[i] = results.rsquared
                elif criterion == "R2_A":
                    value[i] = results.rsquared_adj

            value = pd.Series(value)

            if criterion in ["AIC", "SIC"]:
                key = value.idxmin()
                value = value.min()
            if criterion in ["R2", "R2_A"]:
                key = value.idxmax()
                value = value.max()

            if criterion == "AIC":
                if value < aic:
                    excluded.remove(key)
                    included.append(key)
                    aic = value
                    flag = True
            elif criterion == "SIC":
                if value < sic:
                    excluded.remove(key)
                    included.append(key)
                    sic = value
                    flag = True
            elif criterion == "R2":
                if value > r2:
                    excluded.remove(key)
                    included.append(key)
                    r2 = value
                    flag = True
            elif criterion == "R2_A":
                if value > r2_a:
                    excluded.remove(key)
                    included.append(key)
                    r2_a = value
                    flag = True

            if n_ini == len(excluded):
                break

            if flag and verbose:
                print("Add {} with {} {:.6}".format(key, criterion, value))

            flag = False

    return included
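
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# Assuming `factors` is a hypothetical (n_samples x n_factors) DataFrame and
# `data["ASSET"]` a single asset's return series:
#
#   import riskfolio.src.ParamsEstimation as pe
#   sel = pe.forward_regression(factors, data["ASSET"], criterion="pvalue",
#                               threshold=0.05)
#   # sel is the list of factor names kept by the forward pass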


def backward_regression(X, y, criterion="pvalue", threshold=0.05, verbose=False):
    r"""
    Select the variables that estimate the best model using stepwise
    backward regression. If none of the variables has a p-value lower
    than the threshold, the algorithm selects the variable with the
    lowest p-value.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    y : Series of shape (n_samples, 1)
        Asset returns column DataFrame or Series, where n_samples is the number
        of samples.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on the lowest Akaike Information Criterion.
        - 'SIC': select the features based on the lowest Schwarz Information Criterion.
        - 'R2': select the features based on the highest R squared.
        - 'R2_A': select the features based on the highest adjusted R squared.

    threshold : scalar, optional
        The maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    verbose : bool, optional
        Enable verbose output. The default is False.

    Returns
    -------
    value : list
        A list of the variables that produce the best model.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
        raise ValueError("y must be a column DataFrame")

    if isinstance(y, pd.DataFrame):
        if y.shape[0] > 1 and y.shape[1] > 1:
            raise ValueError("y must be a column DataFrame")

    X1 = sm.add_constant(X)
    results = sm.OLS(y, X1).fit()
    pvalues = results.pvalues
    aic = results.aic
    sic = results.bic
    r2 = results.rsquared
    r2_a = results.rsquared_adj

    included = pvalues.index.tolist()

    if criterion == "pvalue":
        excluded = ["const"]
        while pvalues[pvalues.index != "const"].max() > threshold:
            factors = pvalues[~pvalues.index.isin(excluded)].index.tolist()
            X1 = X[factors]
            X1 = sm.add_constant(X1)
            results = sm.OLS(y, X1).fit()
            pvalues = results.pvalues
            pvalues = pvalues[pvalues.index != "const"]
            if pvalues.shape[0] == 0:
                break
            excluded = ["const", pvalues.idxmax()]
            if verbose and pvalues.max() > threshold:
                print(
                    "Drop {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
                )

        included = pvalues[pvalues.index != "const"].index.tolist()

        # Fallback for when no variable has a p-value lower than the threshold
        if len(included) == 0:
            excluded = list(set(X.columns) - set(included))
            best_pvalue = 999999
            new_feature = None
            for i in excluded:
                factors = included + [i]
                X1 = X[factors]
                X1 = sm.add_constant(X1)
                results = sm.OLS(y, X1).fit()
                new_pvalues = results.pvalues
                new_pvalues = new_pvalues[new_pvalues.index != "const"]
                if best_pvalue > new_pvalues[i]:
                    best_pvalue = results.pvalues[i]
                    new_feature = i
                    pvalues = new_pvalues.copy()

            value = pvalues[pvalues.index != "const"].max()

            included.append(new_feature)

            if verbose:
                print(
                    "Add {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
                )

    else:
        included.remove("const")
        flag = False
        n = len(included)

        for j in range(n):
            value = {}
            n_ini = len(included)
            for i in included:
                factors = included.copy()
                factors.remove(i)
                X1 = X[factors]
                X1 = sm.add_constant(X1)
                results = sm.OLS(y, X1).fit()

                if criterion == "AIC":
                    value[i] = results.aic
                elif criterion == "SIC":
                    value[i] = results.bic
                elif criterion == "R2":
                    value[i] = results.rsquared
                elif criterion == "R2_A":
                    value[i] = results.rsquared_adj

            value = pd.Series(value)

            if criterion in ["AIC", "SIC"]:
                key = value.idxmin()
                value = value.min()
            if criterion in ["R2", "R2_A"]:
                key = value.idxmax()
                value = value.max()

            if criterion == "AIC":
                if value < aic:
                    included.remove(key)
                    aic = value
                    flag = True
            elif criterion == "SIC":
                if value < sic:
                    included.remove(key)
                    sic = value
                    flag = True
            elif criterion == "R2":
                if value > r2:
                    included.remove(key)
                    r2 = value
                    flag = True
            elif criterion == "R2_A":
                if value > r2_a:
                    included.remove(key)
                    r2_a = value
                    flag = True

            if n_ini == len(included):
                break

            if flag and verbose:
                print("Drop {} with {} {:.6}".format(key, criterion, value))

            flag = False

    return included
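
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# Backward elimination starts from the full model and drops factors one at a
# time; same hypothetical `factors` and `data` as above:
#
#   import riskfolio.src.ParamsEstimation as pe
#   sel = pe.backward_regression(factors, data["ASSET"], criterion="AIC")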


def PCR(X, y, n_components=0.95):
    r"""
    Estimate the coefficients using Principal Components Regression (PCR).

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    y : DataFrame or Series of shape (n_samples, 1)
        Asset returns column DataFrame or Series, where n_samples is the number
        of samples.
    n_components : int, float, None or str, optional
        If 1 < n_components (int), it represents the number of components that
        will be kept. If 0 < n_components < 1 (float), it represents the
        percentage of variance that is explained by the components kept.
        See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
        for more details. The default is 0.95.

    Returns
    -------
    value : nd-array
        An array with the coefficients of the model calculated using PCR.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
        raise ValueError("y must be a column DataFrame")

    if isinstance(y, pd.DataFrame):
        if y.shape[0] > 1 and y.shape[1] > 1:
            raise ValueError("y must be a column DataFrame")

    scaler = StandardScaler()
    scaler.fit(X)
    X_std = scaler.transform(X)

    if n_components > 0 and n_components < 1:
        pca = PCA(n_components=n_components)
    elif n_components >= 1:
        pca = PCA(n_components=int(n_components))

    pca.fit(X_std)
    Z_p = pca.transform(X_std)
    V_p = pca.components_.T

    results = sm.OLS(y, sm.add_constant(Z_p)).fit()
    beta_pc = results.params[1:]
    beta_pc = np.array(beta_pc, ndmin=2)

    # Map the principal-component coefficients back to the original factors
    std = np.array(np.std(X, axis=0, ddof=1), ndmin=2)
    mean = np.array(np.mean(X, axis=0), ndmin=2)
    beta = V_p @ beta_pc.T / std.T

    beta_0 = np.array(y.mean(), ndmin=2) - np.sum(beta * mean.T)

    beta = np.insert(beta, 0, beta_0)
    beta = np.array(beta, ndmin=2)

    return beta
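
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# PCR returns a (1 x (n_factors + 1)) array whose first entry is the intercept:
#
#   import riskfolio.src.ParamsEstimation as pe
#   beta = pe.PCR(factors, data["ASSET"], n_components=0.95)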


def loadings_matrix(
    X,
    Y,
    feature_selection="stepwise",
    stepwise="Forward",
    criterion="pvalue",
    threshold=0.05,
    n_components=0.95,
    verbose=False,
):
    r"""
    Estimate the loadings matrix using stepwise regression or principal
    components regression (PCR).

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    Y : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    feature_selection : str, 'stepwise' or 'PCR', optional
        Indicate the method used to estimate the loadings matrix.
        The default is 'stepwise'. Possible values are:

        - 'stepwise': use stepwise regression to select the best factors and estimate coefficients.
        - 'PCR': use principal components regression to estimate coefficients.

    stepwise : str, 'Forward' or 'Backward', optional
        Indicate the method used for stepwise regression.
        The default is 'Forward'.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on the lowest Akaike Information Criterion.
        - 'SIC': select the features based on the lowest Schwarz Information Criterion.
        - 'R2': select the features based on the highest R squared.
        - 'R2_A': select the features based on the highest adjusted R squared.

    threshold : scalar, optional
        The maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    n_components : int, float, None or str, optional
        If 1 < n_components (int), it represents the number of components that
        will be kept. If 0 < n_components < 1 (float), it represents the
        percentage of variance that is explained by the components kept.
        See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
        for more details. The default is 0.95.
    verbose : bool, optional
        Enable verbose output. The default is False.

    Returns
    -------
    loadings : DataFrame
        Loadings matrix.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(Y, pd.DataFrame):
        raise ValueError("Y must be a DataFrame")

    rows = Y.columns.tolist()
    cols = X.columns.tolist()
    cols.insert(0, "const")
    loadings = np.zeros((len(rows), len(cols)))
    loadings = pd.DataFrame(loadings, index=rows, columns=cols)

    for i in rows:
        if feature_selection == "stepwise":
            if stepwise == "Forward":
                included = forward_regression(
                    X, Y[i], criterion=criterion, threshold=threshold, verbose=verbose
                )
            elif stepwise == "Backward":
                included = backward_regression(
                    X, Y[i], criterion=criterion, threshold=threshold, verbose=verbose
                )
            else:
                raise ValueError("Choose an adequate stepwise method")
            results = sm.OLS(Y[i], sm.add_constant(X[included])).fit()
            params = results.params
            loadings.loc[i, params.index.tolist()] = params.T
        elif feature_selection == "PCR":
            beta = PCR(X, Y[i], n_components=n_components)
            beta = pd.Series(np.ravel(beta), index=cols)
            loadings.loc[i, cols] = beta.T

    return loadings
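
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# Builds one regression per asset; rows are assets, columns are 'const' plus
# the factor names:
#
#   import riskfolio.src.ParamsEstimation as pe
#   B = pe.loadings_matrix(factors, data, feature_selection="stepwise",
#                          stepwise="Forward", criterion="pvalue")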


def risk_factors(
    X,
    Y,
    B=None,
    const=True,
    method_mu="hist",
    method_cov="hist",
    feature_selection="stepwise",
    stepwise="Forward",
    criterion="pvalue",
    threshold=0.05,
    n_components=0.95,
    dict_mu={},
    dict_cov={},
):
    r"""
    Estimate the expected returns vector and covariance matrix based on risk
    factor models :cite:`b-Ross` :cite:`b-Fan`.

    .. math::
        \begin{aligned}
        R & = \alpha + B F + \epsilon \\
        \mu_{f} & = \alpha + B E(F) \\
        \Sigma_{f} & = B \Sigma_{F} B^{T} + \Sigma_{\epsilon} \\
        \end{aligned}


    where:

    :math:`R` is the series of returns.

    :math:`\alpha` is the intercept.

    :math:`B` is the loadings matrix.

    :math:`F` is the expected returns vector of the risk factors.

    :math:`\Sigma_{F}` is the covariance matrix of the risk factors.

    :math:`\Sigma_{\epsilon}` is the covariance matrix of error terms.

    :math:`\mu_{f}` is the expected returns vector obtained with the
    risk factor model.

    :math:`\Sigma_{f}` is the covariance matrix obtained with the risk
    factor model.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    Y : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    B : DataFrame of shape (n_assets, n_factors), optional
        Loadings matrix, where n_assets is the number of assets and n_factors
        is the number of risk factors. If not specified, it is estimated using
        stepwise regression. The default is None.
    const : bool, optional
        Indicate if the loadings matrix has a constant.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns of factors.
        The default value is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.

    method_cov : str, optional
        The method used to estimate the covariance matrix of factors.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using the fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using the spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using the shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.

    feature_selection : str, 'stepwise' or 'PCR', optional
        Indicate the method used to estimate the loadings matrix.
        The default is 'stepwise'. Possible values are:

        - 'stepwise': use stepwise regression to select the best factors and estimate coefficients.
        - 'PCR': use principal components regression to estimate coefficients.

    stepwise : str, 'Forward' or 'Backward', optional
        Indicate the method used for stepwise regression.
        The default is 'Forward'.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on the lowest Akaike Information Criterion.
        - 'SIC': select the features based on the lowest Schwarz Information Criterion.
        - 'R2': select the features based on the highest R squared.
        - 'R2_A': select the features based on the highest adjusted R squared.

    threshold : scalar, optional
        The maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    n_components : int, float, None or str, optional
        If 1 < n_components (int), it represents the number of components that
        will be kept. If 0 < n_components < 1 (float), it represents the
        percentage of variance that is explained by the components kept.
        See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
        for more details. The default is 0.95.
    dict_mu : dict
        Other variables related to the expected returns estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of the risk factor model.
    cov : DataFrame
        The covariance matrix of the risk factor model.
    returns : DataFrame
        The returns based on the risk factor model.
    B : DataFrame
        Loadings matrix.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame) and not isinstance(Y, pd.DataFrame):
        raise ValueError("X and Y must be DataFrames")

    if B is None:
        B = loadings_matrix(
            X,
            Y,
            feature_selection=feature_selection,
            stepwise=stepwise,
            criterion=criterion,
            threshold=threshold,
            n_components=n_components,
            verbose=False,
        )
    elif not isinstance(B, pd.DataFrame):
        raise ValueError("B must be a DataFrame")

    assets = Y.columns.tolist()
    dates = X.index.tolist()

    X1 = X.copy()
    if const == True or ("const" in B.columns.tolist()):
        mu_f = np.hstack(
            [
                np.ones((1, 1)),
                np.array(mean_vector(X1, method=method_mu, **dict_mu), ndmin=2),
            ]
        )
        X1 = sm.add_constant(X)
    else:
        mu_f = np.array(mean_vector(X1, method=method_mu, **dict_mu), ndmin=2)
    S_f = np.array(covar_matrix(X1, method=method_cov, **dict_cov), ndmin=2)
    B_ = np.array(B, ndmin=2)

    returns = np.array(X1, ndmin=2) @ B_.T
    mu = B_ @ mu_f.T

    e = np.array(Y, ndmin=2) - returns
    S_e = np.diag(np.var(np.array(e), ddof=1, axis=0))
    S = B_ @ S_f @ B_.T + S_e

    mu = pd.DataFrame(mu.T, columns=assets)
    cov = pd.DataFrame(S, index=assets, columns=assets)
    returns = pd.DataFrame(returns, index=dates, columns=assets)

    return mu, cov, returns, B
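
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# Factor-model estimates of the mean vector and covariance matrix from the
# hypothetical `factors` and `data` DataFrames:
#
#   import riskfolio.src.ParamsEstimation as pe
#   mu, cov, rets, B = pe.risk_factors(factors, data, B=None, const=True,
#                                      method_mu="hist", method_cov="hist")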


def black_litterman(
    X,
    w,
    P,
    Q,
    delta=1,
    rf=0,
    eq=True,
    method_mu="hist",
    method_cov="hist",
    dict_mu={},
    dict_cov={},
):
    r"""
    Estimate the expected returns vector and covariance matrix based
    on the Black Litterman model :cite:`b-BlackLitterman` :cite:`b-Black1`.

    .. math::
        \begin{aligned}
        \Pi & = \delta \Sigma w \\
        \Pi_{BL} & = \left [ (\tau\Sigma)^{-1} + P^{T} \Omega^{-1} P \right]^{-1}
        \left[(\tau\Sigma)^{-1} \Pi + P^{T} \Omega^{-1} Q \right] \\
        M & = \left((\tau\Sigma)^{-1} + P^{T}\Omega^{-1} P \right)^{-1} \\
        \mu_{BL} & = \Pi_{BL} + r_{f} \\
        \Sigma_{BL} & = \Sigma + M \\
        \end{aligned}


    where:

    :math:`r_{f}` is the risk free rate.

    :math:`\delta` is the risk aversion factor.

    :math:`\Pi` is the equilibrium excess returns.

    :math:`\Sigma` is the covariance matrix.

    :math:`P` is the views matrix.

    :math:`Q` is the views returns matrix.

    :math:`\Omega` is the covariance matrix of the error views.

    :math:`\mu_{BL}` is the mean vector obtained with the Black
    Litterman model.

    :math:`\Sigma_{BL}` is the covariance matrix obtained with the Black
    Litterman model.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame or Series of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
    P : DataFrame of shape (n_views, n_assets)
        Analyst's views matrix, can be relative or absolute.
    Q : DataFrame of shape (n_views, 1)
        Expected returns of analyst's views.
    delta : float, optional
        Risk aversion factor. The default value is 1.
    rf : scalar, optional
        Risk free rate. The default is 0.
    eq : bool, optional
        Indicate whether to use equilibrium or historical excess returns.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.

    method_cov : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using the fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using the spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using the shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.

    dict_mu : dict
        Other variables related to the mean vector estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of the Black Litterman model.
    cov : DataFrame
        The covariance matrix of the Black Litterman model.
    w : DataFrame
        The equilibrium weights of the Black Litterman model, without constraints.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame) and not isinstance(w, pd.DataFrame):
        raise ValueError("X and w must be DataFrames")

    if w.shape[0] > 1 and w.shape[1] > 1:
        raise ValueError("w must be a column DataFrame")

    assets = X.columns.tolist()

    w = np.array(w, ndmin=2)
    if w.shape[0] == 1:
        w = w.T

    mu = np.array(mean_vector(X, method=method_mu, **dict_mu), ndmin=2)
    S = np.array(covar_matrix(X, method=method_cov, **dict_cov), ndmin=2)
    P = np.array(P, ndmin=2)
    Q = np.array(Q, ndmin=2)
    tau = 1 / X.shape[0]
    Omega = np.array(np.diag(np.diag(P @ (tau * S) @ P.T)), ndmin=2)

    if eq == True:
        PI = delta * (S @ w)
    elif eq == False:
        PI = mu.T - rf

    PI_ = inv(inv(tau * S) + P.T @ inv(Omega) @ P) @ (
        inv(tau * S) @ PI + P.T @ inv(Omega) @ Q
    )
    M = inv(inv(tau * S) + P.T @ inv(Omega) @ P)
    # PI_1 = PI + (tau * S * P.T) * inv(P * tau * S * P.T + Omega) * (Q - P * PI)
    # M = tau * S - (tau * S * P.T) * inv(P * tau * S * P.T + Omega) * P * tau * S

    mu = PI_ + rf
    mu = mu.T
    cov = S + M
    w = inv(delta * cov) @ PI_

    mu = pd.DataFrame(mu, columns=assets)
    cov = pd.DataFrame(cov, index=assets, columns=assets)
    w = pd.DataFrame(w, index=assets)

    return mu, cov, w
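
# --- Illustrative usage sketch (editor's note; not part of the package source) ---
# Assuming `w` holds hypothetical benchmark weights and P, Q encode a single
# absolute view of 2% on the first asset:
#
#   import numpy as np
#   import riskfolio.src.ParamsEstimation as pe
#   P = pd.DataFrame(np.eye(len(data.columns))[:1], columns=data.columns)
#   Q = pd.DataFrame([[0.02]])
#   mu_bl, cov_bl, w_bl = pe.black_litterman(data, w, P, Q, delta=2.5, rf=0)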
|
|
1217
|
+
|
|
1218
|
+
|
|
1219
|
+
def augmented_black_litterman(
|
|
1220
|
+
X,
|
|
1221
|
+
w,
|
|
1222
|
+
F,
|
|
1223
|
+
B,
|
|
1224
|
+
P=None,
|
|
1225
|
+
Q=None,
|
|
1226
|
+
P_f=None,
|
|
1227
|
+
Q_f=None,
|
|
1228
|
+
delta=1,
|
|
1229
|
+
rf=0,
|
|
1230
|
+
eq=True,
|
|
1231
|
+
const=True,
|
|
1232
|
+
method_mu="hist",
|
|
1233
|
+
method_cov="hist",
|
|
1234
|
+
dict_mu={},
|
|
1235
|
+
dict_cov={},
|
|
1236
|
+
):
|
|
1237
|
+
r"""
|
|
1238
|
+
Estimate the expected returns vector and covariance matrix based
|
|
1239
|
+
on the Augmented Black Litterman model :cite:`b-WCheung`.
|
|
1240
|
+
|
|
1241
|
+
.. math::
|
|
1242
|
+
\begin{aligned}
|
|
1243
|
+
\Pi^{a} & = \delta \left [ \begin{array}{c} \Sigma \\ \Sigma_{F} B^{T} \\ \end{array} \right ] w \\
|
|
1244
|
+
P^{a} & = \left [ \begin{array}{cc} P & 0 \\ 0 & P_{F} \\ \end{array} \right ] \\
|
|
1245
|
+
Q^{a} & = \left [ \begin{array}{c} Q \\ Q_{F} \\ \end{array} \right ] \\
|
|
1246
|
+
\Sigma^{a} & = \left [ \begin{array}{cc} \Sigma & B \Sigma_{F}\\ \Sigma_{F} B^{T} & \Sigma_{F} \\ \end{array} \right ] \\
|
|
1247
|
+
\Omega^{a} & = \left [ \begin{array}{cc} \Omega & 0 \\ 0 & \Omega_{F} \\ \end{array} \right ] \\
|
|
1248
|
+
\Pi^{a}_{BL} & = \left [ (\tau \Sigma^{a})^{-1} + (P^{a})^{T} (\Omega^{a})^{-1} P^{a} \right ]^{-1}
|
|
1249
|
+
\left [ (\tau\Sigma^{a})^{-1} \Pi^{a} + (P^{a})^{T} (\Omega^{a})^{-1} Q^{a} \right ] \\
|
|
1250
|
+
M^{a} & = \left ( (\tau\Sigma^{a})^{-1} + (P^{a})^{T} (\Omega^{a})^{-1} P^{a} \right )^{-1} \\
|
|
1251
|
+
\mu^{a}_{BL} & = \Pi^{a}_{BL} + r_{f} \\
|
|
1252
|
+
\Sigma^{a}_{BL} & = \Sigma^{a} + M^{a} \\
|
|
1253
|
+
\end{aligned}
|
|
1254
|
+
|
|
1255
|
+
|
|
1256
|
+
where:
|
|
1257
|
+
|
|
1258
|
+
:math:`r_{f}` is the risk free rate.
|
|
1259
|
+
|
|
1260
|
+
:math:`\delta` is the risk aversion factor.
|
|
1261
|
+
|
|
1262
|
+
:math:`B` is the loadings matrix.
|
|
1263
|
+
|
|
1264
|
+
:math:`\Sigma` is the covariance matrix of assets.
|
|
1265
|
+
|
|
1266
|
+
:math:`\Sigma_{F}` is the covariance matrix of factors.
|
|
1267
|
+
|
|
1268
|
+
:math:`\Sigma^{a}` is the augmented covariance matrix.
|
|
1269
|
+
|
|
1270
|
+
:math:`P` is the assets views matrix.
|
|
1271
|
+
|
|
1272
|
+
:math:`Q` is the assets views returns matrix.
|
|
1273
|
+
|
|
1274
|
+
:math:`P_{F}` is the factors views matrix.
|
|
1275
|
+
|
|
1276
|
+
:math:`Q_{F}` is the factors views returns matrix.
|
|
1277
|
+
|
|
1278
|
+
:math:`P^{a}` is the augmented views matrix.
|
|
1279
|
+
|
|
1280
|
+
:math:`Q^{a}` is the augmented views returns matrix.
|
|
1281
|
+
|
|
1282
|
+
:math:`\Pi^{a}` is the augmented equilibrium excess returns.
|
|
1283
|
+
|
|
1284
|
+
:math:`\Omega` is the covariance matrix of errors of assets views.
|
|
1285
|
+
|
|
1286
|
+
:math:`\Omega_{F}` is the covariance matrix of errors of factors views.
|
|
1287
|
+
|
|
1288
|
+
:math:`\Omega^{a}` is the covariance matrix of errors of augmented views.
|
|
1289
|
+
|
|
1290
|
+
:math:`\mu^{a}_{BL}` is the mean vector obtained with the Augmented Black
|
|
1291
|
+
Litterman model.
|
|
1292
|
+
|
|
1293
|
+
:math:`\Sigma^{a}_{BL}` is the covariance matrix obtained with the Augmented
|
|
1294
|
+
Black Litterman model.
|
|
1295
|
+
|
|
1296
|
+

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame or Series of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
    F : DataFrame of shape (n_samples, n_factors)
        Risk factors returns DataFrame, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    B : DataFrame of shape (n_assets, n_factors), optional
        Loadings matrix, where n_assets is the number of assets and n_factors is
        the number of risk factors.
    P : DataFrame of shape (n_views, n_assets)
        Analyst's views matrix, can be relative or absolute.
    Q : DataFrame of shape (n_views, 1)
        Expected returns of analyst's views.
    P_f : DataFrame of shape (n_views, n_factors)
        Analyst's factors views matrix, can be relative or absolute.
    Q_f : DataFrame of shape (n_views, 1)
        Expected returns of analyst's factors views.
    delta : float, optional
        Risk aversion factor. The default value is 1.
    rf : scalar, optional
        Risk free rate. The default is 0.
    eq : bool, optional
        Indicate if the model uses equilibrium or historical excess returns.
        The default is True.
    const : bool, optional
        Indicate if the loadings matrix has a constant.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
    method_cov : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
    dict_mu : dict
        Other variables related to the mean vector estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of the Augmented Black Litterman model.
    cov : DataFrame
        The covariance matrix of the Augmented Black Litterman model.
    w : DataFrame
        The equilibrium weights of the Augmented Black Litterman model,
        without constraints.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame) and not isinstance(w, pd.DataFrame):
        raise ValueError("X and w must be DataFrames")

    if not isinstance(F, pd.DataFrame) and not isinstance(B, pd.DataFrame):
        raise ValueError("F and B must be DataFrames")

    if w.shape[0] > 1 and w.shape[1] > 1:
        raise ValueError("w must be a column DataFrame")

    assets = X.columns.tolist()
    N = len(assets)

    w = np.array(w, ndmin=2)
    if w.shape[0] == 1:
        w = w.T

    if B is not None:
        B_ = np.array(B, ndmin=2)
        if const == True:
            alpha = B_[:, :1]
            B_ = B_[:, 1:]

    mu = np.array(mean_vector(X, method=method_mu, **dict_mu), ndmin=2)
    S = np.array(covar_matrix(X, method=method_cov, **dict_cov), ndmin=2)

    tau = 1 / X.shape[0]

    mu_f = np.array(mean_vector(F, method=method_mu, **dict_mu), ndmin=2)
    S_f = np.array(covar_matrix(F, method=method_cov, **dict_cov), ndmin=2)

    if P is not None and Q is not None and P_f is None and Q_f is None:
        S_a = S
        P_a = P
        Q_a = Q
        Omega = np.array(np.diag(np.diag(P @ (tau * S) @ P.T)), ndmin=2)
        Omega_a = Omega

        if eq == True:
            PI_a_ = delta * S_a @ w
        elif eq == False:
            PI_a_ = mu.T - rf
    elif P is None and Q is None and P_f is not None and Q_f is not None:
        S_a = S_f
        P_a = P_f
        Q_a = Q_f
        Omega_f = np.array(np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T)), ndmin=2)
        Omega_a = Omega_f

        if eq == True:
            PI_a_ = delta * (S_f @ B.T) @ w
        elif eq == False:
            PI_a_ = mu_f.T - rf

    elif P is not None and Q is not None and P_f is not None and Q_f is not None:
        S_a = np.hstack((np.vstack((S, S_f @ B_.T)), np.vstack((B_ @ S_f, S_f))))

        P = np.array(P, ndmin=2)
        Q = np.array(Q, ndmin=2)
        P_f = np.array(P_f, ndmin=2)
        Q_f = np.array(Q_f, ndmin=2)
        zeros_1 = np.zeros((P_f.shape[0], P.shape[1]))
        zeros_2 = np.zeros((P.shape[0], P_f.shape[1]))
        P_a = np.hstack((np.vstack((P, zeros_1)), np.vstack((zeros_2, P_f))))
        Q_a = np.vstack((Q, Q_f))

        Omega = np.array(np.diag(np.diag(P @ (tau * S) @ P.T)), ndmin=2)
        Omega_f = np.array(np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T)), ndmin=2)
        zeros = np.zeros((Omega.shape[0], Omega_f.shape[0]))
        Omega_a = np.hstack((np.vstack((Omega, zeros.T)), np.vstack((zeros, Omega_f))))

        if eq == True:
            PI_a_ = delta * (np.vstack((S, S_f @ B_.T)) @ w)
        elif eq == False:
            PI_a_ = np.vstack((mu.T, mu_f.T)) - rf

    PI_a = inv(inv(tau * S_a) + P_a.T @ inv(Omega_a) @ P_a) @ (
        inv(tau * S_a) @ PI_a_ + P_a.T @ inv(Omega_a) @ Q_a
    )
    M_a = inv(inv(tau * S_a) + P_a.T @ inv(Omega_a) @ P_a)
    # PI_a = PI_a_ + (tau * S_a @ P_a.T) * inv(P_a @ tau * S_a @ P_a.T + Omega) * (Q_a - P_a @ PI_a_)
    # M = tau * S_a - (tau * S_a @ P_a.T) * inv(P_a @ tau * S_a @ P_a.T + Omega_a) @ P_a @ tau * S_a

    mu_a = PI_a + rf
    mu_a = mu_a.T
    cov_a = S_a + M_a
    w_a = inv(delta * cov_a) @ PI_a

    if P is None and Q is None and P_f is not None and Q_f is not None:
        mu_a = mu_a @ B_.T
        cov_a = B_ @ cov_a @ B_.T
        w_a = inv(delta * cov_a) @ B_ @ PI_a

    if const == True:
        mu_a = mu_a[:, :N] + alpha.T

    mu_a = pd.DataFrame(mu_a[:, :N], columns=assets)
    cov_a = pd.DataFrame(cov_a[:N, :N], index=assets, columns=assets)
    w_a = pd.DataFrame(w_a[:N, 0], index=assets)

    return mu_a, cov_a, w_a
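

# A minimal usage sketch (editorial illustration, kept commented out so the
# module still imports cleanly): `returns`, `factors`, `loadings`, and benchmark
# weights `w_bench` are assumed DataFrames, with `loadings` carrying the constant
# column that const=True expects. One relative view says the first asset
# outperforms the second by 2%:
#
#     P = pd.DataFrame(np.zeros((1, returns.shape[1])), columns=returns.columns)
#     P.iloc[0, 0], P.iloc[0, 1] = 1, -1
#     Q = pd.DataFrame([[0.02]])
#     mu_a, cov_a, w_a = augmented_black_litterman(
#         returns, w=w_bench, F=factors, B=loadings, P=P, Q=Q,
#         P_f=None, Q_f=None, delta=2.5, rf=0, eq=True, const=True,
#     )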


def black_litterman_bayesian(
    X,
    F,
    B,
    P_f,
    Q_f,
    delta=1,
    rf=0,
    eq=True,
    const=True,
    method_mu="hist",
    method_cov="hist",
    dict_mu={},
    dict_cov={},
):
    r"""
    Estimate the expected returns vector and covariance matrix based
    on the Black Litterman Bayesian model :cite:`b-BLB`.

    .. math::
        \begin{aligned}
        \Sigma & = B \Sigma_{F} B^{T} + D \\
        \overline{\Pi}_{F} & = \left ( \Sigma_{F}^{-1} + P_{F}^{T}\Omega_{F}^{-1}P_{F} \right )^{-1} \left ( \Sigma_{F}^{-1}\Pi_{F} + P_{F}^{T}\Omega_{F}^{-1}Q_{F} \right) \\
        \overline{\Sigma}_{F} & = \left ( \Sigma_{F}^{-1} + P_{F}^{T}\Omega_{F}^{-1}P_{F} \right )^{-1} \\
        \Sigma_{BLB} & = \left( \Sigma^{-1} - \Sigma^{-1} B \left( \overline{\Sigma}_{F}^{-1} + B^{T}\Sigma^{-1}B \right)^{-1} B^{T}\Sigma^{-1} \right )^{-1} \\
        \mu_{BLB} & = \Sigma_{BLB} \left ( \Sigma^{-1} B \left( \overline{\Sigma}_{F}^{-1} + B^{T}\Sigma^{-1}B \right)^{-1} \overline{\Sigma}_{F}^{-1} \overline{\Pi}_{F} \right ) + r_{f} \\
        \end{aligned}

    where:

    :math:`r_{f}` is the risk free rate.

    :math:`B` is the loadings matrix.

    :math:`D` is a diagonal matrix with the variances of the errors of a factor model.

    :math:`\Sigma` is the covariance matrix obtained with a factor model.

    :math:`\Pi_{F}` is the equilibrium excess returns of factors.

    :math:`\overline{\Pi}_{F}` is the posterior excess returns of factors.

    :math:`\Sigma_{F}` is the covariance matrix of factors.

    :math:`\overline{\Sigma}_{F}` is the posterior covariance matrix of factors.

    :math:`P_{F}` is the factors views matrix.

    :math:`Q_{F}` is the factors views returns matrix.

    :math:`\Omega_{F}` is the covariance matrix of errors of factors views.

    :math:`\mu_{BLB}` is the mean vector obtained with the Black
    Litterman Bayesian model or posterior predictive mean.

    :math:`\Sigma_{BLB}` is the covariance matrix obtained with the Black
    Litterman Bayesian model or posterior predictive covariance.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    F : DataFrame of shape (n_samples, n_factors)
        Risk factors returns DataFrame, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    B : DataFrame of shape (n_assets, n_factors), optional
        Loadings matrix, where n_assets is the number of assets and n_factors is
        the number of risk factors. The default is None.
    P_f : DataFrame of shape (n_views, n_factors)
        Analyst's factors views matrix, can be relative or absolute.
    Q_f : DataFrame of shape (n_views, 1)
        Expected returns of analyst's factors views.
    delta : float, optional
        Risk aversion factor. The default value is 1.
    rf : scalar, optional
        Risk free rate. The default is 0.
    eq : bool, optional
        Indicate if the model uses equilibrium or historical excess returns.
        The default is True.
    const : bool, optional
        Indicate if the loadings matrix has a constant.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
    method_cov : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
    dict_mu : dict
        Other variables related to the mean vector estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of the Black Litterman Bayesian model.
    cov : DataFrame
        The covariance matrix of the Black Litterman Bayesian model.
    w : DataFrame
        The equilibrium weights of the Black Litterman Bayesian model,
        without constraints.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(F, pd.DataFrame) and not isinstance(B, pd.DataFrame):
        raise ValueError("F and B must be DataFrames")

    assets = X.columns.tolist()

    if B is not None:
        B = np.array(B, ndmin=2)
        if const == True:
            alpha = B[:, :1]
            B = B[:, 1:]

    mu_f = np.array(mean_vector(F, method=method_mu, **dict_mu), ndmin=2)
    mu_f = (mu_f - rf).T

    tau = 1 / X.shape[0]

    S_f = np.array(covar_matrix(F, method=method_cov, **dict_cov), ndmin=2)
    S = B @ S_f @ B.T

    D = X.to_numpy() - F @ B.T
    D = np.diag(D.var())
    S = S + D

    Omega_f = np.array(np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T)), ndmin=2)

    S_hat = inv(inv(S_f) + P_f.T @ inv(Omega_f) @ P_f)

    Pi_hat = S_hat @ (inv(S_f) @ mu_f + P_f.T @ inv(Omega_f) @ Q_f)

    S_blb = inv(inv(S) - inv(S) @ B @ inv(inv(S_hat) + B.T @ inv(S) @ B) @ B.T @ inv(S))

    Pi_blb = (
        S_blb @ inv(S) @ B @ inv(inv(S_hat) + B.T @ inv(S) @ B) @ inv(S_hat) @ Pi_hat
    )

    mu = Pi_blb + rf

    if const == True:
        mu = mu + alpha
    mu = mu.T
    cov = S_blb
    w = inv(delta * cov) @ mu.T

    mu = pd.DataFrame(mu, columns=assets)
    cov = pd.DataFrame(cov, index=assets, columns=assets)
    w = pd.DataFrame(w, index=assets)

    return mu, cov, w
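

# A minimal usage sketch (editorial illustration, kept commented out): `returns`,
# `factors`, and `loadings` are assumed DataFrames, with `loadings` carrying the
# constant column that const=True expects. Absolute views that pin each factor's
# expected return at its sample mean:
#
#     P_f = pd.DataFrame(np.identity(factors.shape[1]), columns=factors.columns)
#     Q_f = pd.DataFrame(factors.mean().to_numpy().reshape(-1, 1))
#     mu_blb, cov_blb, w_blb = black_litterman_bayesian(
#         returns, F=factors, B=loadings, P_f=P_f, Q_f=Q_f, delta=2.5, rf=0,
#     )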


def bootstrapping(
    X,
    kind="stationary",
    q=0.05,
    n_sim=6000,
    window=3,
    diag=False,
    threshold=1e-15,
    seed=0,
):
    r"""
    Estimates the uncertainty sets of the mean vector and covariance matrix
    through the selected bootstrapping method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    kind : str
        The bootstrapping method. The default value is 'stationary'. Possible values are:

        - 'stationary': stationary bootstrapping method, see `StationaryBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.StationaryBootstrap.html#arch.bootstrap.StationaryBootstrap>`_ for more details.
        - 'circular': circular bootstrapping method, see `CircularBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.CircularBlockBootstrap.html#arch.bootstrap.CircularBlockBootstrap>`_ for more details.
        - 'moving': moving bootstrapping method, see `MovingBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.MovingBlockBootstrap.html#arch.bootstrap.MovingBlockBootstrap>`_ for more details.
    q : scalar
        Significance level for box and elliptical constraints.
        The default is 0.05.
    n_sim : scalar
        Number of simulations of the bootstrapping method.
        The default is 6000.
    window : int
        Block size of the bootstrapping method. Must be greater than 1
        and lower than n_samples - window + 1.
        The default is 3.
    diag : bool
        If True, consider only the main diagonal of the covariance matrices of
        estimation errors, following :cite:`b-fabozzi2007robust`. The default is False.
    threshold : float
        Parameter used to fix covariance matrices in case they are not positive semidefinite.
        The default is 1e-15.
    seed : int
        Seed used to generate random numbers for the bootstrapping method.
        The default is 0.

    Returns
    -------
    mu_l : DataFrame
        The q/2 percentile of the mean vector obtained through the selected
        bootstrapping method.
    mu_u : DataFrame
        The 1-q/2 percentile of the mean vector obtained through the selected
        bootstrapping method.
    cov_l : DataFrame
        The q/2 percentile of the covariance matrix obtained through the selected
        bootstrapping method.
    cov_u : DataFrame
        The 1-q/2 percentile of the covariance matrix obtained through the selected
        bootstrapping method.
    cov_mu : DataFrame
        The covariance matrix of estimation errors of the mean vector obtained
        through the selected bootstrapping method.
    cov_sigma : DataFrame
        The covariance matrix of estimation errors of the covariance matrix
        obtained through the selected bootstrapping method.
    k_mu : float
        The square root of the size of the elliptical constraint of the mean
        vector estimation error, based on the 1-q percentile.
    k_sigma : float
        The square root of the size of the elliptical constraint of the
        covariance matrix estimation error, based on the 1-q percentile.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if window >= X.shape[0] - window + 1:
        raise ValueError("block must be lower than n_samples - window + 1")
    elif window <= 1:
        raise ValueError("block must be greater than 1")

    cols = X.columns.tolist()
    cols_2 = [i + "-" + j for i in cols for j in cols]
    T, n = X.shape

    mu = X.mean().to_numpy().reshape(1, n)
    vec_Sigma = X.cov().to_numpy().reshape((1, n**2), order="F")

    mus = np.zeros((n_sim, 1, n))
    covs = np.zeros((n_sim, n, n))

    if kind == "stationary":
        gen = bs.StationaryBootstrap(window, X, seed=seed)
    elif kind == "circular":
        gen = bs.CircularBlockBootstrap(window, X, seed=seed)
    elif kind == "moving":
        gen = bs.MovingBlockBootstrap(window, X, seed=seed)
    else:
        raise ValueError("kind can only be 'stationary', 'circular' or 'moving'")

    i = 0
    for data in gen.bootstrap(n_sim):
        A = data[0][0]
        mus[i] = A.mean().to_numpy().reshape(1, n)
        covs[i] = A.cov().to_numpy()
        i += 1

    # Box Constraint for Mean
    mu_l = np.percentile(mus, q=q / 2 * 100, axis=0, keepdims=True).reshape(1, n)
    mu_u = np.percentile(mus, q=(1 - q / 2) * 100, axis=0, keepdims=True).reshape(1, n)
    mu_l = pd.DataFrame(mu_l, index=[0], columns=cols)
    mu_u = pd.DataFrame(mu_u, index=[0], columns=cols)

    # Box Constraint for Covariance
    cov_l = np.percentile(covs, q=q / 2 * 100, axis=0, keepdims=True).reshape(n, n)
    cov_u = np.percentile(covs, q=(1 - q / 2) * 100, axis=0, keepdims=True).reshape(
        n, n
    )
    cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
    cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)

    # Check if the upper and lower bounds for the covariance are positive
    # semidefinite and fix them when they are not
    if af.is_pos_def(cov_l) == False:
        cov_l = af.cov_fix(cov_l, method="clipped", threshold=threshold)
    if af.is_pos_def(cov_u) == False:
        cov_u = af.cov_fix(cov_u, method="clipped", threshold=threshold)

    # Elliptical Constraint for Mean
    A_mu = mus.reshape(n_sim, n) - np.repeat(mu, n_sim, axis=0)
    cov_mu = np.cov(A_mu, rowvar=False)
    if diag == True:
        cov_mu = np.diag(np.diag(cov_mu))
    k_mus = np.diag(A_mu @ inv(cov_mu) @ A_mu.T)
    k_mu = np.percentile(k_mus, q=(1 - q) * 100) ** 0.5
    cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)

    # Elliptical Constraint for Covariance
    A_Sigma = covs.reshape((n_sim, n**2), order="F")
    A_Sigma = A_Sigma - np.repeat(vec_Sigma, n_sim, axis=0)
    cov_sigma = np.cov(A_Sigma, rowvar=False)
    cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
    if diag == True:
        cov_sigma = np.diag(np.diag(cov_sigma))
    if af.is_pos_def(cov_sigma) == False:
        cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
    k_sigmas = np.diag(A_Sigma @ inv(cov_sigma) @ A_Sigma.T)
    k_sigma = np.percentile(k_sigmas, q=(1 - q) * 100) ** 0.5
    cov_sigma = pd.DataFrame(cov_sigma, index=cols_2, columns=cols_2)

    return mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma
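

# A minimal usage sketch (editorial illustration, kept commented out): with
# q=0.05, (mu_l, mu_u) bound each asset's mean between the 2.5th and 97.5th
# bootstrap percentiles, while k_mu scales the ellipsoid
# (mu - mu_hat)' cov_mu^{-1} (mu - mu_hat) <= k_mu^2 that contains 95% of the
# bootstrap draws. `returns` is an assumed DataFrame of asset returns:
#
#     mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma = bootstrapping(
#         returns, kind="stationary", q=0.05, n_sim=6000, window=3, seed=0
#     )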


def normal_simulation(X, q=0.05, n_sim=6000, diag=False, threshold=1e-15, seed=0):
    r"""
    Estimates the uncertainty sets of the mean vector and covariance matrix
    assuming that asset returns follow a multivariate normal distribution.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    q : scalar
        Significance level for box and elliptical constraints.
        The default is 0.05.
    n_sim : scalar
        Number of simulations used to estimate the percentile bounds.
        The default is 6000.
    diag : bool
        If True, consider only the main diagonal of the covariance matrices of
        estimation errors, following :cite:`b-fabozzi2007robust`. The default is False.
    threshold : float
        Parameter used to fix covariance matrices in case they are not positive
        semidefinite. The default is 1e-15.
    seed : int
        Seed used to generate random numbers for the simulation.
        The default is 0.

    Returns
    -------
    mu_l : DataFrame
        The q/2 percentile of the mean vector obtained through the normal
        simulation.
    mu_u : DataFrame
        The 1-q/2 percentile of the mean vector obtained through the normal
        simulation.
    cov_l : DataFrame
        The q/2 percentile of the covariance matrix obtained through the normal
        simulation.
    cov_u : DataFrame
        The 1-q/2 percentile of the covariance matrix obtained through the normal
        simulation.
    cov_mu : DataFrame
        The covariance matrix of estimation errors of the mean vector obtained
        through the normal simulation.
    cov_sigma : DataFrame
        The covariance matrix of estimation errors of the covariance matrix
        obtained through the normal simulation.
    k_mu : float
        The square root of the size of the elliptical constraint of the mean
        vector estimation error, based on the 1-q percentile.
    k_sigma : float
        The square root of the size of the elliptical constraint of the
        covariance matrix estimation error, based on the 1-q percentile.

    Raises
    ------
    ValueError
        When the value cannot be calculated.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    cols = X.columns.tolist()
    cols_2 = [i + "-" + j for i in cols for j in cols]
    T, n = X.shape

    # Set initial parameters based on the assumption of normality
    mu = X.mean().to_numpy().reshape(1, n)
    vec_Sigma = X.cov().to_numpy().reshape((1, n**2), order="F")
    Sigma = X.cov().to_numpy()
    cov_mu = Sigma / T
    K = cf.commutation_matrix(T=n, n=n)
    I = np.identity(n**2)
    cov_sigma = T * (I + K) @ np.kron(cov_mu, cov_mu)
    if diag == True:
        cov_sigma = np.diag(np.diag(cov_sigma))
    if af.is_pos_def(cov_sigma) == False:
        cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
    cov_sigma = pd.DataFrame(cov_sigma, index=cols_2, columns=cols_2)

    # Box Constraint for Mean
    delta_mu = st.norm.ppf(1 - q / 2) * np.sqrt(np.diag(cov_mu)).reshape(-1, 1)
    mu_l = mu - delta_mu.T
    mu_u = mu + delta_mu.T
    mu_l = pd.DataFrame(mu_l, index=[0], columns=cols)
    mu_u = pd.DataFrame(mu_u, index=[0], columns=cols)

    # Box Constraints for Covariance (np.percentile expects percentiles in
    # [0, 100], so q is scaled by 100 as in bootstrapping above)
    rs = np.random.RandomState(seed=seed)
    covs = st.wishart.rvs(T, cov_mu, size=n_sim, random_state=rs)
    cov_l = np.percentile(covs, q=q / 2 * 100, axis=0)
    cov_u = np.percentile(covs, q=(1 - q / 2) * 100, axis=0)
    cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
    cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)

    # Check if the upper and lower bounds for the covariance are positive
    # semidefinite and fix them when they are not
    if af.is_pos_def(cov_l) == False:
        cov_l = af.cov_fix(cov_l, method="clipped", threshold=threshold)
    if af.is_pos_def(cov_u) == False:
        cov_u = af.cov_fix(cov_u, method="clipped", threshold=threshold)

    # Elliptical Constraint for Mean (the draws are centered at mu so the
    # quadratic form measures estimation error around the sample mean)
    A_mu = rs.multivariate_normal(mu.ravel(), cov_mu, size=n_sim)
    A_mu = A_mu - np.repeat(mu, n_sim, axis=0)
    # cov_mu = np.cov(A_mu, rowvar=False)
    if diag == True:
        cov_mu = np.diag(np.diag(cov_mu))
    k_mus = np.diag(A_mu @ inv(cov_mu) @ A_mu.T)
    k_mu = np.percentile(k_mus, q=(1 - q) * 100) ** 0.5
    # k_mu = st.chi2.ppf(1 - q, df=n) ** 0.5
    cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)

    # Elliptical Constraint for Covariance
    A_Sigma = covs.reshape((n_sim, n**2), order="F")
    A_Sigma = A_Sigma - np.repeat(vec_Sigma, n_sim, axis=0)
    A_cov_sigma = np.cov(A_Sigma, rowvar=False)
    if diag == True:
        A_cov_sigma = np.diag(np.diag(A_cov_sigma))
    if af.is_pos_def(A_cov_sigma) == False:
        A_cov_sigma = af.cov_fix(A_cov_sigma, method="clipped", threshold=threshold)
    k_sigmas = np.diag(A_Sigma @ inv(A_cov_sigma) @ A_Sigma.T)
    k_sigma = np.percentile(k_sigmas, q=(1 - q) * 100) ** 0.5

    return mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma
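

# A minimal usage sketch (editorial illustration, kept commented out): under
# normality the error moments are available in closed form, cov_mu = Sigma / T
# and cov_sigma = T * (I + K) @ kron(cov_mu, cov_mu) with K the commutation
# matrix, so the simulation is only needed to locate the percentile bounds.
# `returns` is an assumed DataFrame of asset returns:
#
#     mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma = normal_simulation(
#         returns, q=0.05, n_sim=6000, seed=0
#     )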