riskfolio-lib 7.1.0__cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1943 @@
1
+ """""" #
2
+
3
+ """
4
+ Copyright (c) 2020-2025, Dany Cajas
5
+ All rights reserved.
6
+ This work is licensed under BSD 3-Clause "New" or "Revised" License.
7
+ License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import statsmodels.api as sm
13
+ import scipy.stats as st
14
+ import sklearn.covariance as skcov
15
+ import arch.bootstrap as bs
16
+
17
+ from sklearn.preprocessing import StandardScaler
18
+ from sklearn.decomposition import PCA
19
+ from numpy.linalg import inv
20
+ from itertools import product
21
+
22
+ import riskfolio.src.AuxFunctions as af
23
+ import riskfolio.src.DBHT as db
24
+ import riskfolio.src.GerberStatistic as gs
25
+ import riskfolio.external.cppfunctions as cf
26
+
27
+
28
+ __all__ = [
29
+ "mean_vector",
30
+ "covar_matrix",
31
+ "cokurt_matrix",
32
+ "forward_regression",
33
+ "backward_regression",
34
+ "PCR",
35
+ "loadings_matrix",
36
+ "risk_factors",
37
+ "black_litterman",
38
+ "augmented_black_litterman",
39
+ "black_litterman_bayesian",
40
+ "bootstrapping",
41
+ "normal_simulation",
42
+ ]
43
+
44
+
45
def mean_vector(X, method="hist", d=0.94, target="b1"):
    r"""
    Calculate the expected returns vector using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    method : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'. Possible values are:

        - 'hist': use historical estimator.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.

    d : scalar
        The smoothing factor of ewma methods.
        The default is 0.94.

    target : str, optional
        The target mean vector, only used by the 'JS', 'BS' and 'BOP'
        methods. The default value is 'b1'. Possible values are:

        - 'b1': grand mean.
        - 'b2': volatility weighted grand mean.
        - 'b3': mean square error of sample mean.

    Returns
    -------
    mu : DataFrame of shape (1, n_assets)
        The estimation of expected returns, with the assets as columns.

    Raises
    ------
    ValueError
        When X is not a DataFrame, or when method or target is not one of
        the supported values.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        mu = np.array(X.mean(), ndmin=2)
    elif method == "ewma1":
        mu = np.array(X.ewm(alpha=1 - d).mean().iloc[-1, :], ndmin=2)
    elif method == "ewma2":
        mu = np.array(X.ewm(alpha=1 - d, adjust=False).mean().iloc[-1, :], ndmin=2)
    elif method in ["JS", "BS", "BOP"]:
        # Fail early with a clear message instead of an unbound-variable error.
        if target not in ["b1", "b2", "b3"]:
            raise ValueError("target must be one of 'b1', 'b2' or 'b3'")

        T, n = np.array(X, ndmin=2).shape
        ones = np.ones((n, 1))
        mu = np.array(X.mean(), ndmin=2).reshape(-1, 1)
        # np.cov returns a 0-d array for a single asset; keep it 2-d so that
        # the inverse and the matrix products below are well defined.
        Sigma = np.atleast_2d(np.cov(X, rowvar=False))
        Sigma_inv = np.linalg.inv(Sigma)

        # Calculate target vector
        if target == "b1":
            b = ones.T @ mu / n * ones
        elif target == "b2":
            b = ones.T @ Sigma_inv @ mu / (ones.T @ Sigma_inv @ ones) * ones
        else:  # target == "b3"
            b = np.trace(Sigma) / T * ones

        # Calculate Estimators
        if method == "JS":
            # The eigenvalues are only needed by the James-Stein shrinkage.
            eigvals = np.linalg.eigvals(Sigma)
            alpha_1 = (
                1
                / T
                * (n * np.mean(eigvals) - 2 * np.max(eigvals))
                / ((mu - b).T @ (mu - b))
            )
            mu = (1 - alpha_1) * mu + alpha_1 * b
        elif method == "BS":
            alpha_1 = (n + 2) / ((n + 2) + T * (mu - b).T @ Sigma_inv @ (mu - b))
            mu = (1 - alpha_1) * mu + alpha_1 * b
        else:  # method == "BOP"
            alpha_1 = (mu.T @ Sigma_inv @ mu - n / (T - n)) * b.T @ Sigma_inv @ b - (
                mu.T @ Sigma_inv @ b
            ) ** 2
            alpha_1 /= (mu.T @ Sigma_inv @ mu) * (b.T @ Sigma_inv @ b) - (
                mu.T @ Sigma_inv @ b
            ) ** 2
            beta_1 = (1 - alpha_1) * (mu.T @ Sigma_inv @ b) / (mu.T @ Sigma_inv @ mu)
            mu = alpha_1 * mu + beta_1 * b
        # Back to a row vector so every method returns the same layout.
        mu = mu.T
    else:
        raise ValueError(
            "method must be one of 'hist', 'ewma1', 'ewma2', 'JS', 'BS' or 'BOP'"
        )

    mu = pd.DataFrame(np.array(mu, ndmin=2), columns=assets)

    return mu
144
+
145
+
146
def covar_matrix(
    X,
    method="hist",
    d=0.94,
    alpha=0.1,
    bWidth=0.01,
    detone=False,
    mkt_comp=1,
    threshold=0.5,
):
    r"""
    Calculate the covariance matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    method : str, optional
        The method used to estimate the covariance matrix:
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'semi': use semi lower covariance matrix.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.

    d : scalar
        The smoothing factor of ewma methods. The default is 0.94.
    alpha : scalar
        The shrinkage factor of 'shrunk' and 'shrink' methods. The default is 0.1.
    bWidth : float
        The bandwidth of the kernel for 'fixed', 'spectral' and 'shrink' methods.
    detone : bool, optional
        If remove the first mkt_comp of correlation matrix for 'fixed', 'spectral'
        and 'shrink' methods. The detone correlation matrix is singular, so it
        cannot be inverted.
    mkt_comp : int, optional
        Number of first components that will be removed using the detone method.
    threshold : float
        Threshold for 'gerber1' and 'gerber2' methods is between 0 and 1.

    Returns
    -------
    cov : DataFrame of shape (n_assets, n_assets)
        The estimation of covariance matrix, indexed by asset on both axes.

    Raises
    ------
    ValueError
        When X is not a DataFrame or method is not a supported value.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        cov = np.cov(X, rowvar=False)
    elif method == "semi":
        T, N = X.shape
        mu = X.mean().to_numpy().reshape(1, -1)
        a = X - np.repeat(mu, T, axis=0)
        # Keep only the downside deviations (positive ones become zero).
        a = np.minimum(a, np.zeros_like(a))
        cov = 1 / (T - 1) * a.T @ a
    elif method in ["ewma1", "ewma2"]:
        if method == "ewma1":
            cov = X.ewm(alpha=1 - d).cov()
        else:
            cov = X.ewm(alpha=1 - d, adjust=False).cov()
        # The ewm covariance is stacked per observation; keep the block that
        # belongs to the last observation.
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ledoit":
        lw = skcov.LedoitWolf()
        lw.fit(X)
        cov = lw.covariance_
    elif method == "oas":
        oas = skcov.OAS()
        oas.fit(X)
        cov = oas.covariance_
    elif method == "shrunk":
        sc = skcov.ShrunkCovariance(shrinkage=alpha)
        sc.fit(X)
        cov = sc.covariance_
    elif method == "gl":
        gl = skcov.GraphicalLassoCV()
        gl.fit(X)
        cov = gl.covariance_
    elif method == "jlogo":
        S = np.cov(X, rowvar=False)
        R = np.corrcoef(X, rowvar=False)
        D = np.sqrt(np.clip((1 - R) / 2, a_min=0.0, a_max=1.0))
        (_, _, separators, cliques, _) = db.PMFG_T2s(1 - D**2, nargout=4)
        cov = db.j_LoGo(S, separators, cliques)
        # The result of j_LoGo is inverted to obtain the covariance matrix.
        cov = np.linalg.inv(cov)
    elif method in ["fixed", "spectral", "shrink"]:
        cov = np.cov(X, rowvar=False)
        T, N = X.shape
        q = T / N
        cov = af.denoiseCov(
            cov,
            q,
            kind=method,
            bWidth=bWidth,
            detone=detone,
            mkt_comp=int(mkt_comp),
            alpha=alpha,
        )
    elif method == "gerber1":
        cov = gs.gerber_cov_stat1(X, threshold=threshold)
    elif method == "gerber2":
        cov = gs.gerber_cov_stat2(X, threshold=threshold)
    else:
        # Previously an unknown method surfaced as an unbound-variable error.
        raise ValueError("The selected covariance method is not supported")

    cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)

    return cov
275
+
276
+
277
def cokurt_matrix(
    X,
    method="hist",
    alpha=0.1,
    bWidth=0.01,
    detone=False,
    mkt_comp=1,
):
    r"""
    Calculate the cokurtosis square matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    method : str, optional
        The method used to estimate the cokurtosis square matrix:
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'semi': use semi lower cokurtosis square matrix.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
    alpha : scalar
        Value forwarded as ``alpha`` to the denoising routine for the 'fixed',
        'spectral' and 'shrink' methods. The default is 0.1.
    bWidth : float
        The bandwidth of the kernel for 'fixed', 'spectral' and 'shrink' methods.
    detone : bool, optional
        If remove the first mkt_comp of correlation matrix for 'fixed', 'spectral'
        and 'shrink' methods. The detone correlation matrix is singular, so it
        cannot be inverted.
    mkt_comp : int, optional
        Number of first components that will be removed using the detone method.

    Returns
    -------
    kurt : DataFrame
        The estimation of cokurtosis square matrix. Rows and columns are
        labeled with every ordered pair of assets as "asset_j - asset_i".

    Raises
    ------
    ValueError
        When X is not a DataFrame or method is not a supported value.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()
    # One label per ordered pair of assets.
    cols = [str(y) + " - " + str(x) for x, y in product(assets, assets)]

    if method == "hist":
        kurt = cf.cokurtosis_matrix(X)
    elif method == "semi":
        kurt = cf.semi_cokurtosis_matrix(X)
    elif method in ["fixed", "spectral", "shrink"]:
        kurt = cf.cokurtosis_matrix(X)
        T, N = X.shape
        q = T / N
        kurt = af.denoiseCov(
            kurt,
            q,
            kind=method,
            bWidth=bWidth,
            detone=detone,
            mkt_comp=int(mkt_comp),  # same coercion as covar_matrix
            alpha=alpha,
        )
    else:
        # Previously an unknown method surfaced as an unbound-variable error.
        raise ValueError("The selected cokurtosis method is not supported")

    kurt = pd.DataFrame(np.array(kurt, ndmin=2), columns=cols, index=cols)

    return kurt
351
+
352
+
353
def forward_regression(X, y, criterion="pvalue", threshold=0.05, verbose=False):
    r"""
    Select the variables that estimate the best model using stepwise
    forward regression. In case none of the variables has a p-value lower
    than threshold, the algorithm will select the variable with lowest p-value.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    y : DataFrame or Series of shape (n_samples, 1)
        Asset returns column DataFrame or Series, where n_samples is the number
        of samples.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on lowest Akaike Information Criterion.
        - 'SIC': select the features based on lowest Schwarz Information Criterion.
        - 'R2': select the features based on highest R Squared.
        - 'R2_A': select the features based on highest Adjusted R Squared.

    threshold : scalar, optional
        Is the maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    verbose : bool, optional
        Enable verbose output. The default is False.

    Returns
    -------
    value : list
        A list of the variables that produce the best model.

    Raises
    ------
    ValueError
        When X or y has the wrong type or shape, when criterion is not a
        supported value, or when no variable can be selected.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
        raise ValueError("y must be a column DataFrame")

    if isinstance(y, pd.DataFrame):
        if y.shape[0] > 1 and y.shape[1] > 1:
            raise ValueError("y must be a column DataFrame")

    # An unknown criterion used to fall through and silently return [].
    if criterion not in ("pvalue", "AIC", "SIC", "R2", "R2_A"):
        raise ValueError(
            "criterion must be one of 'pvalue', 'AIC', 'SIC', 'R2' or 'R2_A'"
        )

    included = []

    if criterion == "pvalue":
        pvalues = None
        value = 0
        while value <= threshold:
            # Walk the candidates in column order so ties are broken the
            # same way on every run (set ordering is not reproducible).
            excluded = [c for c in X.columns if c not in included]
            best_pvalue = 999999
            new_feature = None
            for i in excluded:
                factors = included + [i]
                X1 = sm.add_constant(X[factors])
                results = sm.OLS(y, X1).fit()
                new_pvalues = results.pvalues
                new_pvalues = new_pvalues[new_pvalues.index != "const"]
                # A candidate is only acceptable when every factor of the
                # enlarged model stays at or below the threshold.
                cond_1 = new_pvalues.max()
                if best_pvalue > new_pvalues[i] and cond_1 <= threshold:
                    best_pvalue = results.pvalues[i]
                    new_feature = i
                    pvalues = new_pvalues.copy()

            if pvalues is not None:
                value = pvalues[pvalues.index != "const"].max()

            if new_feature is None:
                break

            included.append(new_feature)

            if verbose:
                print("Add {} with p-value {:.6}".format(new_feature, best_pvalue))

        # This part is how to deal when there isn't an asset with pvalue lower than threshold
        if len(included) == 0:
            best_pvalue = 999999
            new_feature = None
            for i in X.columns.tolist():
                X1 = sm.add_constant(X[[i]])
                results = sm.OLS(y, X1).fit()
                new_pvalues = results.pvalues
                new_pvalues = new_pvalues[new_pvalues.index != "const"]
                if best_pvalue > new_pvalues[i]:
                    best_pvalue = results.pvalues[i]
                    new_feature = i
                    pvalues = new_pvalues.copy()

            if new_feature is None:
                # No factor produced a comparable p-value (e.g. X has no
                # columns); this used to crash while indexing None.
                raise ValueError("No variable could be selected")

            included.append(new_feature)

            if verbose:
                print(
                    "Add {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
                )

    else:
        # Name of the fitted-model attribute that holds each criterion.
        stat = {
            "AIC": "aic",
            "SIC": "bic",
            "R2": "rsquared",
            "R2_A": "rsquared_adj",
        }[criterion]
        minimize = criterion in ("AIC", "SIC")
        best = 1e10 if minimize else -1e10

        excluded = X.columns.tolist()

        # At most one factor is added per pass, so len(excluded) passes suffice.
        for _ in range(len(excluded)):
            scores = {}
            for i in excluded:
                X1 = sm.add_constant(X[included + [i]])
                results = sm.OLS(y, X1).fit()
                scores[i] = getattr(results, stat)

            scores = pd.Series(scores)

            if minimize:
                key = scores.idxmin()
                value = scores.min()
                improved = value < best
            else:
                key = scores.idxmax()
                value = scores.max()
                improved = value > best

            # Stop as soon as the best candidate no longer improves the model.
            if not improved:
                break

            excluded.remove(key)
            included.append(key)
            best = value

            if verbose:
                print("Add {} with {} {:.6}".format(key, criterion, value))

    return included
534
+
535
+
536
def backward_regression(X, y, criterion="pvalue", threshold=0.05, verbose=False):
    r"""
    Select the variables that estimate the best model using stepwise
    backward regression. In case none of the variables has a p-value lower
    than threshold, the algorithm will select the variable with lowest p-value.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    y : DataFrame or Series of shape (n_samples, 1)
        Asset returns column DataFrame or Series, where n_samples is the number
        of samples.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on lowest Akaike Information Criterion.
        - 'SIC': select the features based on lowest Schwarz Information Criterion.
        - 'R2': select the features based on highest R Squared.
        - 'R2_A': select the features based on highest Adjusted R Squared.
    threshold : scalar, optional
        Is the maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    verbose : bool, optional
        Enable verbose output. The default is False.

    Returns
    -------
    value : list
        A list of the variables that produce the best model.

    Raises
    ------
    ValueError
        When X or y has the wrong type or shape, when criterion is not a
        supported value, or when no variable can be selected.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
        raise ValueError("y must be a column DataFrame")

    if isinstance(y, pd.DataFrame):
        if y.shape[0] > 1 and y.shape[1] > 1:
            raise ValueError("y must be a column DataFrame")

    # An unknown criterion used to fall through and silently keep every factor.
    if criterion not in ("pvalue", "AIC", "SIC", "R2", "R2_A"):
        raise ValueError(
            "criterion must be one of 'pvalue', 'AIC', 'SIC', 'R2' or 'R2_A'"
        )

    # Start from the model that contains every factor.
    X1 = sm.add_constant(X)
    results = sm.OLS(y, X1).fit()
    pvalues = results.pvalues

    included = pvalues.index.tolist()

    if criterion == "pvalue":
        excluded = ["const"]
        while pvalues[pvalues.index != "const"].max() > threshold:
            # Refit without the factor flagged on the previous pass.
            factors = pvalues[~pvalues.index.isin(excluded)].index.tolist()
            X1 = sm.add_constant(X[factors])
            results = sm.OLS(y, X1).fit()
            pvalues = results.pvalues
            pvalues = pvalues[pvalues.index != "const"]
            if pvalues.shape[0] == 0:
                break
            # Flag the least significant factor for removal on the next pass.
            excluded = ["const", pvalues.idxmax()]
            if verbose and pvalues.max() > threshold:
                print(
                    "Drop {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
                )

        included = pvalues[pvalues.index != "const"].index.tolist()

        # This part is how to deal when there isn't an asset with pvalue lower than threshold
        if len(included) == 0:
            best_pvalue = 999999
            new_feature = None
            # Column order keeps tie-breaking reproducible between runs.
            for i in X.columns.tolist():
                X1 = sm.add_constant(X[[i]])
                results = sm.OLS(y, X1).fit()
                new_pvalues = results.pvalues
                new_pvalues = new_pvalues[new_pvalues.index != "const"]
                if best_pvalue > new_pvalues[i]:
                    best_pvalue = results.pvalues[i]
                    new_feature = i
                    pvalues = new_pvalues.copy()

            if new_feature is None:
                # No factor produced a comparable p-value; returning [None]
                # would only move the failure to the caller.
                raise ValueError("No variable could be selected")

            included.append(new_feature)

            if verbose:
                print(
                    "Add {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
                )

    else:
        # Name of the fitted-model attribute that holds each criterion.
        stat = {
            "AIC": "aic",
            "SIC": "bic",
            "R2": "rsquared",
            "R2_A": "rsquared_adj",
        }[criterion]
        minimize = criterion in ("AIC", "SIC")
        # The full model is the baseline every reduced model has to beat.
        best = getattr(results, stat)

        included.remove("const")

        # At most one factor is dropped per pass, so len(included) passes suffice.
        for _ in range(len(included)):
            scores = {}
            for i in included:
                factors = included.copy()
                factors.remove(i)
                X1 = sm.add_constant(X[factors])
                results = sm.OLS(y, X1).fit()
                scores[i] = getattr(results, stat)

            scores = pd.Series(scores)

            if minimize:
                key = scores.idxmin()
                value = scores.min()
                improved = value < best
            else:
                key = scores.idxmax()
                value = scores.max()
                improved = value > best

            # Stop as soon as dropping a factor no longer improves the model.
            if not improved:
                break

            included.remove(key)
            best = value

            if verbose:
                print("Drop {} with {} {:.6}".format(key, criterion, value))

    return included
706
+
707
+
708
def PCR(X, y, n_components=0.95):
    r"""
    Estimate the coefficients using Principal Components Regression (PCR).

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    y : DataFrame or Series of shape (n_samples, 1)
        Asset returns column DataFrame or Series, where n_samples is the number
        of samples.
    n_components : int, float, None or str, optional
        if 1 <= n_components, it is converted to int and represents the number
        of components that will be kept. if 0 < n_components < 1 (float), it
        represents the percentage of variance that is explained by the
        components kept. None and str values are handed to PCA unchanged.
        See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
        for more details. The default is 0.95.

    Returns
    -------
    value : nd-array
        A row array with the coefficients of the model calculated using PCR;
        the first entry is the intercept.

    Raises
    ------
    ValueError
        When X or y has the wrong type or shape, or when n_components is a
        number that is not positive.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
        raise ValueError("y must be a column DataFrame")

    if isinstance(y, pd.DataFrame):
        if y.shape[0] > 1 and y.shape[1] > 1:
            raise ValueError("y must be a column DataFrame")

    # Resolve the PCA argument first so that an invalid value fails with a
    # clear message instead of leaving the PCA object undefined.
    if n_components is None or isinstance(n_components, str):
        pca_components = n_components
    elif 0 < n_components < 1:
        pca_components = n_components
    elif n_components >= 1:
        pca_components = int(n_components)
    else:
        raise ValueError("n_components must be a positive number, None or a str")

    scaler = StandardScaler()
    scaler.fit(X)
    X_std = scaler.transform(X)

    pca = PCA(n_components=pca_components)
    pca.fit(X_std)
    Z_p = pca.transform(X_std)
    V_p = pca.components_.T

    # Regress y on the principal components; the first parameter is the
    # intercept and is dropped here.
    results = sm.OLS(y, sm.add_constant(Z_p)).fit()
    beta_pc = results.params[1:]
    beta_pc = np.array(beta_pc, ndmin=2)

    # Map the component coefficients back to the original factor units.
    # NOTE(review): this uses the sample std (ddof=1); StandardScaler
    # presumably scales with the population std (ddof=0) - confirm whether
    # scaler.scale_ should be used here instead.
    std = np.array(np.std(X, axis=0, ddof=1), ndmin=2)
    mean = np.array(np.mean(X, axis=0), ndmin=2)
    beta = V_p @ beta_pc.T / std.T

    beta_0 = np.array(y.mean(), ndmin=2) - np.sum(beta * mean.T)

    beta = np.insert(beta, 0, beta_0)
    beta = np.array(beta, ndmin=2)

    return beta
776
+
777
+
778
def loadings_matrix(
    X,
    Y,
    feature_selection="stepwise",
    stepwise="Forward",
    criterion="pvalue",
    threshold=0.05,
    n_components=0.95,
    verbose=False,
):
    r"""
    Estimate the loadings matrix using stepwise regression or principal
    components regression.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_factors)
        Risk factors returns matrix, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    Y : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    feature_selection: str, 'stepwise' or 'PCR', optional
        Indicate the method used to estimate the loadings matrix.
        The default is 'stepwise'. Possible values are:

        - 'stepwise': use stepwise regression to select the best factors and estimate coefficients.
        - 'PCR': use principal components regression to estimate coefficients.
    stepwise: str 'Forward' or 'Backward', optional
        Indicate the method used for stepwise regression.
        The default is 'Forward'.
    criterion : str, optional
        The default is 'pvalue'. Possible values of the criterion used to select
        the best features are:

        - 'pvalue': select the features based on p-values.
        - 'AIC': select the features based on lowest Akaike Information Criterion.
        - 'SIC': select the features based on lowest Schwarz Information Criterion.
        - 'R2': select the features based on highest R Squared.
        - 'R2_A': select the features based on highest Adjusted R Squared.
    threshold : scalar, optional
        Is the maximum p-value for each variable that will be
        accepted in the model. The default is 0.05.
    n_components : int, float, None or str, optional
        if 1 < n_components (int), it represents the number of components that
        will be kept. if 0 < n_components < 1 (float), it represents the
        percentage of variance that is explained by the components kept.
        See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
        for more details. The default is 0.95.
    verbose : bool, optional
        Enable verbose output. The default is False.

    Returns
    -------
    loadings : DataFrame
        Loadings matrix with one row per asset and one column per factor,
        preceded by a 'const' column. Factors that were not selected keep
        a loading of zero.

    Raises
    ------
    ValueError
        When X or Y is not a DataFrame, or when feature_selection or
        stepwise is not a supported value.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    if not isinstance(Y, pd.DataFrame):
        raise ValueError("Y must be a DataFrame")

    # Reject unsupported options up front; an unknown feature_selection used
    # to return an all-zero loadings matrix without any warning.
    if feature_selection not in ("stepwise", "PCR"):
        raise ValueError("Choose an adequate feature selection method")

    if feature_selection == "stepwise" and stepwise not in ("Forward", "Backward"):
        raise ValueError("Choose and adequate stepwise method")

    rows = Y.columns.tolist()
    cols = ["const"] + X.columns.tolist()
    loadings = pd.DataFrame(
        np.zeros((len(rows), len(cols))), index=rows, columns=cols
    )

    for i in rows:
        if feature_selection == "stepwise":
            if stepwise == "Forward":
                select = forward_regression
            else:
                select = backward_regression
            included = select(
                X, Y[i], criterion=criterion, threshold=threshold, verbose=verbose
            )
            # Refit on the selected factors only; the other loadings stay zero.
            results = sm.OLS(Y[i], sm.add_constant(X[included])).fit()
            params = results.params
            loadings.loc[i, params.index.tolist()] = params
        else:  # feature_selection == "PCR"
            beta = PCR(X, Y[i], n_components=n_components)
            beta = pd.Series(np.ravel(beta), index=cols)
            loadings.loc[i, cols] = beta

    return loadings
873
+
874
+
875
+ def risk_factors(
876
+ X,
877
+ Y,
878
+ B=None,
879
+ const=True,
880
+ method_mu="hist",
881
+ method_cov="hist",
882
+ feature_selection="stepwise",
883
+ stepwise="Forward",
884
+ criterion="pvalue",
885
+ threshold=0.05,
886
+ n_components=0.95,
887
+ dict_mu={},
888
+ dict_cov={},
889
+ ):
890
+ r"""
891
+ Estimate the expected returns vector and covariance matrix based on risk
892
+ factors models :cite:`b-Ross` :cite:`b-Fan`.
893
+
894
+ .. math::
895
+ \begin{aligned}
896
+ R & = \alpha + B F + \epsilon \\
897
+ \mu_{f} & = \alpha +BE(F) \\
898
+ \Sigma_{f} & = B \Sigma_{F} B^{T} + \Sigma_{\epsilon} \\
899
+ \end{aligned}
900
+
901
+
902
+ where:
903
+
904
+ :math:`R` is the series returns.
905
+
906
+ :math:`\alpha` is the intercept.
907
+
908
+ :math:`B` is the loadings matrix.
909
+
910
+ :math:`F` is the expected returns vector of the risk factors.
911
+
912
+ :math:`\Sigma_{F}` is the covariance matrix of the risk factors.
913
+
914
+ :math:`\Sigma_{\epsilon}` is the covariance matrix of error terms.
915
+
916
+ :math:`\mu_{f}` is the expected returns vector obtained with the
917
+ risk factor model.
918
+
919
+ :math:`\Sigma_{f}` is the covariance matrix obtained with the risk
920
+ factor model.
921
+
922
+ Parameters
923
+ ----------
924
+ X : DataFrame of shape (n_samples, n_factors)
925
+ Risk factors returns matrix, where n_samples is the number of samples
926
+ and n_factors is the number of risk factors.
927
+ Y : DataFrame of shape (n_samples, n_assets)
928
+ Assets returns DataFrame, where n_samples is the number of
929
+ observations and n_assets is the number of assets.
930
+ B : DataFrame of shape (n_assets, n_factors), optional
931
+ Loadings matrix, where n_assets is the number assets and n_factors is
932
+ the number of risk factors. If is not specified, is estimated using
933
+ stepwise regression. The default is None.
934
+ const : bool, optional
935
+ Indicate if the loadings matrix has a constant.
936
+ The default is False.
937
+ method_mu : str, optional
938
+ The method used to estimate the expected returns of factors.
939
+ The default value is 'hist'. Possible values are:
940
+
941
+ - 'hist': use historical estimates.
942
+ - 'ewma1'': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
943
+ - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
944
+ - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
945
+ - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
946
+ - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
947
+ method_cov : str, optional
948
+ The method used to estimate the covariance matrix of factors.
949
+ The default is 'hist'. Possible values are:
950
+
951
+ - 'hist': use historical estimates.
952
+ - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
953
+ - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
954
+ - 'ledoit': use the Ledoit and Wolf Shrinkage method.
955
+ - 'oas': use the Oracle Approximation Shrinkage method.
956
+ - 'shrunk': use the basic Shrunk Covariance method.
957
+ - 'gl': use the basic Graphical Lasso Covariance method.
958
+ - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
959
+ - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
960
+ - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
961
+ - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
962
+ - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
963
+ - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
964
+ feature_selection: str, 'stepwise' or 'PCR', optional
965
+ Indicate the method used to estimate the loadings matrix.
966
+ The default is 'stepwise'. Possible values are:
967
+
968
+ - 'stepwise': use stepwise regression to select the best factors and estimate coefficients.
969
+ - 'PCR': use principal components regression to estimate coefficients.
970
+ stepwise: str, 'Forward' or 'Backward'
971
+ Indicate the method used for stepwise regression.
972
+ The default is 'Forward'.
973
+ criterion : str, optional
974
+ The default is 'pvalue'. Possible values of the criterion used to select
975
+ the best features are:
976
+
977
+ - 'pvalue': select the features based on p-values.
978
+ - 'AIC': select the features based on lowest Akaike Information Criterion.
979
+ - 'SIC': select the features based on lowest Schwarz Information Criterion.
980
+ - 'R2': select the features based on highest R Squared.
981
+ - 'R2_A': select the features based on highest Adjusted R Squared.
982
+ threshold : scalar, optional
983
+ Is the maximum p-value for each variable that will be
984
+ accepted in the model. The default is 0.05.
985
+ n_components : int, float, None or str, optional
986
+ if 1 < n_components (int), it represents the number of components that
987
+ will be keep. if 0 < n_components < 1 (float), it represents the
988
+ percentage of variance that the is explained by the components kept.
989
+ See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
990
+ for more details. The default is 0.95.
991
+ dict_mu : dict
992
+ Other variables related to the expected returns.
993
+ dict_cov : dict
994
+ Other variables related to the covariance estimation.
995
+
996
+ Returns
997
+ -------
998
+ mu : DataFrame
999
+ The mean vector of risk factors model.
1000
+ cov : DataFrame
1001
+ The covariance matrix of risk factors model.
1002
+ returns : DataFrame
1003
+ The returns based on a risk factor model.
1004
+ B : DataFrame
1005
+ Loadings matrix.
1006
+
1007
+ Raises
1008
+ ------
1009
+ ValueError
1010
+ When the value cannot be calculated.
1011
+
1012
+ """
1013
+ if not isinstance(X, pd.DataFrame) and not isinstance(Y, pd.DataFrame):
1014
+ raise ValueError("X and Y must be DataFrames")
1015
+
1016
+ if B is None:
1017
+ B = loadings_matrix(
1018
+ X,
1019
+ Y,
1020
+ feature_selection=feature_selection,
1021
+ stepwise=stepwise,
1022
+ criterion=criterion,
1023
+ threshold=threshold,
1024
+ n_components=n_components,
1025
+ verbose=False,
1026
+ )
1027
+ elif not isinstance(B, pd.DataFrame):
1028
+ raise ValueError("B must be a DataFrame")
1029
+
1030
+ assets = Y.columns.tolist()
1031
+ dates = X.index.tolist()
1032
+
1033
+ X1 = X.copy()
1034
+ if const == True or ("const" in B.columns.tolist()):
1035
+ mu_f = np.hstack(
1036
+ [
1037
+ np.ones((1, 1)),
1038
+ np.array(mean_vector(X1, method=method_mu, **dict_mu), ndmin=2),
1039
+ ]
1040
+ )
1041
+ X1 = sm.add_constant(X)
1042
+ else:
1043
+ mu_f = np.array(mean_vector(X1, method=method_mu, **dict_mu), ndmin=2)
1044
+ S_f = np.array(covar_matrix(X1, method=method_cov, **dict_cov), ndmin=2)
1045
+ B_ = np.array(B, ndmin=2)
1046
+
1047
+ returns = np.array(X1, ndmin=2) @ B_.T
1048
+ mu = B_ @ mu_f.T
1049
+
1050
+ e = np.array(Y, ndmin=2) - returns
1051
+ S_e = np.diag(np.var(np.array(e), ddof=1, axis=0))
1052
+ S = B_ @ S_f @ B_.T + S_e
1053
+
1054
+ mu = pd.DataFrame(mu.T, columns=assets)
1055
+ cov = pd.DataFrame(S, index=assets, columns=assets)
1056
+ returns = pd.DataFrame(returns, index=dates, columns=assets)
1057
+
1058
+ return mu, cov, returns, B
1059
+
1060
+
1061
def black_litterman(
    X,
    w,
    P,
    Q,
    delta=1,
    rf=0,
    eq=True,
    method_mu="hist",
    method_cov="hist",
    dict_mu={},
    dict_cov={},
):
    r"""
    Estimate the expected returns vector and covariance matrix based
    on the Black Litterman model :cite:`b-BlackLitterman` :cite:`b-Black1`.

    .. math::
        \begin{aligned}
        \Pi & = \delta \Sigma w \\
        \Pi_{BL} & = \left [ (\tau\Sigma)^{-1}+ P^{T} \Omega^{-1}P \right]^{-1}
        \left[(\tau\Sigma)^{-1} \Pi + P^{T} \Omega^{-1} Q \right] \\
        M & = \left((\tau\Sigma)^{-1} + P^{T}\Omega^{-1} P \right)^{-1} \\
        \mu_{BL} & = \Pi_{BL} + r_{f} \\
        \Sigma_{BL} & = \Sigma + M \\
        \end{aligned}


    where:

    :math:`r_{f}` is the risk free rate.

    :math:`\delta` is the risk aversion factor.

    :math:`\Pi` is the equilibrium excess returns.

    :math:`\Sigma` is the covariance matrix.

    :math:`\tau` is a scaling factor, set to 1 / n_samples.

    :math:`P` is the views matrix.

    :math:`Q` is the views returns matrix.

    :math:`\Omega` is the covariance matrix of the error views, set to
    the diagonal of :math:`P (\tau \Sigma) P^{T}`.

    :math:`\mu_{BL}` is the mean vector obtained with the black
    litterman model.

    :math:`\Sigma_{BL}` is the covariance matrix obtained with the black
    litterman model.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame, Series or array of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
        A row vector or 1-D input is transposed into a column.
    P : DataFrame or array of shape (n_views, n_assets)
        Analyst's views matrix, can be relative or absolute.
    Q : DataFrame or array of shape (n_views, 1)
        Expected returns of analyst's views. A 1-D or row input is
        reshaped into a column.
    delta : float, optional
        Risk aversion factor. The default value is 1.
    rf : scalar, optional
        Risk free rate. The default is 0.
    eq : bool, optional
        Indicate if use equilibrium or historical excess returns.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'.

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
    method_cov : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
    dict_mu : dict
        Other variables related to the mean vector estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of Black Litterman model.
    cov : DataFrame
        The covariance matrix of Black Litterman model.
    w : DataFrame
        The equilibrium weights of Black Litterman model, without
        constraints, computed as
        :math:`(\delta \Sigma_{BL})^{-1} \Pi_{BL}`.

    Raises
    ------
    ValueError
        When X is not a DataFrame or w is not a vector.

    """
    # Only X needs pandas metadata (column labels); w is accepted in any
    # array-like form, as the original check allowed.
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    # Convert before inspecting the shape so that a Series or 1-D array
    # (shape (n,)) does not fail on shape[1].
    w = np.array(w, ndmin=2)
    if w.shape[0] > 1 and w.shape[1] > 1:
        raise ValueError("w must be a column DataFrame")
    if w.shape[0] == 1:
        w = w.T

    mu = np.array(mean_vector(X, method=method_mu, **dict_mu), ndmin=2)
    S = np.array(covar_matrix(X, method=method_cov, **dict_cov), ndmin=2)
    P = np.array(P, ndmin=2)
    # Q must be a column vector for P.T @ inv(Omega) @ Q to be conformable.
    Q = np.array(Q, ndmin=2).reshape(-1, 1)
    tau = 1 / X.shape[0]
    # Views are treated as independent: keep only the diagonal.
    Omega = np.diag(np.diag(P @ (tau * S) @ P.T))

    # Prior excess returns: implied by the benchmark weights, or estimated.
    if eq:
        PI = delta * (S @ w)
    else:
        PI = mu.T - rf

    # Each inverse is computed once and reused for both M and PI_.
    tau_S_inv = inv(tau * S)
    Pt_Omega_inv = P.T @ inv(Omega)
    M = inv(tau_S_inv + Pt_Omega_inv @ P)
    PI_ = M @ (tau_S_inv @ PI + Pt_Omega_inv @ Q)

    mu = (PI_ + rf).T
    cov = S + M
    w = inv(delta * cov) @ PI_

    mu = pd.DataFrame(mu, columns=assets)
    cov = pd.DataFrame(cov, index=assets, columns=assets)
    w = pd.DataFrame(w, index=assets)

    return mu, cov, w
1217
+
1218
+
1219
def augmented_black_litterman(
    X,
    w,
    F,
    B,
    P=None,
    Q=None,
    P_f=None,
    Q_f=None,
    delta=1,
    rf=0,
    eq=True,
    const=True,
    method_mu="hist",
    method_cov="hist",
    dict_mu={},
    dict_cov={},
):
    r"""
    Estimate the expected returns vector and covariance matrix based
    on the Augmented Black Litterman model :cite:`b-WCheung`.

    .. math::
        \begin{aligned}
        \Pi^{a} & = \delta \left [ \begin{array}{c} \Sigma \\ \Sigma_{F} B^{T} \\ \end{array} \right ] w \\
        P^{a} & = \left [ \begin{array}{cc} P & 0 \\ 0 & P_{F} \\ \end{array} \right ] \\
        Q^{a} & = \left [ \begin{array}{c} Q \\ Q_{F} \\ \end{array} \right ] \\
        \Sigma^{a} & = \left [ \begin{array}{cc} \Sigma & B \Sigma_{F}\\ \Sigma_{F} B^{T} & \Sigma_{F} \\ \end{array} \right ] \\
        \Omega^{a} & = \left [ \begin{array}{cc} \Omega & 0 \\ 0 & \Omega_{F} \\ \end{array} \right ] \\
        \Pi^{a}_{BL} & = \left [ (\tau \Sigma^{a})^{-1} + (P^{a})^{T} (\Omega^{a})^{-1} P^{a} \right ]^{-1}
        \left [ (\tau\Sigma^{a})^{-1} \Pi^{a} + (P^{a})^{T} (\Omega^{a})^{-1} Q^{a} \right ] \\
        M^{a} & = \left ( (\tau\Sigma^{a})^{-1} + (P^{a})^{T} (\Omega^{a})^{-1} P^{a} \right )^{-1} \\
        \mu^{a}_{BL} & = \Pi^{a}_{BL} + r_{f} \\
        \Sigma^{a}_{BL} & = \Sigma^{a} + M^{a} \\
        \end{aligned}


    where:

    :math:`r_{f}` is the risk free rate.

    :math:`\delta` is the risk aversion factor.

    :math:`B` is the loadings matrix.

    :math:`\Sigma` is the covariance matrix of assets.

    :math:`\Sigma_{F}` is the covariance matrix of factors.

    :math:`\Sigma^{a}` is the augmented covariance matrix.

    :math:`\tau` is a scaling factor, set to 1 / n_samples.

    :math:`P` is the assets views matrix.

    :math:`Q` is the assets views returns matrix.

    :math:`P_{F}` is the factors views matrix.

    :math:`Q_{F}` is the factors views returns matrix.

    :math:`P^{a}` is the augmented views matrix.

    :math:`Q^{a}` is the augmented views returns matrix.

    :math:`\Pi^{a}` is the augmented equilibrium excess returns.

    :math:`\Omega` is the covariance matrix of errors of assets views.

    :math:`\Omega_{F}` is the covariance matrix of errors of factors views.

    :math:`\Omega^{a}` is the covariance matrix of errors of augmented views.

    :math:`\mu^{a}_{BL}` is the mean vector obtained with the Augmented Black
    Litterman model.

    :math:`\Sigma^{a}_{BL}` is the covariance matrix obtained with the Augmented
    Black Litterman model.

    The views that are supplied select the mode: only (P, Q) uses the
    assets block alone, only (P_f, Q_f) uses the factors block alone and
    maps the posterior to assets through :math:`B`, and both pairs use the
    full augmented system above.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame, Series or array of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
        A row vector or 1-D input is transposed into a column.
    F : DataFrame of shape (n_samples, n_factors)
        Risk factors returns DataFrame, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    B : DataFrame of shape (n_assets, n_factors)
        Loadings matrix, where n_assets is the number assets and n_factors is
        the number of risk factors. When const is True its first column is
        taken as the intercept.
    P : DataFrame or array of shape (n_views, n_assets), optional
        Analyst's views matrix, can be relative or absolute.
    Q : DataFrame or array of shape (n_views, 1), optional
        Expected returns of analyst's views.
    P_f : DataFrame or array of shape (n_views, n_factors), optional
        Analyst's factors views matrix, can be relative or absolute.
    Q_f : DataFrame or array of shape (n_views, 1), optional
        Expected returns of analyst's factors views.
    delta : float, optional
        Risk aversion factor. The default value is 1.
    rf : scalar, optional
        Risk free rate. The default is 0.
    eq : bool, optional
        Indicate if use equilibrium or historical excess returns.
        The default is True.
    const : bool, optional
        Indicate if the loadings matrix has a constant.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'.

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
    method_cov : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
    dict_mu : dict
        Other variables related to the mean vector estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of Augmented Black Litterman model.
    cov : DataFrame
        The covariance matrix of Augmented Black Litterman model.
    w : DataFrame
        The equilibrium weights of Augmented Black Litterman model, without constraints.

    Raises
    ------
    ValueError
        When X is not a DataFrame, w is not a vector, no complete set of
        views is given, or B is missing while it is required.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    # Kept as in the original: only rejected when neither is a DataFrame,
    # so an array-like B keeps working.
    if not isinstance(F, pd.DataFrame) and not isinstance(B, pd.DataFrame):
        raise ValueError("F and B must be DataFrames")

    # A views matrix is meaningless without its expected returns and
    # vice versa; fail early instead of hitting an unbound variable later.
    if (P is None) != (Q is None):
        raise ValueError("P and Q must be provided together")
    if (P_f is None) != (Q_f is None):
        raise ValueError("P_f and Q_f must be provided together")

    has_asset_views = P is not None
    has_factor_views = P_f is not None
    if not has_asset_views and not has_factor_views:
        raise ValueError("At least one of (P, Q) or (P_f, Q_f) must be provided")

    assets = X.columns.tolist()
    N = len(assets)

    # Convert before inspecting the shape so that a Series or 1-D array
    # (shape (n,)) does not fail on shape[1].
    w = np.array(w, ndmin=2)
    if w.shape[0] > 1 and w.shape[1] > 1:
        raise ValueError("w must be a column DataFrame")
    if w.shape[0] == 1:
        w = w.T

    B_ = None
    alpha = None
    if B is not None:
        B_ = np.array(B, ndmin=2)
        if const:
            # First column is the intercept; the rest are factor loadings.
            alpha = B_[:, :1]
            B_ = B_[:, 1:]
    elif const or has_factor_views:
        raise ValueError(
            "B must be provided when const is True or factor views are used"
        )

    # Work on plain arrays in every mode: pandas label alignment would
    # otherwise break products such as P @ (tau * S) @ P.T.
    if has_asset_views:
        P = np.array(P, ndmin=2)
        Q = np.array(Q, ndmin=2).reshape(-1, 1)
    if has_factor_views:
        P_f = np.array(P_f, ndmin=2)
        Q_f = np.array(Q_f, ndmin=2).reshape(-1, 1)

    mu = np.array(mean_vector(X, method=method_mu, **dict_mu), ndmin=2)
    S = np.array(covar_matrix(X, method=method_cov, **dict_cov), ndmin=2)

    tau = 1 / X.shape[0]

    mu_f = np.array(mean_vector(F, method=method_mu, **dict_mu), ndmin=2)
    S_f = np.array(covar_matrix(F, method=method_cov, **dict_cov), ndmin=2)

    if has_asset_views and not has_factor_views:
        S_a = S
        P_a = P
        Q_a = Q
        Omega_a = np.diag(np.diag(P @ (tau * S) @ P.T))

        if eq:
            PI_a_ = delta * S_a @ w
        else:
            PI_a_ = mu.T - rf
    elif has_factor_views and not has_asset_views:
        S_a = S_f
        P_a = P_f
        Q_a = Q_f
        Omega_a = np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T))

        if eq:
            # Use the loadings without the intercept column (B_), as the
            # combined branch does; B itself may still carry the constant.
            PI_a_ = delta * (S_f @ B_.T) @ w
        else:
            PI_a_ = mu_f.T - rf
    else:
        S_a = np.hstack((np.vstack((S, S_f @ B_.T)), np.vstack((B_ @ S_f, S_f))))

        zeros_1 = np.zeros((P_f.shape[0], P.shape[1]))
        zeros_2 = np.zeros((P.shape[0], P_f.shape[1]))
        P_a = np.hstack((np.vstack((P, zeros_1)), np.vstack((zeros_2, P_f))))
        Q_a = np.vstack((Q, Q_f))

        Omega = np.diag(np.diag(P @ (tau * S) @ P.T))
        Omega_f = np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T))
        zeros = np.zeros((Omega.shape[0], Omega_f.shape[0]))
        Omega_a = np.hstack((np.vstack((Omega, zeros.T)), np.vstack((zeros, Omega_f))))

        if eq:
            PI_a_ = delta * (np.vstack((S, S_f @ B_.T)) @ w)
        else:
            PI_a_ = np.vstack((mu.T, mu_f.T)) - rf

    # Each inverse is computed once and reused for both M_a and PI_a.
    tau_S_inv = inv(tau * S_a)
    Pt_Omega_inv = P_a.T @ inv(Omega_a)
    M_a = inv(tau_S_inv + Pt_Omega_inv @ P_a)
    PI_a = M_a @ (tau_S_inv @ PI_a_ + Pt_Omega_inv @ Q_a)

    mu_a = (PI_a + rf).T
    cov_a = S_a + M_a
    w_a = inv(delta * cov_a) @ PI_a

    if has_factor_views and not has_asset_views:
        # The posterior lives in factor space; map it to assets through B_.
        mu_a = mu_a @ B_.T
        cov_a = B_ @ cov_a @ B_.T
        w_a = inv(delta * cov_a) @ B_ @ PI_a

    if const:
        mu_a = mu_a[:, :N] + alpha.T

    # In the combined mode the first N entries are the assets block.
    mu_a = pd.DataFrame(mu_a[:, :N], columns=assets)
    cov_a = pd.DataFrame(cov_a[:N, :N], index=assets, columns=assets)
    w_a = pd.DataFrame(w_a[:N, 0], index=assets)

    return mu_a, cov_a, w_a
1474
+
1475
+
1476
def black_litterman_bayesian(
    X,
    F,
    B,
    P_f,
    Q_f,
    delta=1,
    rf=0,
    eq=True,
    const=True,
    method_mu="hist",
    method_cov="hist",
    dict_mu={},
    dict_cov={},
):
    r"""
    Estimate the expected returns vector and covariance matrix based
    on the black litterman model :cite:`b-BLB`.

    .. math::
        \begin{aligned}
        \Sigma & = B \Sigma_{F} B^{T} + D \\
        \overline{\Pi}_{F} & = \left ( \Sigma_{F}^{-1} + P_{F}^{T}\Omega_{F}^{-1}P_{F} \right )^{-1} \left ( \Sigma_{F}^{-1}\Pi_{F} + P_{F}^{T}\Omega_{F}^{-1}Q_{F} \right) \\
        \overline{\Sigma}_{F} & = \left ( \Sigma_{F}^{-1} + P_{F}^{T}\Omega_{F}^{-1}P_{F} \right )^{-1} \\
        \Sigma_{BLB} & = \left( \Sigma^{-1} - \Sigma^{-1} B \left( \overline{\Sigma}_{F}^{-1} + B^{T}\Sigma^{-1}B \right)^{-1} B^{T}\Sigma^{-1} \right )^{-1} \\
        \mu_{BLB} & = \Sigma_{BLB} \left ( \Sigma^{-1} B \left( \overline{\Sigma}_{F}^{-1} +B^{T}\Sigma^{-1}B \right)^{-1} \overline{\Sigma}_{F}^{-1} \overline{\Pi}_{F} \right ) + r_{f} \\
        \end{aligned}


    where:

    :math:`r_{f}` is the risk free rate.

    :math:`B` is the loadings matrix.

    :math:`D` is a diagonal matrix of variance of errors of a factor model.

    :math:`\Sigma` is the covariance matrix obtained with a factor model.

    :math:`\Pi_{F}` is the prior excess returns of factors, computed as
    the estimated mean vector of factors minus :math:`r_{f}`.

    :math:`\overline{\Pi}_{F}` is the posterior excess returns of factors.

    :math:`\Sigma_{F}` is the covariance matrix of factors.

    :math:`\overline{\Sigma}_{F}` is the posterior covariance matrix of factors.

    :math:`P_{F}` is the factors views matrix.

    :math:`Q_{F}` is the factors views returns matrix.

    :math:`\Omega_{F}` is the covariance matrix of errors of factors views,
    set to the diagonal of :math:`P_{F} (\tau \Sigma_{F}) P_{F}^{T}` with
    :math:`\tau` equal to 1 / n_samples.

    :math:`\mu_{BLB}` is the mean vector obtained with the Black
    Litterman Bayesian model or posterior predictive mean.

    :math:`\Sigma_{BLB}` is the covariance matrix obtained with the Black
    Litterman Bayesian model or posterior predictive covariance.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    F : DataFrame of shape (n_samples, n_factors)
        Risk factors returns DataFrame, where n_samples is the number of samples
        and n_factors is the number of risk factors.
    B : DataFrame of shape (n_assets, n_factors)
        Loadings matrix, where n_assets is the number assets and n_factors is
        the number of risk factors. When const is True its first column is
        taken as the intercept.
    P_f : DataFrame or array of shape (n_views, n_factors)
        Analyst's factors views matrix, can be relative or absolute.
    Q_f : DataFrame or array of shape (n_views, 1)
        Expected returns of analyst's factors views.
    delta : float, optional
        Risk aversion factor. The default value is 1.
    rf : scalar, optional
        Risk free rate. The default is 0.
    eq : bool, optional
        Indicate if use equilibrium or historical excess returns.
        The default is True. This argument is accepted for interface
        compatibility and is not used by the current implementation.
    const : bool, optional
        Indicate if the loadings matrix has a constant.
        The default is True.
    method_mu : str, optional
        The method used to estimate the expected returns.
        The default value is 'hist'.

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
        - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
        - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
    method_cov : str, optional
        The method used to estimate the covariance matrix.
        The default is 'hist'. Possible values are:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
        - 'gl': use the basic Graphical Lasso Covariance method.
        - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
        - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
        - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
        - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
    dict_mu : dict
        Other variables related to the mean vector estimation method.
    dict_cov : dict
        Other variables related to the covariance estimation method.

    Returns
    -------
    mu : DataFrame
        The mean vector of Black Litterman model.
    cov : DataFrame
        The covariance matrix of Black Litterman model.
    w : DataFrame
        The equilibrium weights of Black Litterman model, without constraints.

    Raises
    ------
    ValueError
        When X is not a DataFrame or B is missing.

    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    # Kept as in the original: only rejected when neither is a DataFrame,
    # so an array-like B keeps working.
    if not isinstance(F, pd.DataFrame) and not isinstance(B, pd.DataFrame):
        raise ValueError("F and B must be DataFrames")

    if B is None:
        raise ValueError("B must be provided")

    assets = X.columns.tolist()

    B_ = np.array(B, ndmin=2)
    alpha = None
    if const:
        # First column is the intercept; the rest are factor loadings.
        alpha = B_[:, :1]
        B_ = B_[:, 1:]

    # Work on plain arrays: pandas label alignment would otherwise break
    # P_f @ (tau * S_f) @ P_f.T when P_f is a DataFrame.
    P_f = np.array(P_f, ndmin=2)
    Q_f = np.array(Q_f, ndmin=2).reshape(-1, 1)

    # Prior excess returns of the factors, as a column vector.
    mu_f = np.array(mean_vector(F, method=method_mu, **dict_mu), ndmin=2)
    mu_f = (mu_f - rf).T

    tau = 1 / X.shape[0]

    S_f = np.array(covar_matrix(F, method=method_cov, **dict_cov), ndmin=2)

    # Factor-model covariance: systematic part plus diagonal residual
    # variances (sample variance per asset; the intercept does not affect it).
    residuals = X.to_numpy() - np.array(F, ndmin=2) @ B_.T
    D = np.diag(np.var(residuals, ddof=1, axis=0))
    S = B_ @ S_f @ B_.T + D

    Omega_f = np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T))

    # Posterior distribution of the factors given the views.
    S_f_inv = inv(S_f)
    Pt_Omega_inv = P_f.T @ inv(Omega_f)
    S_hat = inv(S_f_inv + Pt_Omega_inv @ P_f)
    Pi_hat = S_hat @ (S_f_inv @ mu_f + Pt_Omega_inv @ Q_f)

    # Posterior predictive distribution of the assets; each inverse is
    # computed once and reused.
    S_inv = inv(S)
    S_hat_inv = inv(S_hat)
    K = inv(S_hat_inv + B_.T @ S_inv @ B_)

    S_blb = inv(S_inv - S_inv @ B_ @ K @ B_.T @ S_inv)
    Pi_blb = S_blb @ S_inv @ B_ @ K @ S_hat_inv @ Pi_hat

    mu = Pi_blb + rf

    if const:
        mu = mu + alpha
    mu = mu.T
    cov = S_blb
    w = inv(delta * cov) @ mu.T

    mu = pd.DataFrame(mu, columns=assets)
    cov = pd.DataFrame(cov, index=assets, columns=assets)
    w = pd.DataFrame(w, index=assets)

    return mu, cov, w
1658
+
1659
+
1660
+ def bootstrapping(
1661
+ X,
1662
+ kind="stationary",
1663
+ q=0.05,
1664
+ n_sim=6000,
1665
+ window=3,
1666
+ diag=False,
1667
+ threshold=1e-15,
1668
+ seed=0,
1669
+ ):
1670
+ r"""
1671
+ Estimates the uncertainty sets of mean and covariance matrix through the selected
1672
+ bootstrapping method.
1673
+
1674
+ Parameters
1675
+ ----------
1676
+ X : DataFrame of shape (n_samples, n_assets)
1677
+ Assets returns DataFrame, where n_samples is the number of
1678
+ observations and n_assets is the number of assets.
1679
+ kind : str
1680
+ The bootstrapping method. The default value is 'stationary'. Possible values are:
1681
+
1682
+ - 'stationary': stationary bootstrapping method, see `StationaryBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.StationaryBootstrap.html#arch.bootstrap.StationaryBootstrap>`_ for more details.
1683
+ - 'circular': circular bootstrapping method, see `CircularBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.CircularBlockBootstrap.html#arch.bootstrap.CircularBlockBootstrap>`_ for more details.
1684
+ - 'moving': moving bootstrapping method, see `MovingBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.MovingBlockBootstrap.html#arch.bootstrap.MovingBlockBootstrap>`_ for more details.
1685
+ q : scalar
1686
+ Significance level for box and elliptical constraints.
1687
+ The default is 0.05.
1688
+ n_sim : scalar
1689
+ Number of simulations of the bootstrapping method.
1690
+ The default is 6000.
1691
+ window: int
1692
+ Block size of the bootstrapping method. Must be greater than 1
1693
+ and lower than the n_samples - n_factors + 1
1694
+ The default is 3.
1695
+ diag: bool
1696
+ If consider only the main diagonal of covariance matrices of estimation
1697
+ errors following :cite:`b-fabozzi2007robust`. The default is False.
1698
+ threshold: float
1699
+ Parameter used to fix covariance matrices in case they are not positive semidefinite.
1700
+ The default is 1e-15.
1701
+ seed: int
1702
+ Seed used to generate random numbers for bootstrapping method.
1703
+ The default is 0.
1704
+
1705
+ Returns
1706
+ -------
1707
+ mu_l : DataFrame
1708
+ The q/2 percentile of mean vector obtained through the selected
1709
+ bootstrapping method.
1710
+ mu_u : DataFrame
1711
+ The 1-q/2 percentile of mean vector obtained through the selected
1712
+ bootstrapping method.
1713
+ cov_l : DataFrame
1714
+ The q/2 percentile of covariance matrix obtained through the selected
1715
+ bootstrapping method.
1716
+ cov_u : DataFrame
1717
+ The 1-q/2 percentile of covariance matrix obtained through the selected
1718
+ bootstrapping method.
1719
+ cov_mu : DataFrame
1720
+ The covariance matrix of estimation errors of mean vector obtained
1721
+ through the selected bootstrapping method.
1722
+ cov_sigma : DataFrame
1723
+ The covariance matrix of estimation errors of covariance matrix
1724
+ obtained through the selected bootstrapping method.
1725
+ k_mu : DataFrame
1726
+ The square root of size of elliptical constraint of mean vector
1727
+ estimation error based on 1-q percentile.
1728
+ k_sigma : DataFrame
1729
+ The square root of size of elliptical constraint of covariance matrix
1730
+ estimation error based on 1-q percentile.
1731
+
1732
+ Raises
1733
+ ------
1734
+ ValueError
1735
+ When the value cannot be calculated.
1736
+
1737
+ """
1738
+
1739
+ if not isinstance(X, pd.DataFrame):
1740
+ raise ValueError("X must be a DataFrame")
1741
+
1742
+ if window >= X.shape[0] - window + 1:
1743
+ raise ValueError("block must be lower than n_samples - window + 1")
1744
+ elif window <= 1:
1745
+ raise ValueError("block must be greather than 1")
1746
+
1747
+ cols = X.columns.tolist()
1748
+ cols_2 = [i + "-" + j for i in cols for j in cols]
1749
+ T, n = X.shape
1750
+
1751
+ mu = X.mean().to_numpy().reshape(1, n)
1752
+ vec_Sigma = X.cov().to_numpy().reshape((1, n**2), order="F")
1753
+
1754
+ mus = np.zeros((n_sim, 1, n))
1755
+ covs = np.zeros((n_sim, n, n))
1756
+
1757
+ if kind == "stationary":
1758
+ gen = bs.StationaryBootstrap(window, X, seed=seed)
1759
+ elif kind == "circular":
1760
+ gen = bs.CircularBlockBootstrap(window, X, seed=seed)
1761
+ elif kind == "moving":
1762
+ gen = bs.MovingBlockBootstrap(window, X, seed=seed)
1763
+ else:
1764
+ raise ValueError("kind only can be 'stationary', 'circular' or 'moving'")
1765
+
1766
+ i = 0
1767
+ for data in gen.bootstrap(n_sim):
1768
+ A = data[0][0]
1769
+ mus[i] = A.mean().to_numpy().reshape(1, n)
1770
+ covs[i] = A.cov().to_numpy()
1771
+ i += 1
1772
+
1773
+ # Box Constraint for Mean
1774
+ mu_l = np.percentile(mus, q=q / 2 * 100, axis=0, keepdims=True).reshape(1, n)
1775
+ mu_u = np.percentile(mus, q=(1 - q / 2) * 100, axis=0, keepdims=True).reshape(1, n)
1776
+ mu_l = pd.DataFrame(mu_l, index=[0], columns=cols)
1777
+ mu_u = pd.DataFrame(mu_u, index=[0], columns=cols)
1778
+
1779
+ # Box Constraint for Covariance
1780
+ cov_l = np.percentile(covs, q=q / 2 * 100, axis=0, keepdims=True).reshape(n, n)
1781
+ cov_u = np.percentile(covs, q=(1 - q / 2) * 100, axis=0, keepdims=True).reshape(
1782
+ n, n
1783
+ )
1784
+ cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
1785
+ cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)
1786
+
1787
+ # Check and fix if upper and lower bound for Covariance are positive
1788
+ # semidefinite and fix when they are not
1789
+ if af.is_pos_def(cov_l) == False:
1790
+ cov_l = af.cov_fix(cov_l, method="clipped", threshold=threshold)
1791
+ if af.is_pos_def(cov_u) == False:
1792
+ cov_u = af.cov_fix(cov_u, method="clipped", threshold=threshold)
1793
+
1794
+ # Elliptical Constraint for Mean
1795
+ A_mu = mus.reshape(n_sim, n) - np.repeat(mu, n_sim, axis=0)
1796
+ cov_mu = np.cov(A_mu, rowvar=False)
1797
+ if diag == True:
1798
+ cov_mu = np.diag(np.diag(cov_mu))
1799
+ k_mus = np.diag(A_mu @ inv(cov_mu) @ A_mu.T)
1800
+ k_mu = np.percentile(k_mus, q=(1 - q) * 100) ** 0.5
1801
+ cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)
1802
+
1803
+ # Elliptical Constraint for Covariance
1804
+ A_Sigma = covs.reshape((n_sim, n**2), order="F")
1805
+ A_Sigma = A_Sigma - np.repeat(vec_Sigma, n_sim, axis=0)
1806
+ cov_sigma = np.cov(A_Sigma, rowvar=False)
1807
+ cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
1808
+ if diag == True:
1809
+ cov_sigma = np.diag(np.diag(cov_sigma))
1810
+ if af.is_pos_def(cov_sigma) == False:
1811
+ cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
1812
+ k_sigmas = np.diag(A_Sigma @ inv(cov_sigma) @ A_Sigma.T)
1813
+ k_sigma = np.percentile(k_sigmas, q=(1 - q) * 100) ** 0.5
1814
+ cov_sigma = pd.DataFrame(cov_sigma, index=cols_2, columns=cols_2)
1815
+
1816
+ return mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma
1817
+
1818
+
1819
def normal_simulation(X, q=0.05, n_sim=6000, diag=False, threshold=1e-15, seed=0):
    r"""
    Estimates the uncertainty sets of mean and covariance matrix assuming that
    assets returns follows a multivariate normal distribution.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    q : scalar
        Significance level for box and elliptical constraints.
        The default is 0.05.
    n_sim : scalar
        Number of simulations used to build the uncertainty sets.
        The default is 6000.
    diag: bool
        If consider only the main diagonal of covariance matrices of estimation
        errors following :cite:`b-fabozzi2007robust`. The default is False.
    threshold: float
        Parameter used to fix covariance matrices in case they are not positive
        semidefinite. The default is 1e-15.
    seed: int
        Seed used to generate random numbers for simulation.
        The default is 0.

    Returns
    -------
    mu_l : DataFrame
        The lower bound of the mean vector, based on the 1-q/2 quantile of
        the standard normal distribution.
    mu_u : DataFrame
        The upper bound of the mean vector, based on the 1-q/2 quantile of
        the standard normal distribution.
    cov_l : DataFrame
        The q/2 percentile of covariance matrix obtained through the normal
        simulation.
    cov_u : DataFrame
        The 1-q/2 percentile of covariance matrix obtained through the normal
        simulation.
    cov_mu : DataFrame
        The covariance matrix of estimation errors of mean vector under the
        normality assumption (sample covariance divided by n_samples).
    cov_sigma : DataFrame
        The covariance matrix of estimation errors of covariance matrix
        under the normality assumption.
    k_mu : float
        The square root of size of elliptical constraint of mean vector
        estimation error based on 1-q percentile.
    k_sigma : float
        The square root of size of elliptical constraint of covariance matrix
        estimation error based on 1-q percentile.

    Raises
    ------
    ValueError
        When X is not a DataFrame.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    cols = X.columns.tolist()
    # Labels for the vectorized covariance matrix (one per ordered pair of assets)
    cols_2 = [f"{i}-{j}" for i in cols for j in cols]
    T, n = X.shape

    # Set initial parameters based on assumption of normality
    mu = X.mean().to_numpy().reshape(1, n)
    Sigma = X.cov().to_numpy()
    vec_Sigma = Sigma.reshape((1, n**2), order="F")
    cov_mu = Sigma / T
    K = cf.commutation_matrix(T=n, n=n)
    I = np.identity(n**2)
    cov_sigma = T * (I + K) @ np.kron(cov_mu, cov_mu)
    if diag:
        cov_sigma = np.diag(np.diag(cov_sigma))
    if not af.is_pos_def(cov_sigma):
        cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
    cov_sigma = pd.DataFrame(cov_sigma, index=cols_2, columns=cols_2)

    # np.percentile expects percentages in the range [0, 100], not fractions
    pct_l = q / 2 * 100
    pct_u = (1 - q / 2) * 100
    pct_k = (1 - q) * 100

    # Box Constraint for Mean
    delta_mu = st.norm.ppf(1 - q / 2) * np.sqrt(np.diag(cov_mu)).reshape(1, n)
    mu_l = pd.DataFrame(mu - delta_mu, index=[0], columns=cols)
    mu_u = pd.DataFrame(mu + delta_mu, index=[0], columns=cols)

    # Box Constraints for Covariance
    rs = np.random.RandomState(seed=seed)
    covs = st.wishart.rvs(T, cov_mu, size=n_sim, random_state=rs)
    cov_l = np.percentile(covs, q=pct_l, axis=0)
    cov_u = np.percentile(covs, q=pct_u, axis=0)
    cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
    cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)

    # Check and fix if upper and lower bound for Covariance are positive
    # semidefinite and fix when they are not
    if not af.is_pos_def(cov_l):
        cov_l = af.cov_fix(cov_l, method="clipped", threshold=threshold)
    if not af.is_pos_def(cov_u):
        cov_u = af.cov_fix(cov_u, method="clipped", threshold=threshold)

    # Elliptical Constraint for Mean
    # Simulate estimation errors of the mean (deviations around the sample
    # mean), so the quadratic form below measures distance to the estimate
    A_mu = rs.multivariate_normal(np.zeros(n), cov_mu, size=n_sim)
    if diag:
        cov_mu = np.diag(np.diag(cov_mu))
    # Row-wise quadratic form a_i' inv(cov_mu) a_i; equal to the diagonal of
    # A_mu @ inv(cov_mu) @ A_mu.T without building the (n_sim, n_sim) matrix
    k_mus = np.einsum("ij,ij->i", A_mu @ inv(cov_mu), A_mu)
    k_mu = np.percentile(k_mus, q=pct_k) ** 0.5
    cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)

    # Elliptical Constraint for Covariance
    # Each row is the column-major vectorization of one simulated covariance
    A_Sigma = covs.reshape((n_sim, n**2), order="F") - vec_Sigma
    A_cov_sigma = np.cov(A_Sigma, rowvar=False)
    if diag:
        A_cov_sigma = np.diag(np.diag(A_cov_sigma))
    if not af.is_pos_def(A_cov_sigma):
        A_cov_sigma = af.cov_fix(A_cov_sigma, method="clipped", threshold=threshold)
    k_sigmas = np.einsum("ij,ij->i", A_Sigma @ inv(A_cov_sigma), A_Sigma)
    k_sigma = np.percentile(k_sigmas, q=pct_k) ** 0.5

    return mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma