riskfolio-lib 7.2.0__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1989 @@
+ """
+ Copyright (c) 2020-2026, Dany Cajas
+ All rights reserved.
+ This work is licensed under BSD 3-Clause "New" or "Revised" License.
+ License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
+ """
+
+ import numpy as np
+ import pandas as pd
+ import statsmodels.api as sm
+ import scipy.stats as st
+ import sklearn.covariance as skcov
+ import arch.bootstrap as bs
+
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.decomposition import PCA
+ from numpy.linalg import inv
+ from itertools import product
+
+ import riskfolio.src.AuxFunctions as af
+ import riskfolio.src.DBHT as db
+ import riskfolio.src.GerberStatistic as gs
+ import riskfolio.external.cppfunctions as cf
+
+
+ __all__ = [
+     "mean_vector",
+     "covar_matrix",
+     "cokurt_matrix",
+     "forward_regression",
+     "backward_regression",
+     "PCR",
+     "loadings_matrix",
+     "risk_factors",
+     "black_litterman",
+     "augmented_black_litterman",
+     "black_litterman_bayesian",
+     "bootstrapping",
+     "normal_simulation",
+ ]
+
+
+ def mean_vector(X, method="hist", d=0.94, target="b1"):
+     r"""
+     Calculate the expected returns vector using the selected method.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     method : str, optional
+         The method used to estimate the expected returns.
+         The default value is 'hist'. Possible values are:
+
+         - 'hist': use the historical estimator.
+         - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
+         - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
+         - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
+
+     d : scalar
+         The smoothing factor of ewma methods.
+         The default is 0.94.
+     target : str, optional
+         The target mean vector used by the 'JS', 'BS' and 'BOP' estimators.
+         The default value is 'b1'. Possible values are:
+
+         - 'b1': grand mean.
+         - 'b2': volatility weighted grand mean.
+         - 'b3': mean square error of sample mean.
+
+     Returns
+     -------
+     mu : DataFrame of shape (1, n_assets)
+         The estimation of expected returns.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     assets = X.columns.tolist()
+
+     if method == "hist":
+         mu = np.array(X.mean(), ndmin=2)
+     elif method == "ewma1":
+         mu = np.array(X.ewm(alpha=1 - d).mean().iloc[-1, :], ndmin=2)
+     elif method == "ewma2":
+         mu = np.array(X.ewm(alpha=1 - d, adjust=False).mean().iloc[-1, :], ndmin=2)
+     elif method in ["JS", "BS", "BOP"]:
+         T, n = np.array(X, ndmin=2).shape
+         ones = np.ones((n, 1))
+         mu = np.array(X.mean(), ndmin=2).reshape(-1, 1)
+         Sigma = np.cov(X, rowvar=False)
+         Sigma_inv = np.linalg.inv(Sigma)
+         eigvals = np.linalg.eigvals(Sigma)
+
+         # Calculate target vector
+         if target == "b1":
+             b = ones.T @ mu / n * ones
+         elif target == "b2":
+             b = ones.T @ Sigma_inv @ mu / (ones.T @ Sigma_inv @ ones) * ones
+         elif target == "b3":
+             b = np.trace(Sigma) / T * ones
+
+         # Calculate estimators
+         if method == "JS":
+             alpha_1 = (
+                 1
+                 / T
+                 * (n * np.mean(eigvals) - 2 * np.max(eigvals))
+                 / ((mu - b).T @ (mu - b))
+             )
+             mu = (1 - alpha_1) * mu + alpha_1 * b
+         elif method == "BS":
+             alpha_1 = (n + 2) / ((n + 2) + T * (mu - b).T @ Sigma_inv @ (mu - b))
+             mu = (1 - alpha_1) * mu + alpha_1 * b
+         elif method == "BOP":
+             alpha_1 = (mu.T @ Sigma_inv @ mu - n / (T - n)) * b.T @ Sigma_inv @ b - (
+                 mu.T @ Sigma_inv @ b
+             ) ** 2
+             alpha_1 /= (mu.T @ Sigma_inv @ mu) * (b.T @ Sigma_inv @ b) - (
+                 mu.T @ Sigma_inv @ b
+             ) ** 2
+             beta_1 = (1 - alpha_1) * (mu.T @ Sigma_inv @ b) / (mu.T @ Sigma_inv @ mu)
+             mu = alpha_1 * mu + beta_1 * b
+         mu = mu.T
+
+     mu = pd.DataFrame(np.array(mu, ndmin=2), columns=assets)
+
+     return mu
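+
+ # Example (editorial sketch, not part of the released module): estimating the
+ # expected returns vector with the James-Stein estimator on simulated data.
+ # The seed and asset names below are hypothetical.
+ #
+ # >>> import numpy as np, pandas as pd
+ # >>> rng = np.random.default_rng(0)
+ # >>> X = pd.DataFrame(rng.normal(0.001, 0.02, (252, 4)),
+ # ...                  columns=["A", "B", "C", "D"])
+ # >>> mean_vector(X, method="JS", target="b1").shape
+ # (1, 4)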
+
+
+ def covar_matrix(
+     X,
+     method="hist",
+     d=0.94,
+     alpha=0.1,
+     bWidth=0.01,
+     detone=False,
+     mkt_comp=1,
+     threshold=0.5,
+ ):
+     r"""
+     Calculate the covariance matrix using the selected method.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     method : str, optional
+         The method used to estimate the covariance matrix.
+         The default is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'semi': use the semi lower covariance matrix.
+         - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ledoit': use the Ledoit and Wolf Shrinkage method.
+         - 'oas': use the Oracle Approximation Shrinkage method.
+         - 'shrunk': use the basic Shrunk Covariance method.
+         - 'gl': use the basic Graphical Lasso Covariance method.
+         - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
+         - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
+         - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
+
+     d : scalar
+         The smoothing factor of ewma methods. The default is 0.94.
+     alpha : scalar
+         The shrinkage factor of the 'shrunk' and 'shrink' methods.
+         The default is 0.1.
+     bWidth : float
+         The bandwidth of the kernel for the 'fixed', 'spectral' and 'shrink'
+         methods. The default is 0.01.
+     detone : bool, optional
+         Whether to remove the first mkt_comp components of the correlation
+         matrix for the 'fixed', 'spectral' and 'shrink' methods. The detoned
+         correlation matrix is singular, so it cannot be inverted.
+         The default is False.
+     mkt_comp : int, optional
+         Number of first components that will be removed using the detone
+         method. The default is 1.
+     threshold : float
+         Threshold of the 'gerber1' and 'gerber2' methods; must be between
+         0 and 1. The default is 0.5.
+
+     Returns
+     -------
+     cov : DataFrame of shape (n_assets, n_assets)
+         The estimation of the covariance matrix.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     assets = X.columns.tolist()
+
+     if method == "hist":
+         cov = np.cov(X, rowvar=False)
+     elif method == "semi":
+         T, N = X.shape
+         mu = X.mean().to_numpy().reshape(1, -1)
+         a = X - np.repeat(mu, T, axis=0)
+         a = np.minimum(a, np.zeros_like(a))
+         cov = 1 / (T - 1) * a.T @ a
+     elif method == "ewma1":
+         cov = X.ewm(alpha=1 - d).cov()
+         item = cov.iloc[-1, :].name[0]
+         cov = cov.loc[(item, slice(None)), :]
+     elif method == "ewma2":
+         cov = X.ewm(alpha=1 - d, adjust=False).cov()
+         item = cov.iloc[-1, :].name[0]
+         cov = cov.loc[(item, slice(None)), :]
+     elif method == "ledoit":
+         lw = skcov.LedoitWolf()
+         lw.fit(X)
+         cov = lw.covariance_
+     elif method == "oas":
+         oas = skcov.OAS()
+         oas.fit(X)
+         cov = oas.covariance_
+     elif method == "shrunk":
+         sc = skcov.ShrunkCovariance(shrinkage=alpha)
+         sc.fit(X)
+         cov = sc.covariance_
+     elif method == "gl":
+         gl = skcov.GraphicalLassoCV()
+         gl.fit(X)
+         cov = gl.covariance_
+     elif method == "jlogo":
+         S = np.cov(X, rowvar=False)
+         R = np.corrcoef(X, rowvar=False)
+         D = np.sqrt(np.clip((1 - R) / 2, a_min=0.0, a_max=1.0))
+         (_, _, separators, cliques, _) = db.PMFG_T2s(1 - D**2, nargout=4)
+         cov = db.j_LoGo(S, separators, cliques)
+         cov = np.linalg.inv(cov)
+     elif method in ["fixed", "spectral", "shrink"]:
+         cov = np.cov(X, rowvar=False)
+         T, N = X.shape
+         q = T / N
+         cov = af.denoiseCov(
+             cov,
+             q,
+             kind=method,
+             bWidth=bWidth,
+             detone=detone,
+             mkt_comp=int(mkt_comp),
+             alpha=alpha,
+         )
+     elif method == "gerber1":
+         cov = gs.gerber_cov_stat1(X, threshold=threshold)
+     elif method == "gerber2":
+         cov = gs.gerber_cov_stat2(X, threshold=threshold)
+
+     cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)
+
+     return cov
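+
+ # Example (editorial sketch, not part of the released module): a Ledoit-Wolf
+ # shrinkage estimate for the simulated X defined in the sketch above; the
+ # result is a symmetric (n_assets, n_assets) DataFrame.
+ #
+ # >>> cov = covar_matrix(X, method="ledoit")
+ # >>> cov.shape
+ # (4, 4)
+ # >>> bool(np.allclose(cov, cov.T))
+ # True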
+
+
+ def cokurt_matrix(
+     X,
+     method="hist",
+     alpha=0.1,
+     bWidth=0.01,
+     detone=False,
+     mkt_comp=1,
+ ):
+     r"""
+     Calculate the cokurtosis square matrix using the selected method.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     method : str, optional
+         The method used to estimate the cokurtosis square matrix.
+         The default is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'semi': use the semi lower cokurtosis square matrix.
+         - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
+
+     alpha : scalar
+         The shrinkage factor of the 'shrink' method. The default is 0.1.
+     bWidth : float
+         The bandwidth of the kernel for the 'fixed', 'spectral' and 'shrink'
+         methods. The default is 0.01.
+     detone : bool, optional
+         Whether to remove the first mkt_comp components of the correlation
+         matrix for the 'fixed', 'spectral' and 'shrink' methods. The detoned
+         correlation matrix is singular, so it cannot be inverted.
+         The default is False.
+     mkt_comp : int, optional
+         Number of first components that will be removed using the detone
+         method. The default is 1.
+
+     Returns
+     -------
+     kurt : DataFrame of shape (n_assets**2, n_assets**2)
+         The estimation of the cokurtosis square matrix.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     assets = X.columns.tolist()
+     cols = list(product(assets, assets))
+     cols = [str(y) + " - " + str(x) for x, y in cols]
+
+     if method == "hist":
+         kurt = cf.cokurtosis_matrix(X)
+     elif method == "semi":
+         kurt = cf.semi_cokurtosis_matrix(X)
+     elif method in ["fixed", "spectral", "shrink"]:
+         kurt = cf.cokurtosis_matrix(X)
+         T, N = X.shape
+         q = T / N
+         kurt = af.denoiseCov(
+             kurt,
+             q,
+             kind=method,
+             bWidth=bWidth,
+             detone=detone,
+             mkt_comp=int(mkt_comp),
+             alpha=alpha,
+         )
+
+     kurt = pd.DataFrame(np.array(kurt, ndmin=2), columns=cols, index=cols)
+
+     return kurt
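+
+ # Example (editorial sketch, not part of the released module): the cokurtosis
+ # square matrix of the simulated X above has shape (n_assets**2, n_assets**2).
+ #
+ # >>> cokurt_matrix(X, method="hist").shape
+ # (16, 16)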
+
+
+ def forward_regression(X, y, criterion="pvalue", threshold=0.05, verbose=False):
+     r"""
+     Select the variables that estimate the best model using stepwise
+     forward regression. In case none of the variables has a p-value lower
+     than the threshold, the algorithm selects the variable with the lowest
+     p-value.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_factors)
+         Risk factors returns matrix, where n_samples is the number of samples
+         and n_factors is the number of risk factors.
+     y : Series of shape (n_samples, 1)
+         Asset returns column DataFrame or Series, where n_samples is the number
+         of samples.
+     criterion : str, optional
+         The default is 'pvalue'. Possible values of the criterion used to select
+         the best features are:
+
+         - 'pvalue': select the features based on p-values.
+         - 'AIC': select the features based on lowest Akaike Information Criterion.
+         - 'SIC': select the features based on lowest Schwarz Information Criterion.
+         - 'R2': select the features based on highest R Squared.
+         - 'R2_A': select the features based on highest Adjusted R Squared.
+
+     threshold : scalar, optional
+         The maximum p-value for each variable that will be
+         accepted in the model. The default is 0.05.
+     verbose : bool, optional
+         Enable verbose output. The default is False.
+
+     Returns
+     -------
+     included : list
+         A list of the variables that produce the best model.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
+         raise ValueError("y must be a column DataFrame")
+
+     if isinstance(y, pd.DataFrame):
+         if y.shape[0] > 1 and y.shape[1] > 1:
+             raise ValueError("y must be a column DataFrame")
+
+     included = []
+     aic = 1e10
+     sic = 1e10
+     r2 = -1e10
+     r2_a = -1e10
+     pvalues = None
+
+     if criterion == "pvalue":
+         value = 0
+         while value <= threshold:
+             excluded = list(set(X.columns) - set(included))
+             best_pvalue = 999999
+             new_feature = None
+             for i in excluded:
+                 factors = included + [i]
+                 X1 = X[factors]
+                 X1 = sm.add_constant(X1)
+                 results = sm.OLS(y, X1).fit()
+                 new_pvalues = results.pvalues
+                 new_pvalues = new_pvalues[new_pvalues.index != "const"]
+                 cond_1 = new_pvalues.max()
+                 if best_pvalue > new_pvalues[i] and cond_1 <= threshold:
+                     best_pvalue = results.pvalues[i]
+                     new_feature = i
+                     pvalues = new_pvalues.copy()
+
+             if pvalues is not None:
+                 value = pvalues[pvalues.index != "const"].max()
+
+             if new_feature is None:
+                 break
+             else:
+                 included.append(new_feature)
+
+             if verbose:
+                 print("Add {} with p-value {:.6}".format(new_feature, best_pvalue))
+
+         # Fallback when no variable has a p-value lower than the threshold:
+         # keep the single variable with the lowest p-value.
+         if len(included) == 0:
+             excluded = list(set(X.columns) - set(included))
+             best_pvalue = 999999
+             new_feature = None
+             for i in excluded:
+                 factors = included + [i]
+                 X1 = X[factors]
+                 X1 = sm.add_constant(X1)
+                 results = sm.OLS(y, X1).fit()
+                 new_pvalues = results.pvalues
+                 new_pvalues = new_pvalues[new_pvalues.index != "const"]
+                 if best_pvalue > new_pvalues[i]:
+                     best_pvalue = results.pvalues[i]
+                     new_feature = i
+                     pvalues = new_pvalues.copy()
+
+             value = pvalues[pvalues.index != "const"].max()
+
+             included.append(new_feature)
+
+             if verbose:
+                 print("Add {} with p-value {:.6}".format(new_feature, best_pvalue))
+
+     else:
+         excluded = X.columns.tolist()
+         flag = False
+         n = len(excluded)
+
+         for j in range(n):
+             value = {}
+             n_ini = len(excluded)
+             for i in excluded:
+                 factors = included.copy()
+                 factors.append(i)
+                 X1 = X[factors]
+                 X1 = sm.add_constant(X1)
+                 results = sm.OLS(y, X1).fit()
+
+                 if criterion == "AIC":
+                     value[i] = results.aic
+                 elif criterion == "SIC":
+                     value[i] = results.bic
+                 elif criterion == "R2":
+                     value[i] = results.rsquared
+                 elif criterion == "R2_A":
+                     value[i] = results.rsquared_adj
+
+             value = pd.Series(value)
+
+             if criterion in ["AIC", "SIC"]:
+                 key = value.idxmin()
+                 value = value.min()
+             elif criterion in ["R2", "R2_A"]:
+                 key = value.idxmax()
+                 value = value.max()
+
+             if criterion == "AIC":
+                 if value < aic:
+                     excluded.remove(key)
+                     included.append(key)
+                     aic = value
+                     flag = True
+             elif criterion == "SIC":
+                 if value < sic:
+                     excluded.remove(key)
+                     included.append(key)
+                     sic = value
+                     flag = True
+             elif criterion == "R2":
+                 if value > r2:
+                     excluded.remove(key)
+                     included.append(key)
+                     r2 = value
+                     flag = True
+             elif criterion == "R2_A":
+                 if value > r2_a:
+                     excluded.remove(key)
+                     included.append(key)
+                     r2_a = value
+                     flag = True
+
+             if n_ini == len(excluded):
+                 break
+
+             if flag and verbose:
+                 print("Add {} with {} {:.6}".format(key, criterion, value))
+
+             flag = False
+
+     return included
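+
+ # Example (editorial sketch, not part of the released module): selecting
+ # factors for a simulated asset driven by two of three factors. The factor
+ # names, loadings and noise level are hypothetical, and the selected set
+ # depends on the simulated sample.
+ #
+ # >>> F = pd.DataFrame(rng.normal(0.0, 0.02, (252, 3)),
+ # ...                  columns=["MKT", "SMB", "HML"])
+ # >>> y = 0.6 * F["MKT"] - 0.3 * F["HML"] + rng.normal(0.0, 0.005, 252)
+ # >>> sorted(forward_regression(F, y, criterion="pvalue", threshold=0.05))
+ # ['HML', 'MKT']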
+
+
+ def backward_regression(X, y, criterion="pvalue", threshold=0.05, verbose=False):
+     r"""
+     Select the variables that estimate the best model using stepwise
+     backward regression. In case none of the variables has a p-value lower
+     than the threshold, the algorithm selects the variable with the lowest
+     p-value.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_factors)
+         Risk factors returns matrix, where n_samples is the number of samples
+         and n_factors is the number of risk factors.
+     y : Series of shape (n_samples, 1)
+         Asset returns column DataFrame or Series, where n_samples is the number
+         of samples.
+     criterion : str, optional
+         The default is 'pvalue'. Possible values of the criterion used to select
+         the best features are:
+
+         - 'pvalue': select the features based on p-values.
+         - 'AIC': select the features based on lowest Akaike Information Criterion.
+         - 'SIC': select the features based on lowest Schwarz Information Criterion.
+         - 'R2': select the features based on highest R Squared.
+         - 'R2_A': select the features based on highest Adjusted R Squared.
+
+     threshold : scalar, optional
+         The maximum p-value for each variable that will be
+         accepted in the model. The default is 0.05.
+     verbose : bool, optional
+         Enable verbose output. The default is False.
+
+     Returns
+     -------
+     included : list
+         A list of the variables that produce the best model.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
+         raise ValueError("y must be a column DataFrame")
+
+     if isinstance(y, pd.DataFrame):
+         if y.shape[0] > 1 and y.shape[1] > 1:
+             raise ValueError("y must be a column DataFrame")
+
+     X1 = sm.add_constant(X)
+     results = sm.OLS(y, X1).fit()
+     pvalues = results.pvalues
+     aic = results.aic
+     sic = results.bic
+     r2 = results.rsquared
+     r2_a = results.rsquared_adj
+
+     included = pvalues.index.tolist()
+
+     if criterion == "pvalue":
+         excluded = ["const"]
+         while pvalues[pvalues.index != "const"].max() > threshold:
+             factors = pvalues[~pvalues.index.isin(excluded)].index.tolist()
+             X1 = X[factors]
+             X1 = sm.add_constant(X1)
+             results = sm.OLS(y, X1).fit()
+             pvalues = results.pvalues
+             pvalues = pvalues[pvalues.index != "const"]
+             if pvalues.shape[0] == 0:
+                 break
+             excluded = ["const", pvalues.idxmax()]
+             if verbose and pvalues.max() > threshold:
+                 print(
+                     "Drop {} with p-value {:.6}".format(pvalues.idxmax(), pvalues.max())
+                 )
+
+         included = pvalues[pvalues.index != "const"].index.tolist()
+
+         # Fallback when no variable has a p-value lower than the threshold:
+         # keep the single variable with the lowest p-value.
+         if len(included) == 0:
+             excluded = list(set(X.columns) - set(included))
+             best_pvalue = 999999
+             new_feature = None
+             for i in excluded:
+                 factors = included + [i]
+                 X1 = X[factors]
+                 X1 = sm.add_constant(X1)
+                 results = sm.OLS(y, X1).fit()
+                 new_pvalues = results.pvalues
+                 new_pvalues = new_pvalues[new_pvalues.index != "const"]
+                 if best_pvalue > new_pvalues[i]:
+                     best_pvalue = results.pvalues[i]
+                     new_feature = i
+                     pvalues = new_pvalues.copy()
+
+             value = pvalues[pvalues.index != "const"].max()
+
+             included.append(new_feature)
+
+             if verbose:
+                 print("Add {} with p-value {:.6}".format(new_feature, best_pvalue))
+
+     else:
+         included.remove("const")
+         flag = False
+         n = len(included)
+
+         for j in range(n):
+             value = {}
+             n_ini = len(included)
+             for i in included:
+                 factors = included.copy()
+                 factors.remove(i)
+                 X1 = X[factors]
+                 X1 = sm.add_constant(X1)
+                 results = sm.OLS(y, X1).fit()
+
+                 if criterion == "AIC":
+                     value[i] = results.aic
+                 elif criterion == "SIC":
+                     value[i] = results.bic
+                 elif criterion == "R2":
+                     value[i] = results.rsquared
+                 elif criterion == "R2_A":
+                     value[i] = results.rsquared_adj
+
+             value = pd.Series(value)
+
+             if criterion in ["AIC", "SIC"]:
+                 key = value.idxmin()
+                 value = value.min()
+             elif criterion in ["R2", "R2_A"]:
+                 key = value.idxmax()
+                 value = value.max()
+
+             if criterion == "AIC":
+                 if value < aic:
+                     included.remove(key)
+                     aic = value
+                     flag = True
+             elif criterion == "SIC":
+                 if value < sic:
+                     included.remove(key)
+                     sic = value
+                     flag = True
+             elif criterion == "R2":
+                 if value > r2:
+                     included.remove(key)
+                     r2 = value
+                     flag = True
+             elif criterion == "R2_A":
+                 if value > r2_a:
+                     included.remove(key)
+                     r2_a = value
+                     flag = True
+
+             if n_ini == len(included):
+                 break
+
+             if flag and verbose:
+                 print("Drop {} with {} {:.6}".format(key, criterion, value))
+
+             flag = False
+
+     return included
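+
+ # Example (editorial sketch, not part of the released module): the backward
+ # variant starts from the full model and drops factors. With the simulated
+ # F and y above it typically keeps the two true drivers.
+ #
+ # >>> set(backward_regression(F, y, criterion="AIC")) == {"MKT", "HML"}
+ # True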
+
+
+ def PCR(X, y, n_components=0.95):
+     r"""
+     Estimate the coefficients using Principal Components Regression (PCR).
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_factors)
+         Risk factors returns matrix, where n_samples is the number of samples
+         and n_factors is the number of risk factors.
+     y : DataFrame or Series of shape (n_samples, 1)
+         Asset returns column DataFrame or Series, where n_samples is the number
+         of samples.
+     n_components : int, float, None or str, optional
+         If 1 < n_components (int), it represents the number of components that
+         will be kept. If 0 < n_components < 1 (float), it represents the
+         percentage of variance that is explained by the components kept.
+         See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
+         for more details. The default is 0.95.
+
+     Returns
+     -------
+     beta : nd-array
+         An array with the coefficients of the model calculated using PCR.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     if not isinstance(y, pd.DataFrame) and not isinstance(y, pd.Series):
+         raise ValueError("y must be a column DataFrame")
+
+     if isinstance(y, pd.DataFrame):
+         if y.shape[0] > 1 and y.shape[1] > 1:
+             raise ValueError("y must be a column DataFrame")
+
+     scaler = StandardScaler()
+     scaler.fit(X)
+     X_std = scaler.transform(X)
+
+     if 0 < n_components < 1:
+         pca = PCA(n_components=n_components)
+     elif n_components >= 1:
+         pca = PCA(n_components=int(n_components))
+
+     pca.fit(X_std)
+     Z_p = pca.transform(X_std)
+     V_p = pca.components_.T
+
+     results = sm.OLS(y, sm.add_constant(Z_p)).fit()
+     beta_pc = np.asarray(results.params)[1:]
+     beta_pc = np.array(beta_pc, ndmin=2)
+
+     # Recover the coefficients on the original (unstandardized) factors and
+     # the intercept from the principal component coefficients.
+     std = np.array(np.std(X, axis=0, ddof=1), ndmin=2)
+     mean = np.array(np.mean(X, axis=0), ndmin=2)
+     beta = V_p @ beta_pc.T / std.T
+
+     beta_0 = np.array(y.mean(), ndmin=2) - np.sum(beta * mean.T)
+
+     beta = np.insert(beta, 0, beta_0)
+     beta = np.array(beta, ndmin=2)
+
+     return beta
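+
+ # Example (editorial sketch, not part of the released module): PCR returns a
+ # row vector with the intercept followed by one coefficient per factor.
+ #
+ # >>> PCR(F, y, n_components=0.95).shape
+ # (1, 4)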
+
+
+ def loadings_matrix(
+     X,
+     Y,
+     feature_selection="stepwise",
+     stepwise="Forward",
+     criterion="pvalue",
+     threshold=0.05,
+     n_components=0.95,
+     verbose=False,
+ ):
+     r"""
+     Estimate the loadings matrix using stepwise regression or principal
+     components regression.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_factors)
+         Risk factors returns matrix, where n_samples is the number of samples
+         and n_factors is the number of risk factors.
+     Y : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     feature_selection : str, 'stepwise' or 'PCR', optional
+         Indicate the method used to estimate the loadings matrix.
+         The default is 'stepwise'. Possible values are:
+
+         - 'stepwise': use stepwise regression to select the best factors and estimate coefficients.
+         - 'PCR': use principal components regression to estimate coefficients.
+
+     stepwise : str, 'Forward' or 'Backward', optional
+         Indicate the method used for stepwise regression.
+         The default is 'Forward'.
+     criterion : str, optional
+         The default is 'pvalue'. Possible values of the criterion used to select
+         the best features are:
+
+         - 'pvalue': select the features based on p-values.
+         - 'AIC': select the features based on lowest Akaike Information Criterion.
+         - 'SIC': select the features based on lowest Schwarz Information Criterion.
+         - 'R2': select the features based on highest R Squared.
+         - 'R2_A': select the features based on highest Adjusted R Squared.
+
+     threshold : scalar, optional
+         The maximum p-value for each variable that will be
+         accepted in the model. The default is 0.05.
+     n_components : int, float, None or str, optional
+         If 1 < n_components (int), it represents the number of components that
+         will be kept. If 0 < n_components < 1 (float), it represents the
+         percentage of variance that is explained by the components kept.
+         See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
+         for more details. The default is 0.95.
+     verbose : bool, optional
+         Enable verbose output. The default is False.
+
+     Returns
+     -------
+     loadings : DataFrame
+         Loadings matrix.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+     if not isinstance(X, pd.DataFrame):
+         raise ValueError("X must be a DataFrame")
+
+     if not isinstance(Y, pd.DataFrame):
+         raise ValueError("Y must be a DataFrame")
+
+     rows = Y.columns.tolist()
+     cols = X.columns.tolist()
+     cols.insert(0, "const")
+     loadings = np.zeros((len(rows), len(cols)))
+     loadings = pd.DataFrame(loadings, index=rows, columns=cols)
+
+     for i in rows:
+         if feature_selection == "stepwise":
+             if stepwise == "Forward":
+                 included = forward_regression(
+                     X, Y[i], criterion=criterion, threshold=threshold, verbose=verbose
+                 )
+             elif stepwise == "Backward":
+                 included = backward_regression(
+                     X, Y[i], criterion=criterion, threshold=threshold, verbose=verbose
+                 )
+             else:
+                 raise ValueError("Choose an adequate stepwise method")
+             results = sm.OLS(Y[i], sm.add_constant(X[included])).fit()
+             params = results.params
+             loadings.loc[i, params.index.tolist()] = params.T
+         elif feature_selection == "PCR":
+             beta = PCR(X, Y[i], n_components=n_components)
+             beta = pd.Series(np.ravel(beta), index=cols)
+             loadings.loc[i, cols] = beta.T
+
+     return loadings
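+
+ # Example (editorial sketch, not part of the released module): a loadings
+ # matrix for two simulated assets on the three factors above; the result has
+ # one row per asset, with a 'const' column followed by the factor loadings.
+ #
+ # >>> Y = pd.DataFrame(rng.normal(0.001, 0.02, (252, 2)), columns=["A", "B"])
+ # >>> loadings_matrix(F, Y, feature_selection="PCR").shape
+ # (2, 4)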
+
+
+ def risk_factors(
+     X,
+     Y,
+     B=None,
+     const=True,
+     method_mu="hist",
+     method_cov="hist",
+     method_kurt="hist",
+     feature_selection="stepwise",
+     stepwise="Forward",
+     criterion="pvalue",
+     threshold=0.05,
+     n_components=0.95,
+     higher_comoments=False,
+     dict_mu={},
+     dict_cov={},
+     dict_kurt={},
+ ):
+     r"""
+     Estimate the expected returns vector, covariance matrix, coskewness tensor and
+     cokurtosis square matrix based on risk factors models :cite:`b-Ross`
+     :cite:`b-Fan` :cite:`b-Boudt2015`.
+
+     .. math::
+         \begin{aligned}
+         R & = \alpha + B F + \epsilon \\
+         \mu_{f} & = \alpha + B \mu_{F} \\
+         \Sigma_{f} & = B \Sigma_{F} B^{\prime} + \Sigma_{\epsilon} \\
+         \Phi_{f} & = B \Phi_{F} \left ( B^{\prime} \otimes B^{\prime} \right ) + \Phi_{\epsilon} \\
+         \Psi_{f} & = \left ( B \otimes B \right ) \Psi_{F} \left ( B^{\prime} \otimes B^{\prime} \right ) + \Psi_{\epsilon} \\
+         \end{aligned}
+
+
+     where:
+
+     :math:`R` is the series returns.
+
+     :math:`\alpha` is the intercept.
+
+     :math:`B` is the loadings matrix.
+
+     :math:`\mu_{F}` is the expected returns vector of the risk factors.
+
+     :math:`\Sigma_{F}` is the covariance matrix of the risk factors.
+
+     :math:`\Phi_{F}` is the coskewness tensor of the risk factors.
+
+     :math:`\Psi_{F}` is the cokurtosis square matrix of the risk factors.
+
+     :math:`\Sigma_{\epsilon}` is the covariance matrix of error terms.
+
+     :math:`\Phi_{\epsilon}` is the coskewness tensor of error terms.
+
+     :math:`\Psi_{\epsilon}` is the cokurtosis square matrix of error terms.
+
+     :math:`\mu_{f}` is the expected returns vector obtained with the
+     risk factor model.
+
+     :math:`\Sigma_{f}` is the covariance matrix obtained with the risk
+     factor model.
+
+     :math:`\Phi_{f}` is the coskewness tensor obtained with the risk
+     factor model.
+
+     :math:`\Psi_{f}` is the cokurtosis square matrix obtained with the risk
+     factor model.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_factors)
+         Risk factors returns matrix, where n_samples is the number of samples
+         and n_factors is the number of risk factors.
+     Y : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     B : DataFrame of shape (n_assets, n_factors), optional
+         Loadings matrix, where n_assets is the number of assets and n_factors is
+         the number of risk factors. If it is not specified, it is estimated using
+         stepwise regression. The default is None.
+     const : bool, optional
+         Indicate if the loadings matrix has a constant.
+         The default is True.
+     method_mu : str, optional
+         The method used to estimate the expected returns of factors.
+         The default value is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
+         - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
+         - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
+         - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
+         - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
+
+     method_cov : str, optional
+         The method used to estimate the covariance matrix of factors.
+         The default is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
+         - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`__ for more details.
+         - 'ledoit': use the Ledoit and Wolf Shrinkage method.
+         - 'oas': use the Oracle Approximation Shrinkage method.
+         - 'shrunk': use the basic Shrunk Covariance method.
+         - 'gl': use the basic Graphical Lasso Covariance method.
+         - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
+         - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
+         - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
+
+     method_kurt : str, optional
+         The method used to estimate the cokurtosis square matrix.
+         The default is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'semi': use the semi lower cokurtosis square matrix.
+         - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
+
+     feature_selection : str, 'stepwise' or 'PCR', optional
+         Indicate the method used to estimate the loadings matrix.
+         The default is 'stepwise'. Possible values are:
+
+         - 'stepwise': use stepwise regression to select the best factors and estimate coefficients.
+         - 'PCR': use principal components regression to estimate coefficients.
+
+     stepwise : str, 'Forward' or 'Backward'
+         Indicate the method used for stepwise regression.
+         The default is 'Forward'.
+     criterion : str, optional
+         The default is 'pvalue'. Possible values of the criterion used to select
+         the best features are:
+
+         - 'pvalue': select the features based on p-values.
+         - 'AIC': select the features based on lowest Akaike Information Criterion.
+         - 'SIC': select the features based on lowest Schwarz Information Criterion.
+         - 'R2': select the features based on highest R Squared.
+         - 'R2_A': select the features based on highest Adjusted R Squared.
+
+     threshold : scalar, optional
+         The maximum p-value for each variable that will be
+         accepted in the model. The default is 0.05.
+     n_components : int, float, None or str, optional
+         If 1 < n_components (int), it represents the number of components that
+         will be kept. If 0 < n_components < 1 (float), it represents the
+         percentage of variance that is explained by the components kept.
+         See `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
+         for more details. The default is 0.95.
+     higher_comoments : bool, optional
+         Indicate if the coskewness tensor and the cokurtosis square matrix of
+         the risk factor model are calculated. The default is False.
+     dict_mu : dict
+         Other variables related to the expected returns.
+     dict_cov : dict
+         Other variables related to the covariance estimation.
+     dict_kurt : dict
+         Other variables related to the cokurtosis estimation.
+
+     Returns
+     -------
+     mu : DataFrame
+         The mean vector of the risk factor model.
+     cov : DataFrame
+         The covariance matrix of the risk factor model.
+     returns : DataFrame
+         The returns based on the risk factor model.
+     B : DataFrame
+         Loadings matrix.
+     skew : DataFrame or None
+         The coskewness tensor of the risk factor model. None when
+         higher_comoments is False.
+     kurt : DataFrame or None
+         The cokurtosis square matrix of the risk factor model. None when
+         higher_comoments is False.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+     if not isinstance(X, pd.DataFrame) or not isinstance(Y, pd.DataFrame):
+         raise ValueError("X and Y must be DataFrames")
+
+     if B is None:
+         B = loadings_matrix(
+             X,
+             Y,
+             feature_selection=feature_selection,
+             stepwise=stepwise,
+             criterion=criterion,
+             threshold=threshold,
+             n_components=n_components,
+             verbose=False,
+         )
+     elif not isinstance(B, pd.DataFrame):
+         raise ValueError("B must be a DataFrame")
+
+     assets = Y.columns.tolist()
+     dates = X.index.tolist()
+
+     X1 = X.copy()
+     if const == True or ("const" in B.columns.tolist()):
+         mu_f = np.hstack(
+             [
+                 np.ones((1, 1)),
+                 np.array(mean_vector(X1, method=method_mu, **dict_mu), ndmin=2),
+             ]
+         )
+         X1 = sm.add_constant(X)
+     else:
+         mu_f = np.array(mean_vector(X1, method=method_mu, **dict_mu), ndmin=2)
+     S_f = np.array(covar_matrix(X1, method=method_cov, **dict_cov), ndmin=2)
+     B_ = np.array(B, ndmin=2)
+
+     returns = np.array(X1, ndmin=2) @ B_.T
+     mu = B_ @ mu_f.T
+
+     e = np.array(Y, ndmin=2) - returns
+     S_e = np.diag(np.var(np.array(e), ddof=1, axis=0))
+     S = B_ @ S_f @ B_.T + S_e
+
+     if higher_comoments:
+         cols = list(product(assets, assets))
+         cols = [str(y) + " - " + str(x) for x, y in cols]
+
+         skew_f = cf.coskewness_matrix(X1).to_numpy()
+         skew_e = cf.residuals_coskewness_fm(e)
+         skew = B_ @ skew_f @ np.kron(B_, B_).T + skew_e
+         skew = pd.DataFrame(skew, index=assets, columns=cols)
+
+         kurt_f = cokurt_matrix(X1, method=method_kurt, **dict_kurt).to_numpy()
+         kurt_e = cf.residuals_cokurtosis_fm(B_, S_f, e)
+         kurt = np.kron(B_, B_) @ kurt_f @ np.kron(B_, B_).T + kurt_e
+         kurt = pd.DataFrame(kurt, index=cols, columns=cols)
+     else:
+         skew, kurt = None, None
+
+     mu = pd.DataFrame(mu.T, columns=assets)
+     cov = pd.DataFrame(S, index=assets, columns=assets)
+     returns = pd.DataFrame(returns, index=dates, columns=assets)
+
+     return mu, cov, returns, B, skew, kurt
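+
+ # Example (editorial sketch, not part of the released module): moments of the
+ # two-asset universe above implied by the three-factor model. skew and kurt
+ # are None because higher_comoments defaults to False.
+ #
+ # >>> mu, cov, returns, B, skew, kurt = risk_factors(F, Y, feature_selection="PCR")
+ # >>> mu.shape, cov.shape, returns.shape
+ # ((1, 2), (2, 2), (252, 2))
+ # >>> skew is None and kurt is None
+ # True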
+
+
+ def black_litterman(
+     X,
+     w,
+     P,
+     Q,
+     delta=1,
+     rf=0,
+     eq=True,
+     method_mu="hist",
+     method_cov="hist",
+     dict_mu={},
+     dict_cov={},
+ ):
+     r"""
+     Estimate the expected returns vector and covariance matrix based
+     on the Black Litterman model :cite:`b-BlackLitterman` :cite:`b-Black1`.
+
+     .. math::
+         \begin{aligned}
+         \Pi & = \delta \Sigma w \\
+         \Pi_{BL} & = \left [ (\tau\Sigma)^{-1}+ P^{\prime} \Omega^{-1}P \right]^{-1}
+         \left[(\tau\Sigma)^{-1} \Pi + P^{\prime} \Omega^{-1} Q \right] \\
+         M & = \left((\tau\Sigma)^{-1} + P^{\prime}\Omega^{-1} P \right)^{-1} \\
+         \mu_{BL} & = \Pi_{BL} + r_{f} \\
+         \Sigma_{BL} & = \Sigma + M \\
+         \end{aligned}
+
+
+     where:
+
+     :math:`r_{f}` is the risk free rate.
+
+     :math:`\delta` is the risk aversion factor.
+
+     :math:`\Pi` is the equilibrium excess returns.
+
+     :math:`\Sigma` is the covariance matrix.
+
+     :math:`P` is the views matrix.
+
+     :math:`Q` is the views returns matrix.
+
+     :math:`\Omega` is the covariance matrix of the error views.
+
+     :math:`\mu_{BL}` is the mean vector obtained with the Black
+     Litterman model.
+
+     :math:`\Sigma_{BL}` is the covariance matrix obtained with the Black
+     Litterman model.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     w : DataFrame of shape (n_assets, 1)
+         Portfolio weights, where n_assets is the number of assets.
+     P : DataFrame of shape (n_views, n_assets)
+         Analyst's views matrix, can be relative or absolute.
+     Q : DataFrame of shape (n_views, 1)
+         Expected returns of analyst's views.
+     delta : float, optional
+         Risk aversion factor. The default value is 1.
+     rf : scalar, optional
+         Risk free rate. The default is 0.
+     eq : bool, optional
+         Indicate whether to use equilibrium (True) or historical (False)
+         excess returns. The default is True.
+     method_mu : str, optional
+         The method used to estimate the expected returns.
+         The default value is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
+         - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
+         - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
+
+     method_cov : str, optional
+         The method used to estimate the covariance matrix.
+         The default is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ledoit': use the Ledoit and Wolf Shrinkage method.
+         - 'oas': use the Oracle Approximation Shrinkage method.
+         - 'shrunk': use the basic Shrunk Covariance method.
+         - 'gl': use the basic Graphical Lasso Covariance method.
+         - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
+         - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
+         - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
+
+     dict_mu : dict
+         Other variables related to the mean vector estimation method.
+     dict_cov : dict
+         Other variables related to the covariance estimation method.
+
+     Returns
+     -------
+     mu : DataFrame
+         The mean vector of the Black Litterman model.
+     cov : DataFrame
+         The covariance matrix of the Black Litterman model.
+     w : DataFrame
+         The equilibrium weights of the Black Litterman model, without constraints.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+     if not isinstance(X, pd.DataFrame) or not isinstance(w, pd.DataFrame):
+         raise ValueError("X and w must be DataFrames")
+
+     if w.shape[0] > 1 and w.shape[1] > 1:
+         raise ValueError("w must be a column DataFrame")
+
+     assets = X.columns.tolist()
+
+     w = np.array(w, ndmin=2)
+     if w.shape[0] == 1:
+         w = w.T
+
+     mu = np.array(mean_vector(X, method=method_mu, **dict_mu), ndmin=2)
+     S = np.array(covar_matrix(X, method=method_cov, **dict_cov), ndmin=2)
+     P = np.array(P, ndmin=2)
+     Q = np.array(Q, ndmin=2)
+     tau = 1 / X.shape[0]
+     Omega = np.array(np.diag(np.diag(P @ (tau * S) @ P.T)), ndmin=2)
+
+     if eq == True:
+         PI = delta * (S @ w)
+     elif eq == False:
+         PI = mu.T - rf
+
+     PI_ = inv(inv(tau * S) + P.T @ inv(Omega) @ P) @ (
+         inv(tau * S) @ PI + P.T @ inv(Omega) @ Q
+     )
+     M = inv(inv(tau * S) + P.T @ inv(Omega) @ P)
+     # Equivalent form:
+     # PI_ = PI + (tau * S @ P.T) @ inv(P @ (tau * S) @ P.T + Omega) @ (Q - P @ PI)
+     # M = tau * S - (tau * S @ P.T) @ inv(P @ (tau * S) @ P.T + Omega) @ P @ (tau * S)
+
+     mu = PI_ + rf
+     mu = mu.T
+     cov = S + M
+     w = inv(delta * cov) @ PI_
+
+     mu = pd.DataFrame(mu, columns=assets)
+     cov = pd.DataFrame(cov, index=assets, columns=assets)
+     w = pd.DataFrame(w, index=assets)
+
+     return mu, cov, w
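+
+ # Example (editorial sketch, not part of the released module): one relative
+ # view ("A outperforms B by 10 bps") on the simulated four-asset universe
+ # above, starting from an equal-weight portfolio.
+ #
+ # >>> w = pd.DataFrame([[0.25]] * 4, index=["A", "B", "C", "D"])
+ # >>> P = pd.DataFrame([[1.0, -1.0, 0.0, 0.0]])
+ # >>> Q = pd.DataFrame([[0.001]])
+ # >>> mu_bl, cov_bl, w_bl = black_litterman(X, w, P, Q, delta=2.5)
+ # >>> mu_bl.shape, cov_bl.shape, w_bl.shape
+ # ((1, 4), (4, 4), (4, 1))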
+
+
+ def augmented_black_litterman(
+     X,
+     w,
+     F,
+     B,
+     P=None,
+     Q=None,
+     P_f=None,
+     Q_f=None,
+     delta=1,
+     rf=0,
+     eq=True,
+     const=True,
+     method_mu="hist",
+     method_cov="hist",
+     dict_mu={},
+     dict_cov={},
+ ):
+     r"""
+     Estimate the expected returns vector and covariance matrix based
+     on the Augmented Black Litterman model :cite:`b-WCheung`.
+
+     .. math::
+         \begin{aligned}
+         \Pi^{a} & = \delta \left [ \begin{array}{c} \Sigma \\ \Sigma_{F} B^{\prime} \\ \end{array} \right ] w \\
+         P^{a} & = \left [ \begin{array}{cc} P & 0 \\ 0 & P_{F} \\ \end{array} \right ] \\
+         Q^{a} & = \left [ \begin{array}{c} Q \\ Q_{F} \\ \end{array} \right ] \\
+         \Sigma^{a} & = \left [ \begin{array}{cc} \Sigma & B \Sigma_{F}\\ \Sigma_{F} B^{\prime} & \Sigma_{F} \\ \end{array} \right ] \\
+         \Omega^{a} & = \left [ \begin{array}{cc} \Omega & 0 \\ 0 & \Omega_{F} \\ \end{array} \right ] \\
+         \Pi^{a}_{BL} & = \left [ (\tau \Sigma^{a})^{-1} + (P^{a})^{\prime} (\Omega^{a})^{-1} P^{a} \right ]^{-1}
+         \left [ (\tau\Sigma^{a})^{-1} \Pi^{a} + (P^{a})^{\prime} (\Omega^{a})^{-1} Q^{a} \right ] \\
+         M^{a} & = \left ( (\tau\Sigma^{a})^{-1} + (P^{a})^{\prime} (\Omega^{a})^{-1} P^{a} \right )^{-1} \\
+         \mu^{a}_{BL} & = \Pi^{a}_{BL} + r_{f} \\
+         \Sigma^{a}_{BL} & = \Sigma^{a} + M^{a} \\
+         \end{aligned}
+
+
+     where:
+
+     :math:`r_{f}` is the risk free rate.
+
+     :math:`\delta` is the risk aversion factor.
+
+     :math:`B` is the loadings matrix.
+
+     :math:`\Sigma` is the covariance matrix of assets.
+
+     :math:`\Sigma_{F}` is the covariance matrix of factors.
+
+     :math:`\Sigma^{a}` is the augmented covariance matrix.
+
+     :math:`P` is the assets views matrix.
+
+     :math:`Q` is the assets views returns matrix.
+
+     :math:`P_{F}` is the factors views matrix.
+
+     :math:`Q_{F}` is the factors views returns matrix.
+
+     :math:`P^{a}` is the augmented views matrix.
+
+     :math:`Q^{a}` is the augmented views returns matrix.
+
+     :math:`\Pi^{a}` is the augmented equilibrium excess returns.
+
+     :math:`\Omega` is the covariance matrix of errors of assets views.
+
+     :math:`\Omega_{F}` is the covariance matrix of errors of factors views.
+
+     :math:`\Omega^{a}` is the covariance matrix of errors of augmented views.
+
+     :math:`\mu^{a}_{BL}` is the mean vector obtained with the Augmented Black
+     Litterman model.
+
+     :math:`\Sigma^{a}_{BL}` is the covariance matrix obtained with the Augmented
+     Black Litterman model.
+
+     Parameters
+     ----------
+     X : DataFrame of shape (n_samples, n_assets)
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+     w : DataFrame of shape (n_assets, 1)
+         Portfolio weights, where n_assets is the number of assets.
+     F : DataFrame of shape (n_samples, n_factors)
+         Risk factors returns DataFrame, where n_samples is the number of samples
+         and n_factors is the number of risk factors.
+     B : DataFrame of shape (n_assets, n_factors)
+         Loadings matrix, where n_assets is the number of assets and n_factors is
+         the number of risk factors.
+     P : DataFrame of shape (n_views, n_assets)
+         Analyst's views matrix, can be relative or absolute.
+     Q : DataFrame of shape (n_views, 1)
+         Expected returns of analyst's views.
+     P_f : DataFrame of shape (n_views, n_factors)
+         Analyst's factors views matrix, can be relative or absolute.
+     Q_f : DataFrame of shape (n_views, 1)
+         Expected returns of analyst's factors views.
+     delta : float, optional
+         Risk aversion factor. The default value is 1.
+     rf : scalar, optional
+         Risk free rate. The default is 0.
+     eq : bool, optional
+         Indicate whether to use equilibrium (True) or historical (False)
+         excess returns. The default is True.
+     const : bool, optional
+         Indicate if the loadings matrix has a constant.
+         The default is True.
+     method_mu : str, optional
+         The method used to estimate the expected returns.
+         The default value is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
+         - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
+         - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
+
+     method_cov : str, optional
+         The method used to estimate the covariance matrix.
+         The default is 'hist'. Possible values are:
+
+         - 'hist': use historical estimates.
+         - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
+         - 'ledoit': use the Ledoit and Wolf Shrinkage method.
+         - 'oas': use the Oracle Approximation Shrinkage method.
+         - 'shrunk': use the basic Shrunk Covariance method.
+         - 'gl': use the basic Graphical Lasso Covariance method.
+         - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
+         - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
+         - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
+         - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
+
+     dict_mu : dict
+         Other variables related to the mean vector estimation method.
+     dict_cov : dict
+         Other variables related to the covariance estimation method.
+
+     Returns
+     -------
+     mu : DataFrame
+         The mean vector of the Augmented Black Litterman model.
+     cov : DataFrame
+         The covariance matrix of the Augmented Black Litterman model.
+     w : DataFrame
+         The equilibrium weights of the Augmented Black Litterman model, without constraints.
+
+     Raises
+     ------
+     ValueError
+         When the value cannot be calculated.
+
+     """
+     if not isinstance(X, pd.DataFrame) or not isinstance(w, pd.DataFrame):
+         raise ValueError("X and w must be DataFrames")
+
+     if not isinstance(F, pd.DataFrame) or not isinstance(B, pd.DataFrame):
+         raise ValueError("F and B must be DataFrames")
+
+     if w.shape[0] > 1 and w.shape[1] > 1:
+         raise ValueError("w must be a column DataFrame")
+
+     assets = X.columns.tolist()
+     N = len(assets)
+
+     w_ = np.array(w, ndmin=2)
+     if w_.shape[0] == 1:
+         w_ = w_.T
+
+     B_ = np.array(B, ndmin=2)
+     if const == True:
+         alpha = B_[:, :1]
+         B_ = B_[:, 1:]
+
+     mu = np.array(mean_vector(X, method=method_mu, **dict_mu), ndmin=2)
+     S = np.array(covar_matrix(X, method=method_cov, **dict_cov), ndmin=2)
+
+     tau = 1 / X.shape[0]
+
+     mu_f = np.array(mean_vector(F, method=method_mu, **dict_mu), ndmin=2)
+     S_f = np.array(covar_matrix(F, method=method_cov, **dict_cov), ndmin=2)
+
+     if P is not None and Q is not None and P_f is None and Q_f is None:
+         S_a = np.array(S, ndmin=2)
+         P_a = np.array(P, ndmin=2)
+         Q_a = np.array(Q, ndmin=2)
+         Omega_a = np.array(np.diag(np.diag(P_a @ (tau * S_a) @ P_a.T)), ndmin=2)
+
+         if eq == True:
+             PI_a_ = delta * S_a @ w_
+         elif eq == False:
+             PI_a_ = mu.T - rf
+
+     elif P is None and Q is None and P_f is not None and Q_f is not None:
+         S_a = np.array(S_f, ndmin=2)
+         P_a = np.array(P_f, ndmin=2)
+         Q_a = np.array(Q_f, ndmin=2)
+         Omega_a = np.array(np.diag(np.diag(P_a @ (tau * S_a) @ P_a.T)), ndmin=2)
+
+         if eq == True:
+             PI_a_ = delta * (S_a @ B_.T) @ w_
+         elif eq == False:
+             PI_a_ = mu_f.T - rf
+
+     elif P is not None and Q is not None and P_f is not None and Q_f is not None:
+         S_a = np.hstack((np.vstack((S, S_f @ B_.T)), np.vstack((B_ @ S_f, S_f))))
+
+         P = np.array(P, ndmin=2)
+         Q = np.array(Q, ndmin=2)
+         P_f = np.array(P_f, ndmin=2)
+         Q_f = np.array(Q_f, ndmin=2)
+         zeros_1 = np.zeros((P_f.shape[0], P.shape[1]))
+         zeros_2 = np.zeros((P.shape[0], P_f.shape[1]))
+         P_a = np.hstack((np.vstack((P, zeros_1)), np.vstack((zeros_2, P_f))))
+         Q_a = np.vstack((Q, Q_f))
+
+         Omega = np.array(np.diag(np.diag(P @ (tau * S) @ P.T)), ndmin=2)
+         Omega_f = np.array(np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T)), ndmin=2)
+         zeros = np.zeros((Omega.shape[0], Omega_f.shape[0]))
+         Omega_a = np.hstack((np.vstack((Omega, zeros.T)), np.vstack((zeros, Omega_f))))
+
+         if eq == True:
+             PI_a_ = delta * (np.vstack((S, S_f @ B_.T)) @ w_)
+         elif eq == False:
+             PI_a_ = np.vstack((mu.T, mu_f.T)) - rf
+
+     PI_a = inv(inv(tau * S_a) + P_a.T @ inv(Omega_a) @ P_a) @ (
+         inv(tau * S_a) @ PI_a_ + P_a.T @ inv(Omega_a) @ Q_a
+     )
+     M_a = inv(inv(tau * S_a) + P_a.T @ inv(Omega_a) @ P_a)
+     # Equivalent form:
+     # PI_a = PI_a_ + (tau * S_a @ P_a.T) @ inv(P_a @ (tau * S_a) @ P_a.T + Omega_a) @ (Q_a - P_a @ PI_a_)
+     # M_a = tau * S_a - (tau * S_a @ P_a.T) @ inv(P_a @ (tau * S_a) @ P_a.T + Omega_a) @ P_a @ (tau * S_a)
+
+     mu_a = PI_a + rf
+     mu_a = mu_a.T
+     cov_a = S_a + M_a
+     w_a = inv(delta * cov_a) @ PI_a
+
+     if P is None and Q is None and P_f is not None and Q_f is not None:
+         mu_a = mu_a @ B_.T
+         cov_a = B_ @ cov_a @ B_.T
+         w_a = inv(delta * cov_a) @ B_ @ PI_a
+
+     if const == True:
+         mu_a = mu_a[:, :N] + alpha.T
+
+     mu_a = pd.DataFrame(mu_a[:, :N], columns=assets)
+     cov_a = pd.DataFrame(cov_a[:N, :N], index=assets, columns=assets)
+     w_a = pd.DataFrame(w_a[:N, 0], index=assets)
+
+     return mu_a, cov_a, w_a
1524
+
1525
+
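The combined asset-and-factor-views branch above stacks the two view sets into block matrices before computing the posterior. A minimal numpy sketch of that stacking and of the diagonal views-error covariance, using illustrative dimensions and view values that are not taken from the library:

import numpy as np

# One view on 3 assets and one view on 2 factors (illustrative values).
P = np.array([[1.0, -1.0, 0.0]])  # asset view: asset 1 outperforms asset 2
Q = np.array([[0.02]])            # by 2% per period
P_f = np.array([[1.0, 0.0]])      # factor view: factor 1 returns
Q_f = np.array([[0.01]])          # 1% per period

# Block-diagonal augmented views matrix and stacked views returns,
# mirroring the construction above.
zeros_1 = np.zeros((P_f.shape[0], P.shape[1]))
zeros_2 = np.zeros((P.shape[0], P_f.shape[1]))
P_a = np.hstack((np.vstack((P, zeros_1)), np.vstack((zeros_2, P_f))))
Q_a = np.vstack((Q, Q_f))
print(P_a.shape, Q_a.shape)  # (2, 5) (2, 1): 2 views on 3 assets plus 2 factors

# Views-error covariance under the usual independence assumption
# (diagonal Omega), with an illustrative tau and asset covariance.
tau, S = 1.0 / 500, 0.01 * np.identity(3)
Omega = np.diag(np.diag(P @ (tau * S) @ P.T))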
1526
+ def black_litterman_bayesian(
1527
+ X,
1528
+ F,
1529
+ B,
1530
+ P_f,
1531
+ Q_f,
1532
+ delta=1,
1533
+ rf=0,
1534
+ const=True,
1535
+ method_mu="hist",
1536
+ method_cov="hist",
1537
+ dict_mu={},
1538
+ dict_cov={},
1539
+ ):
1540
+ r"""
1541
+ Estimate the expected returns vector and covariance matrix based
1542
+ on the Black Litterman Bayesian model :cite:`b-BLB`.
1543
+
1544
+ .. math::
1545
+ \begin{aligned}
1546
+ \Sigma & = B \Sigma_{F} B^{\prime} + D \\
1547
+ \overline{\Pi}_{F} & = \left ( \Sigma_{F}^{-1} + P_{F}^{\prime}\Omega_{F}^{-1}P_{F} \right )^{-1} \left ( \Sigma_{F}^{-1}\Pi_{F} + P_{F}^{\prime}\Omega_{F}^{-1}Q_{F} \right) \\
1548
+ \overline{\Sigma}_{F} & = \left ( \Sigma_{F}^{-1} + P_{F}^{\prime}\Omega_{F}^{-1}P_{F} \right )^{-1} \\
1549
+ \Sigma_{BLB} & = \left( \Sigma^{-1} - \Sigma^{-1} B \left( \overline{\Sigma}_{F}^{-1} + B^{\prime}\Sigma^{-1}B \right)^{-1} B^{\prime}\Sigma^{-1} \right )^{-1} \\
1550
+ \mu_{BLB} & = \Sigma_{BLB} \left ( \Sigma^{-1} B \left( \overline{\Sigma}_{F}^{-1} +B^{\prime}\Sigma^{-1}B \right)^{-1} \overline{\Sigma}_{F}^{-1} \overline{\Pi}_{F} \right ) + r_{f} \\
1551
+ \end{aligned}
1552
+
1553
+
1554
+ where:
1555
+
1556
+ :math:`r_{f}` is the risk free rate.
1557
+
1558
+ :math:`B` is the loadings matrix.
1559
+
1560
+ :math:`D` is a diagonal matrix of variance of errors of a factor model.
1561
+
1562
+ :math:`\Sigma` is the covariance matrix obtained with a factor model.
1563
+
1564
+ :math:`\Pi_{F}` is the equilibrium excess returns of factors.
1565
+
1566
+ :math:`\overline{\Pi}_{F}` is the posterior excess returns of factors.
1567
+
1568
+ :math:`\Sigma_{F}` is the covariance matrix of factors.
1569
+
1570
+ :math:`\overline{\Sigma}_{F}` is the posterior covariance matrix of factors.
1571
+
1572
+ :math:`P_{F}` is the factors views matrix.
1573
+
1574
+ :math:`Q_{F}` is the factors views returns matrix.
1575
+
1576
+ :math:`\Omega_{F}` is the covariance matrix of errors of factors views.
1577
+
1578
+ :math:`\mu_{BLB}` is the mean vector obtained with the Black
1579
+ Litterman Bayesian model or posterior predictive mean.
1580
+
1581
+ :math:`\Sigma_{BLB}` is the covariance matrix obtained with the Black
1582
+ Litterman Bayesian model or posterior predictive covariance.
1583
+
1584
+ Parameters
1585
+ ----------
1586
+ X : DataFrame of shape (n_samples, n_assets)
1587
+ Assets returns DataFrame, where n_samples is the number of
1588
+ observations and n_assets is the number of assets.
1589
+ F : DataFrame of shape (n_samples, n_factors)
1590
+ Risk factors returns DataFrame, where n_samples is the number of samples
1591
+ and n_factors is the number of risk factors.
1592
+ B : DataFrame of shape (n_assets, n_factors)
1593
+ Loadings matrix, where n_assets is the number of assets and n_factors is
1594
+ the number of risk factors. If const is True, its first column must be the constant (alpha) term.
1595
+ P_f : DataFrame of shape (n_views, n_factors)
1596
+ Analyst's factors views matrix, can be relative or absolute.
1597
+ Q_f : DataFrame of shape (n_views, 1)
1598
+ Expected returns of analyst's factors views.
1599
+ delta : float, optional
1600
+ Risk aversion factor. The default value is 1.
1601
+ rf : scalar, optional
1602
+ Risk free rate. The default is 0.
1603
+ const : bool, optional
1604
+ Indicate if the loadings matrix has a constant.
1605
+ The default is True.
1606
+ method_mu : str, optional
1607
+ The method used to estimate the expected returns.
1608
+ The default value is 'hist'. Possible values are:
1609
+
1610
+ - 'hist': use historical estimates.
1611
+ - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
1612
+ - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
1613
+ - 'JS': James-Stein estimator. For more information see :cite:`b-Meucci2005` and :cite:`b-Feng2016`.
1614
+ - 'BS': Bayes-Stein estimator. For more information see :cite:`b-Jorion1986`.
1615
+ - 'BOP': BOP estimator. For more information see :cite:`b-Bodnar2019`.
1616
+ method_cov : str, optional
1617
+ The method used to estimate the covariance matrix.
1618
+ The default is 'hist'. Possible values are:
1619
+
1620
+ - 'hist': use historical estimates.
1621
+ - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
1622
+ - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`__.
1623
+ - 'ledoit': use the Ledoit and Wolf Shrinkage method.
1624
+ - 'oas': use the Oracle Approximation Shrinkage method.
1625
+ - 'shrunk': use the basic Shrunk Covariance method.
1626
+ - 'gl': use the basic Graphical Lasso Covariance method.
1627
+ - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`b-jLogo`.
1628
+ - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`b-MLforAM`.
1629
+ - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`b-MLforAM`.
1630
+ - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`b-MLforAM`.
1631
+ - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`b-Gerber2021`.
1632
+ - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`b-Gerber2021`.
1633
+ dict_mu : dict
1634
+ Other variables related to the mean vector estimation method.
1635
+ dict_cov : dict
1636
+ Other variables related to the covariance estimation method.
1637
+
1638
+ Returns
1639
+ -------
1640
+ mu : DataFrame
1641
+ The mean vector of the Black Litterman Bayesian model.
1642
+ cov : DataFrame
1643
+ The covariance matrix of the Black Litterman Bayesian model.
1644
+ w : DataFrame
1645
+ The equilibrium weights of the Black Litterman Bayesian model, without constraints.
1646
+
1647
+ Raises
1648
+ ------
1649
+ ValueError
1650
+ When the value cannot be calculated.
1651
+
1652
+ """
1653
+ if not isinstance(X, pd.DataFrame):
1654
+ raise ValueError("X must be DataFrames")
1655
+
1656
+ if not isinstance(F, pd.DataFrame) and not isinstance(B, pd.DataFrame):
1657
+ raise ValueError("F and B must be DataFrames")
1658
+
1659
+ assets = X.columns.tolist()
1660
+
1661
+ if B is not None:
1662
+ B = np.array(B, ndmin=2)
1663
+ if const == True:
1664
+ alpha = B[:, :1]
1665
+ B = B[:, 1:]
1666
+
1667
+ mu_f = np.array(mean_vector(F, method=method_mu, **dict_mu), ndmin=2)
1668
+ mu_f = (mu_f - rf).T
1669
+
1670
+ tau = 1 / X.shape[0]
1671
+
1672
+ S_f = np.array(covar_matrix(F, method=method_cov, **dict_cov), ndmin=2)
1673
+ S = B @ S_f @ B.T
1674
+
1675
+ D = X.to_numpy() - F @ B.T
1676
+ D = np.diag(D.var())
1677
+ S = S + D
1678
+
1679
+ Omega_f = np.array(np.diag(np.diag(P_f @ (tau * S_f) @ P_f.T)), ndmin=2)
1680
+
1681
+ S_hat = inv(inv(S_f) + P_f.T @ inv(Omega_f) @ P_f)
1682
+
1683
+ Pi_hat = S_hat @ (inv(S_f) @ mu_f + P_f.T @ inv(Omega_f) @ Q_f)
1684
+
1685
+ S_blb = inv(inv(S) - inv(S) @ B @ inv(inv(S_hat) + B.T @ inv(S) @ B) @ B.T @ inv(S))
1686
+
1687
+ Pi_blb = (
1688
+ S_blb @ inv(S) @ B @ inv(inv(S_hat) + B.T @ inv(S) @ B) @ inv(S_hat) @ Pi_hat
1689
+ )
1690
+
1691
+ mu = Pi_blb + rf
1692
+
1693
+ if const == True:
1694
+ mu = mu + alpha
1695
+ mu = mu.T
1696
+ cov = S_blb
1697
+ w = inv(delta * cov) @ mu.T
1698
+
1699
+ mu = pd.DataFrame(mu, columns=assets)
1700
+ cov = pd.DataFrame(cov, index=assets, columns=assets)
1701
+ w = pd.DataFrame(w, index=assets)
1702
+
1703
+ return mu, cov, w
1704
+
1705
+
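A hypothetical end-to-end call of black_litterman_bayesian on simulated data. The tickers, factor model and view values are illustrative only; the views are passed here as plain numpy arrays of the documented shapes (n_views, n_factors) and (n_views, 1), which the matrix code above handles directly, and the loadings matrix carries the constant term in its first column because const=True:

import numpy as np
import pandas as pd
import riskfolio.src.ParamsEstimation as pe

rng = np.random.default_rng(0)
assets, factors = ["A", "B", "C", "D"], ["F1", "F2"]

# Simulated factor returns and a loadings matrix whose first column is
# the constant (alpha) term.
F = pd.DataFrame(rng.normal(0.0, 0.01, (500, 2)), columns=factors)
B = pd.DataFrame(np.hstack([np.full((4, 1), 0.0002), rng.normal(1.0, 0.3, (4, 2))]),
                 index=assets, columns=["const"] + factors)

# Asset returns generated by the factor model plus idiosyncratic noise.
X = pd.DataFrame(B.iloc[:, 0].to_numpy()
                 + F.to_numpy() @ B.iloc[:, 1:].to_numpy().T
                 + rng.normal(0.0, 0.005, (500, 4)), columns=assets)

# One absolute factor view: F1 returns 0.1% per period.
P_f = np.array([[1.0, 0.0]])
Q_f = np.array([[0.001]])

mu, cov, w = pe.black_litterman_bayesian(X, F, B, P_f, Q_f, delta=2, const=True)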
1706
+ def bootstrapping(
1707
+ X,
1708
+ kind="stationary",
1709
+ q=0.05,
1710
+ n_sim=6000,
1711
+ window=3,
1712
+ diag=False,
1713
+ threshold=1e-15,
1714
+ seed=0,
1715
+ ):
1716
+ r"""
1717
+ Estimate the uncertainty sets of the mean vector and covariance matrix
1718
+ through the selected bootstrapping method.
1719
+
1720
+ Parameters
1721
+ ----------
1722
+ X : DataFrame of shape (n_samples, n_assets)
1723
+ Assets returns DataFrame, where n_samples is the number of
1724
+ observations and n_assets is the number of assets.
1725
+ kind : str
1726
+ The bootstrapping method. The default value is 'stationary'. Possible values are:
1727
+
1728
+ - 'stationary': stationary bootstrapping method, see `StationaryBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.StationaryBootstrap.html#arch.bootstrap.StationaryBootstrap>`_ for more details.
1729
+ - 'circular': circular bootstrapping method, see `CircularBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.CircularBlockBootstrap.html#arch.bootstrap.CircularBlockBootstrap>`_ for more details.
1730
+ - 'moving': moving bootstrapping method, see `MovingBlockBootstrap <https://bashtage.github.io/arch/bootstrap/generated/arch.bootstrap.MovingBlockBootstrap.html#arch.bootstrap.MovingBlockBootstrap>`_ for more details.
1731
+ q : scalar
1732
+ Significance level for box and elliptical constraints.
1733
+ The default is 0.05.
1734
+ n_sim : scalar
1735
+ Number of simulations of the bootstrapping method.
1736
+ The default is 6000.
1737
+ window : int
1738
+ Block size of the bootstrapping method. Must be greater than 1
1739
+ and lower than n_samples - window + 1.
1740
+ The default is 3.
1741
+ diag : bool
1742
+ Whether to consider only the main diagonal of the covariance matrices of
1743
+ estimation errors, following :cite:`b-fabozzi2007robust`. The default is False.
1744
+ threshold : float
1745
+ Parameter used to fix covariance matrices in case they are not positive semidefinite.
1746
+ The default is 1e-15.
1747
+ seed : int
1748
+ Seed used to generate random numbers for the bootstrapping method.
1749
+ The default is 0.
1750
+
1751
+ Returns
1752
+ -------
1753
+ mu_l : DataFrame
1754
+ The q/2 percentile of mean vector obtained through the selected
1755
+ bootstrapping method.
1756
+ mu_u : DataFrame
1757
+ The 1-q/2 percentile of mean vector obtained through the selected
1758
+ bootstrapping method.
1759
+ cov_l : DataFrame
1760
+ The q/2 percentile of covariance matrix obtained through the selected
1761
+ bootstrapping method.
1762
+ cov_u : DataFrame
1763
+ The 1-q/2 percentile of covariance matrix obtained through the selected
1764
+ bootstrapping method.
1765
+ cov_mu : DataFrame
1766
+ The covariance matrix of estimation errors of mean vector obtained
1767
+ through the selected bootstrapping method.
1768
+ cov_sigma : DataFrame
1769
+ The covariance matrix of estimation errors of covariance matrix
1770
+ obtained through the selected bootstrapping method.
1771
+ k_mu : float
1773
+ The square root of the size of the elliptical constraint on the mean vector
1774
+ estimation error, based on the 1-q percentile.
1775
+ k_sigma : float
1776
+ The square root of the size of the elliptical constraint on the covariance
1777
+ matrix estimation error, based on the 1-q percentile.
1777
+
1778
+ Raises
1779
+ ------
1780
+ ValueError
1781
+ When the value cannot be calculated.
1782
+
1783
+ """
1784
+
1785
+ if not isinstance(X, pd.DataFrame):
1786
+ raise ValueError("X must be a DataFrame")
1787
+
1788
+ if window >= X.shape[0] - window + 1:
1789
+ raise ValueError("block must be lower than n_samples - window + 1")
1790
+ elif window <= 1:
1791
+ raise ValueError("block must be greather than 1")
1792
+
1793
+ cols = X.columns.tolist()
1794
+ cols_2 = [i + "-" + j for i in cols for j in cols]
1795
+ T, n = X.shape
1796
+
1797
+ mu = X.mean().to_numpy().reshape(1, n)
1798
+ vec_Sigma = X.cov().to_numpy().reshape((1, n**2), order="F")
1799
+
1800
+ mus = np.zeros((n_sim, 1, n))
1801
+ covs = np.zeros((n_sim, n, n))
1802
+
1803
+ if kind == "stationary":
1804
+ gen = bs.StationaryBootstrap(window, X, seed=seed)
1805
+ elif kind == "circular":
1806
+ gen = bs.CircularBlockBootstrap(window, X, seed=seed)
1807
+ elif kind == "moving":
1808
+ gen = bs.MovingBlockBootstrap(window, X, seed=seed)
1809
+ else:
1810
+ raise ValueError("kind only can be 'stationary', 'circular' or 'moving'")
1811
+
1812
+ i = 0
1813
+ for data in gen.bootstrap(n_sim):
1814
+ A = data[0][0]
1815
+ mus[i] = A.mean().to_numpy().reshape(1, n)
1816
+ covs[i] = A.cov().to_numpy()
1817
+ i += 1
1818
+
1819
+ # Box Constraint for Mean
1820
+ mu_l = np.percentile(mus, q=q / 2 * 100, axis=0, keepdims=True).reshape(1, n)
1821
+ mu_u = np.percentile(mus, q=(1 - q / 2) * 100, axis=0, keepdims=True).reshape(1, n)
1822
+ mu_l = pd.DataFrame(mu_l, index=[0], columns=cols)
1823
+ mu_u = pd.DataFrame(mu_u, index=[0], columns=cols)
1824
+
1825
+ # Box Constraint for Covariance
1826
+ cov_l = np.percentile(covs, q=q / 2 * 100, axis=0, keepdims=True).reshape(n, n)
1827
+ cov_u = np.percentile(covs, q=(1 - q / 2) * 100, axis=0, keepdims=True).reshape(
1828
+ n, n
1829
+ )
1830
+ cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
1831
+ cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)
1832
+
1833
+ # Check if the upper and lower bounds of the covariance matrix are
1834
+ # positive semidefinite and fix them when they are not
1835
+ if af.is_pos_def(cov_l) == False:
1836
+ cov_l = af.cov_fix(cov_l, method="clipped", threshold=threshold)
1837
+ if af.is_pos_def(cov_u) == False:
1838
+ cov_u = af.cov_fix(cov_u, method="clipped", threshold=threshold)
1839
+
1840
+ # Elliptical Constraint for Mean
1841
+ A_mu = mus.reshape(n_sim, n) - np.repeat(mu, n_sim, axis=0)
1842
+ cov_mu = np.cov(A_mu, rowvar=False)
1843
+ if diag == True:
1844
+ cov_mu = np.diag(np.diag(cov_mu))
1845
+ k_mus = np.diag(A_mu @ inv(cov_mu) @ A_mu.T)
1846
+ k_mu = np.percentile(k_mus, q=(1 - q) * 100) ** 0.5
1847
+ cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)
1848
+
1849
+ # Elliptical Constraint for Covariance
1850
+ A_Sigma = covs.reshape((n_sim, n**2), order="F")
1851
+ A_Sigma = A_Sigma - np.repeat(vec_Sigma, n_sim, axis=0)
1852
+ cov_sigma = np.cov(A_Sigma, rowvar=False)
1853
+ cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
1854
+ if diag == True:
1855
+ cov_sigma = np.diag(np.diag(cov_sigma))
1856
+ if af.is_pos_def(cov_sigma) == False:
1857
+ cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
1858
+ k_sigmas = np.diag(A_Sigma @ inv(cov_sigma) @ A_Sigma.T)
1859
+ k_sigma = np.percentile(k_sigmas, q=(1 - q) * 100) ** 0.5
1860
+ cov_sigma = pd.DataFrame(cov_sigma, index=cols_2, columns=cols_2)
1861
+
1862
+ return mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma
1863
+
1864
+
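A hypothetical call of bootstrapping on simulated returns, showing how the eight outputs map onto box and elliptical uncertainty sets; data, dimensions and parameter values are illustrative:

import numpy as np
import pandas as pd
import riskfolio.src.ParamsEstimation as pe

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(0.001, 0.01, (750, 3)), columns=["A", "B", "C"])

mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma = pe.bootstrapping(
    X, kind="stationary", q=0.05, n_sim=1000, window=3, seed=0
)

# Box sets: elementwise bounds mu_l <= mu <= mu_u and cov_l <= cov <= cov_u.
# Elliptical sets: (mu - mu_hat)' inv(cov_mu) (mu - mu_hat) <= k_mu**2, with
# the analogous constraint on vec(cov) using cov_sigma and k_sigma.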
1865
+ def normal_simulation(X, q=0.05, n_sim=6000, diag=False, threshold=1e-15, seed=0):
1866
+ r"""
1867
+ Estimate the uncertainty sets of the mean vector and covariance matrix assuming
1868
+ that asset returns follow a multivariate normal distribution.
1869
+
1870
+ Parameters
1871
+ ----------
1872
+ X : DataFrame of shape (n_samples, n_assets)
1873
+ Assets returns DataFrame, where n_samples is the number of
1874
+ observations and n_assets is the number of assets.
1875
+ q : scalar
1876
+ Significance level for box and elliptical constraints.
1877
+ The default is 0.05.
1878
+ n_sim : scalar
1879
+ Number of simulations used to build the uncertainty sets.
1880
+ The default is 6000.
1881
+ diag : bool
1882
+ Whether to consider only the main diagonal of the covariance matrices of
1883
+ estimation errors, following :cite:`b-fabozzi2007robust`. The default is False.
1884
+ threshold : float
1885
+ Parameter used to fix covariance matrices in case they are not positive
1886
+ semidefinite. The default is 1e-15.
1887
+ seed : int
1888
+ Seed used to generate random numbers for the simulation.
1889
+ The default is 0.
1890
+
1891
+ Returns
1892
+ -------
1893
+ mu_l : DataFrame
1894
+ The q/2 percentile of mean vector obtained through the normal
1895
+ simulation.
1896
+ mu_u : DataFrame
1897
+ The 1-q/2 percentile of mean vector obtained through the normal
1898
+ simulation.
1899
+ cov_l : DataFrame
1900
+ The q/2 percentile of covariance matrix obtained through the normal
1901
+ simulation.
1902
+ cov_u : DataFrame
1903
+ The 1-q/2 percentile of covariance matrix obtained through the normal
1904
+ simulation.
1905
+ cov_mu : DataFrame
1906
+ The covariance matrix of estimation errors of mean vector obtained
1907
+ through the normal simulation.
1908
+ cov_sigma : DataFrame
1909
+ The covariance matrix of estimation errors of covariance matrix
1910
+ obtained through the normal simulation.
1911
+ k_mu : float
1912
+ The square root of the size of the elliptical constraint on the mean vector
1913
+ estimation error, based on the 1-q percentile.
1914
+ k_sigma : float
1915
+ The square root of the size of the elliptical constraint on the covariance
1916
+ matrix estimation error, based on the 1-q percentile.
1917
+
1918
+ Raises
1919
+ ------
1920
+ ValueError
1921
+ When the value cannot be calculated.
1922
+
1923
+ """
1924
+
1925
+ if not isinstance(X, pd.DataFrame):
1926
+ raise ValueError("X must be a DataFrame")
1927
+
1928
+ cols = X.columns.tolist()
1929
+ cols_2 = [i + "-" + j for i in cols for j in cols]
1930
+ T, n = X.shape
1931
+
1932
+ # Set initial parameters based on assumption of normality
1933
+ mu = X.mean().to_numpy().reshape(1, n)
1934
+ vec_Sigma = X.cov().to_numpy().reshape((1, n**2), order="F")
1935
+ Sigma = X.cov().to_numpy()
1936
+ cov_mu = Sigma / T
1937
+ K = cf.commutation_matrix(T=n, n=n)
1938
+ I = np.identity(n**2)
1939
+ cov_sigma = T * (I + K) @ np.kron(cov_mu, cov_mu)
1940
+ if diag == True:
1941
+ cov_sigma = np.diag(np.diag(cov_sigma))
1942
+ if af.is_pos_def(cov_sigma) == False:
1943
+ cov_sigma = af.cov_fix(cov_sigma, method="clipped", threshold=threshold)
1944
+ cov_sigma = pd.DataFrame(cov_sigma, index=cols_2, columns=cols_2)
1945
+
1946
+ # Box Constraint for Mean
1947
+ delta_mu = st.norm.ppf(1 - q / 2) * np.sqrt(np.diag(cov_mu)).reshape(-1, 1)
1948
+ mu_l = mu - delta_mu.T
1949
+ mu_u = mu + delta_mu.T
1950
+ mu_l = pd.DataFrame(mu_l, index=[0], columns=cols)
1951
+ mu_u = pd.DataFrame(mu_u, index=[0], columns=cols)
1952
+
1953
+ # Box Constraints for Covariance
1954
+ rs = np.random.RandomState(seed=seed)
1955
+ covs = st.wishart.rvs(T, cov_mu, size=n_sim, random_state=rs)
1956
+ cov_l = np.percentile(covs, q=q / 2 * 100, axis=0)  # np.percentile expects q in [0, 100]
1957
+ cov_u = np.percentile(covs, q=(1 - q / 2) * 100, axis=0)
1958
+ cov_l = pd.DataFrame(cov_l, index=cols, columns=cols)
1959
+ cov_u = pd.DataFrame(cov_u, index=cols, columns=cols)
1960
+
1961
+ # Check if the upper and lower bounds of the covariance matrix are
1962
+ # positive semidefinite and fix them when they are not
1963
+ if af.is_pos_def(cov_l) == False:
1964
+ cov_l = af.cov_fix(cov_l, method="clipped", threshold=threshold)
1965
+ if af.is_pos_def(cov_u) == False:
1966
+ cov_u = af.cov_fix(cov_u, method="clipped", threshold=threshold)
1967
+
1968
+ # Elliptical Constraint for Mean
1969
+ A_mu = rs.multivariate_normal(mu.ravel(), cov_mu, size=n_sim)
1970
+ A_mu = A_mu - np.repeat(mu, n_sim, axis=0)  # center the draws so k_mus measures the deviation from mu
+ # cov_mu = np.cov(A_mu, rowvar=False)
1971
+ if diag == True:
1972
+ cov_mu = np.diag(np.diag(cov_mu))
1973
+ k_mus = np.diag(A_mu @ inv(cov_mu) @ A_mu.T)
1974
+ k_mu = np.percentile(k_mus, q=(1 - q) * 100) ** 0.5
1975
+ # k_mu = st.chi2.ppf(1 - q, df=n) ** 0.5
1976
+ cov_mu = pd.DataFrame(cov_mu, index=cols, columns=cols)
1977
+
1978
+ # Elliptical Constraint for Covariance
1979
+ A_Sigma = covs.reshape((n_sim, n**2), order="F")
1980
+ A_Sigma = A_Sigma - np.repeat(vec_Sigma, n_sim, axis=0)
1981
+ A_cov_sigma = np.cov(A_Sigma, rowvar=False)
1982
+ if diag == True:
1983
+ A_cov_sigma = np.diag(np.diag(A_cov_sigma))
1984
+ if af.is_pos_def(A_cov_sigma) == False:
1985
+ A_cov_sigma = af.cov_fix(A_cov_sigma, method="clipped", threshold=threshold)
1986
+ k_sigmas = np.diag(A_Sigma @ inv(A_cov_sigma) @ A_Sigma.T)
1987
+ k_sigma = np.percentile(k_sigmas, q=(1 - q) * 100) ** 0.5
1988
+
1989
+ return mu_l, mu_u, cov_l, cov_u, cov_mu, cov_sigma, k_mu, k_sigma
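
The closed-form cov_sigma above uses the normal-theory identity cov(vec(S)) = (I + K)(Sigma ⊗ Sigma)/T, with K the commutation matrix built by cf.commutation_matrix. A minimal sketch of that construction with a pure-Python stand-in for K and an illustrative Sigma and T:

import numpy as np

def commutation_matrix(n):
    # Permutation matrix K satisfying K @ vec(A) == vec(A.T) for any
    # n x n matrix A, where vec stacks columns (Fortran order).
    K = np.zeros((n * n, n * n))
    for i in range(n):
        for j in range(n):
            K[i * n + j, j * n + i] = 1.0
    return K

n, T = 3, 500
Sigma = 1e-4 * np.array([[1.0, 0.3, 0.1],
                         [0.3, 1.0, 0.2],
                         [0.1, 0.2, 1.0]])
cov_mu = Sigma / T
K = commutation_matrix(n)
I = np.identity(n ** 2)
cov_sigma = T * (I + K) @ np.kron(cov_mu, cov_mu)  # == (I + K) @ np.kron(Sigma, Sigma) / T

A = np.arange(9.0).reshape(3, 3)
assert np.allclose(K @ A.flatten(order="F"), A.T.flatten(order="F"))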