riskfolio-lib 7.1.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1102 @@
+ """""" #
+
+ """
+ Copyright (c) 2020-2025, Dany Cajas
+ All rights reserved.
+ This work is licensed under BSD 3-Clause "New" or "Revised" License.
+ License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
+ """
+
+ import numpy as np
+ import pandas as pd
+ import scipy.cluster.hierarchy as hr
+ from scipy.spatial.distance import squareform
+ import riskfolio as rp
+ import riskfolio.src.RiskFunctions as rk
+ import riskfolio.src.AuxFunctions as af
+ import riskfolio.src.ParamsEstimation as pe
+ import riskfolio.src.DBHT as db
+ import riskfolio.src.GerberStatistic as gs
+
+
+ __all__ = [
+     "HCPortfolio",
+ ]
+
+
+ class HCPortfolio(object):
+     r"""
+     Class that creates a portfolio object with all properties needed to
+     calculate optimal portfolios.
+
+     Parameters
+     ----------
+     returns : DataFrame of shape (n_samples, n_assets), optional
+         Assets returns DataFrame, where n_samples is the number of
+         observations and n_assets is the number of assets.
+         The default is None.
+     alpha : float, optional
+         Significance level of VaR, CVaR, EVaR, RLVaR, DaR, CDaR, EDaR, RLDaR and Tail Gini of losses.
+         The default is 0.05.
+     a_sim : int, optional
+         Number of CVaRs used to approximate Tail Gini of losses. The default is 100.
+     beta : float, optional
+         Significance level of CVaR and Tail Gini of gains. If None it duplicates alpha value.
+         The default is None.
+     b_sim : int, optional
+         Number of CVaRs used to approximate Tail Gini of gains. If None it duplicates a_sim value.
+         The default is None.
+     kappa : float, optional
+         Deformation parameter of RLVaR and RLDaR for losses, must be between 0 and 1.
+         The default is 0.30.
+     kappa_g : float, optional
+         Deformation parameter of RLVaR and RLDaR for gains, must be between 0 and 1.
+         The default is None.
+     solver_rl : str, optional
+         Solver available for CVXPY that supports power cone programming. Used to calculate RLVaR and RLDaR.
+         The default value is 'CLARABEL'.
+     solvers : list, optional
+         List of solvers available for CVXPY used by the NCO model.
+         The default value is ['CLARABEL', 'SCS', 'ECOS'].
+     w_max : pd.Series or float, optional
+         Upper bound constraint for hierarchical risk parity weights :cite:`c-Pfitzinger`.
+     w_min : pd.Series or float, optional
+         Lower bound constraint for hierarchical risk parity weights :cite:`c-Pfitzinger`.
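+
+     Example
+     -------
+     A minimal usage sketch, assuming ``Y`` is a DataFrame of assets returns::
+
+         import riskfolio as rp
+
+         port = rp.HCPortfolio(returns=Y)
+         w = port.optimization(model="HRP", codependence="pearson", rm="MV")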
+     """
+
+     def __init__(
+         self,
+         returns=None,
+         alpha=0.05,
+         a_sim=100,
+         beta=None,
+         b_sim=None,
+         kappa=0.30,
+         kappa_g=None,
+         solver_rl="CLARABEL",
+         solvers=["CLARABEL", "SCS", "ECOS"],
+         w_max=None,
+         w_min=None,
+     ):
+         self._returns = returns
+         self.alpha = alpha
+         self.a_sim = a_sim
+         self.beta = beta
+         self.b_sim = b_sim
+         self._kappa = kappa
+         self._kappa_g = kappa_g
+         self.solver_rl = solver_rl
+         self.solvers = solvers
+         self.asset_order = None
+         self.clustering = None
+         self.cov = None
+         self.mu = None
+         self.kurt = False
+         self.skurt = False
+         self.codep = None
+         self.codep_sorted = None
+         self.w_max = w_max
+         self.w_min = w_min
+
+     @property
+     def returns(self):
+         if self._returns is not None and isinstance(self._returns, pd.DataFrame):
+             return self._returns
+         else:
+             raise NameError("returns must be a DataFrame")
+
+     @returns.setter
+     def returns(self, value):
+         if value is not None and isinstance(value, pd.DataFrame):
+             self._returns = value
+         else:
+             raise NameError("returns must be a DataFrame")
+
+     @property
+     def assetslist(self):
+         if self._returns is not None and isinstance(self._returns, pd.DataFrame):
+             return self._returns.columns.tolist()
+
+     @property
+     def kappa(self):
+         return self._kappa
+
+     @kappa.setter
+     def kappa(self, value):
+         a = value
+         if a >= 1:
+             print(
+                 "kappa must be between 0 and 1, values higher or equal to 1 are set to 0.99"
+             )
+             self._kappa = 0.99
+         elif a <= 0:
+             print(
+                 "kappa must be between 0 and 1, values lower or equal to 0 are set to 0.01"
+             )
+             self._kappa = 0.01
+         else:
+             self._kappa = a
+
+     @property
+     def kappa_g(self):
+         return self._kappa_g
+
+     @kappa_g.setter
+     def kappa_g(self, value):
+         a = value
+         if a >= 1:
+             print(
+                 "kappa_g must be between 0 and 1, values higher or equal to 1 are set to 0.99"
+             )
+             self._kappa_g = 0.99
+         elif a <= 0:
+             print(
+                 "kappa_g must be between 0 and 1, values lower or equal to 0 are set to 0.01"
+             )
+             self._kappa_g = 0.01
+         else:
+             self._kappa_g = a
+
+     # get naive-risk weights
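+     # Each asset receives a weight proportional to the inverse of its
+     # stand-alone risk, w_i = (1 / risk_i) / sum_j (1 / risk_j); for rm="MV"
+     # the squared risk (variance) is inverted, and rm="equal" returns equal
+     # 1/n weights.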
+     def _naive_risk(self, returns, cov, rm="MV", rf=0):
+         assets = returns.columns.tolist()
+         n = len(assets)
+
+         if rm == "equal":
+             weights = np.ones((n, 1)) * 1 / n
+         else:
+             inv_risk = np.zeros((n, 1))
+             for i in assets:
+                 k = assets.index(i)
+                 w = np.zeros((n, 1))
+                 w[k, 0] = 1
+                 w = pd.DataFrame(w, columns=["weights"], index=assets)
+                 if rm == "vol":
+                     risk = rk.Sharpe_Risk(
+                         returns=returns,
+                         w=w,
+                         cov=cov,
+                         rm="MV",
+                         rf=rf,
+                         alpha=self.alpha,
+                         a_sim=self.a_sim,
+                         beta=self.beta,
+                         b_sim=self.b_sim,
+                         kappa=self.kappa,
+                         kappa_g=self.kappa_g,
+                         solver=self.solver_rl,
+                     )
+                 else:
+                     risk = rk.Sharpe_Risk(
+                         returns=returns,
+                         w=w,
+                         cov=cov,
+                         rm=rm,
+                         rf=rf,
+                         alpha=self.alpha,
+                         a_sim=self.a_sim,
+                         beta=self.beta,
+                         b_sim=self.b_sim,
+                         kappa=self.kappa,
+                         kappa_g=self.kappa_g,
+                         solver=self.solver_rl,
+                     )
+                 inv_risk[k, 0] = risk
+
+             if rm == "MV":
+                 inv_risk = 1 / np.power(inv_risk, 2)
+             else:
+                 inv_risk = 1 / inv_risk
+             weights = inv_risk * (1 / np.sum(inv_risk))
+
+         weights = weights.reshape(-1, 1)
+
+         return weights
+
+     # get optimal weights
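+     # Builds a throwaway rp.Portfolio on the given sub-universe, loads
+     # historical estimates (overridden by mu/cov when supplied), and solves a
+     # Classic optimization for MinRisk/Utility/Sharpe or a risk parity
+     # optimization for ERC.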
+     def _opt_w(
+         self,
+         returns,
+         mu,
+         cov,
+         obj="MinRisk",
+         rm="MV",
+         rf=0,
+         l=2,
+     ):
+         if returns.shape[1] == 1:
+             weights = np.array([1]).reshape(-1, 1)
+         else:
+             if obj in {"MinRisk", "Utility", "Sharpe"}:
+                 port = rp.Portfolio(
+                     returns=returns,
+                     alpha=self.alpha,
+                     a_sim=self.a_sim,
+                     beta=self.beta,
+                     b_sim=self.b_sim,
+                     kappa=self.kappa,
+                     kappa_g=self.kappa_g,
+                 )
+
+                 if self.kurt:
+                     method_kurt = "hist"
+                 elif self.skurt:
+                     method_kurt = "hist"
+                 else:
+                     method_kurt = None
+
+                 port.assets_stats(
+                     method_mu="hist", method_cov="hist", method_kurt=method_kurt
+                 )
+                 if self.solvers is not None:
+                     port.solvers = self.solvers
+                 if mu is not None:
+                     port.mu = mu
+                 if cov is not None:
+                     port.cov = cov
+                 weights = port.optimization(
+                     model="Classic", rm=rm, obj=obj, rf=rf, l=l, hist=True
+                 ).to_numpy()
+             elif obj in {"ERC"}:
+                 port = rp.Portfolio(
+                     returns=returns,
+                     alpha=self.alpha,
+                     a_sim=self.a_sim,
+                     beta=self.beta,
+                     b_sim=self.b_sim,
+                     kappa=self.kappa,
+                     kappa_g=self.kappa_g,
+                 )
+
+                 if self.kurt:
+                     method_kurt = "hist"
+                 elif self.skurt:
+                     method_kurt = "hist"
+                 else:
+                     method_kurt = None
+                 port.assets_stats(
+                     method_mu="hist", method_cov="hist", method_kurt=method_kurt
+                 )
+                 if self.solvers is not None:
+                     port.solvers = self.solvers
+                 if mu is not None:
+                     port.mu = mu
+                 if cov is not None:
+                     port.cov = cov
+                 weights = port.rp_optimization(
+                     model="Classic", rm=rm, rf=rf, b=None, hist=True
+                 ).to_numpy()
+
+         weights = weights.reshape(-1, 1)
+
+         return weights
+
+     # Create hierarchical clustering
+     def _hierarchical_clustering(
+         self,
+         model="HRP",
+         codependence="pearson",
+         linkage="ward",
+         opt_k_method="twodiff",
+         k=None,
+         max_k=10,
+         leaf_order=True,
+     ):
+         # Calculating distance
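+         # Correlation-like codependence measures are mapped to a metric
+         # distance, e.g. D_ij = sqrt((1 - rho_ij) / 2), so rho = 1 gives
+         # D = 0 and rho = -1 gives D = 1.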
+         if codependence in {
+             "pearson",
+             "spearman",
+             "kendall",
+             "gerber1",
+             "gerber2",
+             "custom_cov",
+         }:
+             dist = np.sqrt(np.clip((1 - self.codep) / 2, a_min=0.0, a_max=1.0))
+         elif codependence in {"abs_pearson", "abs_spearman", "abs_kendall", "distance"}:
+             dist = np.sqrt(np.clip((1 - self.codep), a_min=0.0, a_max=1.0))
+         elif codependence in {"mutual_info"}:
+             dist = af.var_info_matrix(self.returns, self.bins_info).astype(float)
+         elif codependence in {"tail"}:
+             dist = -np.log(self.codep).astype(float)
+
+         # Hierarchical clustering
+         dist = dist.to_numpy()
+         dist = pd.DataFrame(dist, columns=self.codep.columns, index=self.codep.index)
+         if linkage == "DBHT":
+             # different choices for D, S give different outputs!
+             D = dist.to_numpy()  # dissimilarity matrix
+             if codependence in {
+                 "pearson",
+                 "spearman",
+                 "kendall",
+                 "gerber1",
+                 "gerber2",
+                 "custom_cov",
+             }:
+                 codep = 1 - dist**2
+                 S = codep.to_numpy()  # similarity matrix
+             else:
+                 S = self.codep.to_numpy()  # similarity matrix
+             (_, _, _, _, _, clustering) = db.DBHTs(
+                 D, S, leaf_order=leaf_order
+             )  # DBHT clustering
+         else:
+             p_dist = squareform(dist, checks=False)
+             clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)
+
+         if model in {"HERC", "HERC2", "NCO"}:
+             # optimal number of clusters
+             if k is None:
+                 if opt_k_method == "twodiff":
+                     k, _ = af.two_diff_gap_stat(dist, clustering, max_k)
+                 elif opt_k_method == "stdsil":
+                     k, _ = af.std_silhouette_score(dist, clustering, max_k)
+                 else:
+                     raise ValueError(
+                         "The only opt_k_method values available are 'twodiff' and 'stdsil'"
+                     )
+         else:
+             k = None
+
+         return clustering, k
+
+     # sort clustered items by distance
+     def _seriation(self, clusters):
+         return hr.leaves_list(clusters)
+
+     # compute HRP weight allocation through recursive bisection
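+     # The seriated universe is split into halves repeatedly; at each split
+     # the two halves receive weights proportional to the inverse of their
+     # cluster risk, alpha_1 = 1 - risk_left / (risk_left + risk_right).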
+     def _recursive_bisection(
+         self,
+         sort_order,
+         rm="MV",
+         rf=0,
+         upper_bound=None,
+         lower_bound=None,
+     ):
+         weights = pd.Series(1.0, index=self.assetslist)  # set initial weights to 1
+         items = [sort_order]
+
+         while len(items) > 0:  # loop while there are clusters left to split
+             items = [
+                 i[j:k]
+                 for i in items
+                 for j, k in (
+                     (0, len(i) // 2),
+                     (len(i) // 2, len(i)),
+                 )  # get cluster indices
+                 if len(i) > 1
+             ]
+
+             # allocate weight to left and right cluster
+             for i in range(0, len(items), 2):
+                 left_cluster = items[i]
+                 right_cluster = items[i + 1]
+
+                 # Left cluster
+                 left_cov = self.cov.iloc[left_cluster, left_cluster]
+                 left_returns = self.returns.iloc[:, left_cluster]
+                 left_weights = self._naive_risk(left_returns, left_cov, rm=rm, rf=rf)
+
+                 if rm == "vol":
+                     left_risk = rk.Sharpe_Risk(
+                         returns=left_returns,
+                         w=left_weights,
+                         cov=left_cov,
+                         rm="MV",
+                         rf=rf,
+                         alpha=self.alpha,
+                         a_sim=self.a_sim,
+                         beta=self.beta,
+                         b_sim=self.b_sim,
+                         kappa=self.kappa,
+                         solver=self.solver_rl,
+                     )
+                 else:
+                     left_risk = rk.Sharpe_Risk(
+                         returns=left_returns,
+                         w=left_weights,
+                         cov=left_cov,
+                         rm=rm,
+                         rf=rf,
+                         alpha=self.alpha,
+                         a_sim=self.a_sim,
+                         beta=self.beta,
+                         b_sim=self.b_sim,
+                         kappa=self.kappa,
+                         solver=self.solver_rl,
+                     )
+                 if rm == "MV":
+                     left_risk = np.power(left_risk, 2)
+
+                 # Right cluster
+                 right_cov = self.cov.iloc[right_cluster, right_cluster]
+                 right_returns = self.returns.iloc[:, right_cluster]
+                 right_weights = self._naive_risk(right_returns, right_cov, rm=rm, rf=rf)
+
+                 if rm == "vol":
+                     right_risk = rk.Sharpe_Risk(
+                         returns=right_returns,
+                         w=right_weights,
+                         cov=right_cov,
+                         rm="MV",
+                         rf=rf,
+                         alpha=self.alpha,
+                         a_sim=self.a_sim,
+                         beta=self.beta,
+                         b_sim=self.b_sim,
+                         kappa=self.kappa,
+                         solver=self.solver_rl,
+                     )
+                 else:
+                     right_risk = rk.Sharpe_Risk(
+                         returns=right_returns,
+                         w=right_weights,
+                         cov=right_cov,
+                         rm=rm,
+                         rf=rf,
+                         alpha=self.alpha,
+                         a_sim=self.a_sim,
+                         beta=self.beta,
+                         b_sim=self.b_sim,
+                         kappa=self.kappa,
+                         solver=self.solver_rl,
+                     )
+                 if rm == "MV":
+                     right_risk = np.power(right_risk, 2)
+
+                 # Allocate weight to clusters
+                 alpha_1 = 1 - left_risk / (left_risk + right_risk)
+
+                 weights.iloc[left_cluster] *= alpha_1  # weight 1
+                 weights.iloc[right_cluster] *= 1 - alpha_1  # weight 2
+
+         return weights
+
+     # compute HERC weight allocation through cluster-based bisection
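+     # Walks the dendrogram top-down through the k - 1 highest merge nodes,
+     # splitting weight between the left and right branches by inverse cluster
+     # risk; within each of the k final clusters, weights follow naive risk
+     # parity (HERC) or equal weighting (HERC2).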
+     def _hierarchical_recursive_bisection(
+         self,
+         Z,
+         rm="MV",
+         rf=0,
+         linkage="ward",
+         model="HERC",
+         upper_bound=None,
+         lower_bound=None,
+     ):
+         # Transform linkage to tree and reverse order
+         root, nodes = hr.to_tree(Z, rd=True)
+         nodes = np.array(nodes)
+         nodes_1 = np.array([i.dist for i in nodes])
+         idx = np.argsort(nodes_1)
+         nodes = nodes[idx][::-1].tolist()
+         weights = pd.Series(1.0, index=self.assetslist)  # Set initial weights to 1
+
+         clustering_inds = hr.fcluster(Z, self.k, criterion="maxclust")
+         clusters = {
+             i: [] for i in range(min(clustering_inds), max(clustering_inds) + 1)
+         }
+         for i, v in enumerate(clustering_inds):
+             clusters[v].append(i)
+
+         # Loop through k clusters
+         for i in nodes[: self.k - 1]:
+             if not i.is_leaf():  # skip leaf nodes
+                 left = i.get_left().pre_order()  # get left cluster
+                 right = i.get_right().pre_order()  # get right cluster
+                 left_set = set(left)
+                 right_set = set(right)
+                 left_risk = 0
+                 right_risk = 0
+                 left_cluster = []
+                 right_cluster = []
+
+                 # Allocate weight to clusters
+                 if rm == "equal":
+                     alpha_1 = 0.5
+
+                 else:
+                     for j in clusters.keys():
+                         if set(clusters[j]).issubset(left_set):
+                             # Left cluster
+                             left_cov = self.cov.iloc[clusters[j], clusters[j]]
+                             left_returns = self.returns.iloc[:, clusters[j]]
+                             left_weights = self._naive_risk(
+                                 left_returns, left_cov, rm=rm, rf=rf
+                             )
+
+                             if rm == "vol":
+                                 left_risk_ = rk.Sharpe_Risk(
+                                     returns=left_returns,
+                                     w=left_weights,
+                                     cov=left_cov,
+                                     rm="MV",
+                                     rf=rf,
+                                     alpha=self.alpha,
+                                     a_sim=self.a_sim,
+                                     beta=self.beta,
+                                     b_sim=self.b_sim,
+                                     kappa=self.kappa,
+                                     kappa_g=self.kappa_g,
+                                     solver=self.solver_rl,
+                                 )
+                             else:
+                                 left_risk_ = rk.Sharpe_Risk(
+                                     returns=left_returns,
+                                     w=left_weights,
+                                     cov=left_cov,
+                                     rm=rm,
+                                     rf=rf,
+                                     alpha=self.alpha,
+                                     a_sim=self.a_sim,
+                                     beta=self.beta,
+                                     b_sim=self.b_sim,
+                                     kappa=self.kappa,
+                                     kappa_g=self.kappa_g,
+                                     solver=self.solver_rl,
+                                 )
+                             if rm == "MV":
+                                 left_risk_ = np.power(left_risk_, 2)
+
+                             left_risk += left_risk_
+                             left_cluster += clusters[j]
+
+                         elif set(clusters[j]).issubset(right_set):
+                             # Right cluster
+                             right_cov = self.cov.iloc[clusters[j], clusters[j]]
+                             right_returns = self.returns.iloc[:, clusters[j]]
+                             right_weights = self._naive_risk(
+                                 right_returns, right_cov, rm=rm, rf=rf
+                             )
+
+                             if rm == "vol":
+                                 right_risk_ = rk.Sharpe_Risk(
+                                     returns=right_returns,
+                                     w=right_weights,
+                                     cov=right_cov,
+                                     rm="MV",
+                                     rf=rf,
+                                     alpha=self.alpha,
+                                     a_sim=self.a_sim,
+                                     beta=self.beta,
+                                     b_sim=self.b_sim,
+                                     kappa=self.kappa,
+                                     kappa_g=self.kappa_g,
+                                     solver=self.solver_rl,
+                                 )
+                             else:
+                                 right_risk_ = rk.Sharpe_Risk(
+                                     returns=right_returns,
+                                     w=right_weights,
+                                     cov=right_cov,
+                                     rm=rm,
+                                     rf=rf,
+                                     alpha=self.alpha,
+                                     a_sim=self.a_sim,
+                                     beta=self.beta,
+                                     b_sim=self.b_sim,
+                                     kappa=self.kappa,
+                                     kappa_g=self.kappa_g,
+                                     solver=self.solver_rl,
+                                 )
+                             if rm == "MV":
+                                 right_risk_ = np.power(right_risk_, 2)
+
+                             right_risk += right_risk_
+                             right_cluster += clusters[j]
+
+                     alpha_1 = 1 - left_risk / (left_risk + right_risk)
+
+                 weights.iloc[left] *= alpha_1  # weight 1
+                 weights.iloc[right] *= 1 - alpha_1  # weight 2
+
+         # Get constituents of k clusters
+         clustered_assets = pd.Series(
+             hr.cut_tree(Z, n_clusters=self.k).flatten(), index=self.cov.index
+         )
+         # Multiply within-cluster weight with inter-cluster weight
+         for i in range(self.k):
+             cluster = clustered_assets.loc[clustered_assets == i]
+             cluster_cov = self.cov.loc[cluster.index, cluster.index]
+             cluster_returns = self.returns.loc[:, cluster.index]
+             if model == "HERC":
+                 cluster_weights = pd.Series(
+                     self._naive_risk(
+                         cluster_returns, cluster_cov, rm=rm, rf=rf
+                     ).flatten(),
+                     index=cluster_cov.index,
+                 )
+
+             elif model == "HERC2":
+                 cluster_weights = pd.Series(
+                     self._naive_risk(
+                         cluster_returns, cluster_cov, rm="equal", rf=rf
+                     ).flatten(),
+                     index=cluster_cov.index,
+                 )
+             weights.loc[cluster_weights.index] *= cluster_weights
+
+         return weights
+
+     # compute intra-cluster weights
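+     # For each of the k clusters, solve the chosen optimization on that
+     # cluster's sub-universe; the result is an (n_assets x k) matrix of
+     # weights with zeros for assets outside each cluster.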
+     def _intra_weights(
+         self,
+         Z,
+         obj="MinRisk",
+         rm="MV",
+         rf=0,
+         l=2,
+     ):
+         # Get constituents of k clusters
+         clustered_assets = pd.Series(
+             hr.cut_tree(Z, n_clusters=self.k).flatten(), index=self.cov.index
+         )
+
+         # get covariance matrices for each cluster
+         intra_weights = pd.DataFrame(index=clustered_assets.index)
+         for i in range(self.k):
+             cluster = clustered_assets.loc[clustered_assets == i]
+             if self.mu is not None:
+                 cluster_mu = self.mu.loc[:, cluster.index]
+             else:
+                 cluster_mu = None
+             cluster_cov = self.cov.loc[cluster.index, cluster.index]
+             cluster_returns = self.returns.loc[:, cluster.index]
+             weights = self._opt_w(
+                 cluster_returns,
+                 cluster_mu,
+                 cluster_cov,
+                 obj=obj,
+                 rm=rm,
+                 rf=rf,
+                 l=l,
+             )
+             weights = pd.Series(
+                 weights.flatten(),
+                 index=cluster_cov.index,
+             )
+             intra_weights[i] = weights
+
+         intra_weights = intra_weights.fillna(0)
+         return intra_weights
+
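+     # compute inter-cluster weights on the reduced cluster-level problem and
+     # combine them with the intra-cluster weights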
+     def _inter_weights(
+         self,
+         intra_weights,
+         obj="MinRisk",
+         rm="MV",
+         rf=0,
+         l=2,
+     ):
+         # inter-cluster mean vector
+         if self.mu is not None:
+             tot_mu = self.mu @ intra_weights
+         else:
+             tot_mu = None
+         # inter-cluster covariance matrix
+         tot_cov = intra_weights.T.dot(np.dot(self.cov, intra_weights))
+         # inter-cluster returns matrix
+         tot_ret = self.returns @ intra_weights
+
+         # inter-cluster weights
+         inter_weights = self._opt_w(
+             tot_ret,
+             tot_mu,
+             tot_cov,
+             obj=obj,
+             rm=rm,
+             rf=rf,
+             l=l,
+         )
+         inter_weights = pd.Series(inter_weights.flatten(), index=intra_weights.columns)
+         # determine the weight on each cluster by multiplying the intra-cluster
+         # weight with the inter-cluster weight
+         weights = intra_weights.mul(inter_weights, axis=1).sum(axis=1).sort_index()
+
+         return weights
+
+     # Allocate weights
+     def optimization(
+         self,
+         model="HRP",
+         codependence="pearson",
+         obj="MinRisk",
+         rm="MV",
+         rf=0,
+         l=2,
+         method_mu="hist",
+         method_cov="hist",
+         custom_mu=None,
+         custom_cov=None,
+         linkage="single",
+         opt_k_method="twodiff",
+         k=None,
+         max_k=10,
+         bins_info="KN",
+         alpha_tail=0.05,
+         gs_threshold=0.5,
+         leaf_order=True,
+         dict_mu={},
+         dict_cov={},
+     ):
+         r"""
+         This method calculates the optimal portfolio according to the
+         optimization model selected by the user.
+
+         Parameters
+         ----------
+         model : str, optional
+             The hierarchical cluster portfolio model used to optimize the
+             portfolio. The default is 'HRP'. Possible values are:
+
+             - 'HRP': Hierarchical Risk Parity.
+             - 'HERC': Hierarchical Equal Risk Contribution.
+             - 'HERC2': HERC but splitting weights equally within clusters.
+             - 'NCO': Nested Clustered Optimization.
+
+         codependence : str, optional
+             The codependence or similarity matrix used to build the distance
+             metric and clusters. The default is 'pearson'. Possible values are:
+
+             - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
+             - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
+             - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
+             - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
+             - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
+             - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{pearson}_{i,j}|)}`.
+             - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{spearman}_{i,j}|)}`.
+             - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
+             - 'distance': distance correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
+             - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
+             - 'tail': lower tail dependence index matrix. Dissimilarity formula: :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
+             - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
+
+         obj : str, optional
+             Objective function used by the NCO model.
+             The default is 'MinRisk'. Possible values are:
+
+             - 'MinRisk': Minimize the selected risk measure.
+             - 'Utility': Maximize the Utility function :math:`\mu w - l \phi_{i}(w)`.
+             - 'Sharpe': Maximize the risk adjusted return ratio based on the selected risk measure.
+             - 'ERC': Equal risk contribution portfolio of the selected risk measure.
+
+         rm : str, optional
+             The risk measure used to optimize the portfolio. If model is 'NCO',
+             the risk measures available depend on the objective function.
+             The default is 'MV'. Possible values are:
+
+             - 'equal': Equally weighted.
+             - 'vol': Standard Deviation.
+             - 'MV': Variance.
+             - 'KT': Square Root Kurtosis.
+             - 'MAD': Mean Absolute Deviation.
+             - 'MSV': Semi Standard Deviation.
+             - 'SKT': Square Root Semi Kurtosis.
+             - 'FLPM': First Lower Partial Moment (Omega Ratio).
+             - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
+             - 'VaR': Value at Risk.
+             - 'CVaR': Conditional Value at Risk.
+             - 'TG': Tail Gini.
+             - 'EVaR': Entropic Value at Risk.
+             - 'RLVaR': Relativistic Value at Risk. I recommend using this risk measure only with the MOSEK solver.
+             - 'WR': Worst Realization (Minimax).
+             - 'VRG': VaR range of returns.
+             - 'CVRG': CVaR range of returns.
+             - 'TGRG': Tail Gini range of returns.
+             - 'EVRG': EVaR range of returns.
+             - 'RVRG': RLVaR range of returns. I recommend using this risk measure only with the MOSEK solver.
+             - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
+             - 'ADD': Average Drawdown of uncompounded cumulative returns.
+             - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
+             - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
+             - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
+             - 'RLDaR': Relativistic Drawdown at Risk of uncompounded cumulative returns. I recommend using this risk measure only with the MOSEK solver.
+             - 'UCI': Ulcer Index of uncompounded cumulative returns.
+             - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
+             - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
+             - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
+             - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
+             - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
+             - 'RLDaR_Rel': Relativistic Drawdown at Risk of compounded cumulative returns. I recommend using this risk measure only with the MOSEK solver.
+             - 'UCI_Rel': Ulcer Index of compounded cumulative returns.
+
+         rf : float, optional
+             Risk free rate, must be in the same period of assets returns.
+             The default is 0.
+         l : scalar, optional
+             Risk aversion factor of the 'Utility' objective function.
+             The default is 2.
+         method_mu : str, optional
+             The method used to estimate the expected returns vector.
+             The default value is 'hist'. Possible values are:
+
+             - 'hist': use historical estimator.
+             - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
+             - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
+             - 'JS': James-Stein estimator. For more information see :cite:`c-Meucci2005` and :cite:`c-Feng2016`.
+             - 'BS': Bayes-Stein estimator. For more information see :cite:`c-Jorion1986`.
+             - 'BOP': BOP estimator. For more information see :cite:`c-Bodnar2019`.
+             - 'custom_mu': use custom expected returns vector.
+
+         method_cov : str, optional
+             The method used to estimate the covariance matrix.
+             The default is 'hist'. Possible values are:
+
+             - 'hist': use historical estimates.
+             - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
+             - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
+             - 'ledoit': use the Ledoit and Wolf Shrinkage method.
+             - 'oas': use the Oracle Approximation Shrinkage method.
+             - 'shrunk': use the basic Shrunk Covariance method.
+             - 'gl': use the basic Graphical Lasso Covariance method.
+             - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`c-jLogo`.
+             - 'fixed': denoise using fixed method. For more information see chapter 2 of :cite:`c-MLforAM`.
+             - 'spectral': denoise using spectral method. For more information see chapter 2 of :cite:`c-MLforAM`.
+             - 'shrink': denoise using shrink method. For more information see chapter 2 of :cite:`c-MLforAM`.
+             - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`c-Gerber2021`.
+             - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`c-Gerber2021`.
+             - 'custom_cov': use custom covariance matrix.
+
+         custom_mu : Series, DataFrame or None, optional
+             Custom mean vector used when the NCO objective is 'Utility' or 'Sharpe'.
+             The default is None.
+         custom_cov : DataFrame or None, optional
+             Custom covariance matrix, used when codependence or covariance
+             parameters have value 'custom_cov'. The default is None.
+         linkage : string, optional
+             Linkage method of hierarchical clustering. For more information see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html>`_.
+             The default is 'single'. Possible values are:
+
+             - 'single'.
+             - 'complete'.
+             - 'average'.
+             - 'weighted'.
+             - 'centroid'.
+             - 'median'.
+             - 'ward'.
+             - 'DBHT': Direct Bubble Hierarchical Tree.
+
+         opt_k_method : str, optional
+             Method used to calculate the optimum number of clusters.
+             The default is 'twodiff'. Possible values are:
+
+             - 'twodiff': two difference gap statistic.
+             - 'stdsil': standardized silhouette score.
+
+         k : int, optional
+             Number of clusters. This value is used instead of the optimal number
+             of clusters calculated with the two difference gap statistic.
+             The default is None.
+         max_k : int, optional
+             Max number of clusters used by the two difference gap statistic
+             to find the optimal number of clusters. The default is 10.
+         bins_info : int or str, optional
+             Number of bins used to calculate variation of information. The default
+             value is 'KN'. Possible values are:
+
+             - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
+             - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
+             - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
+             - 'HGR': Hacine-Gharbi and Ravier's choice method.
+             - int: integer value chosen by the user.
+
+         alpha_tail : float, optional
+             Significance level for lower tail dependence index. The default is 0.05.
+         gs_threshold : float, optional
+             Gerber statistic threshold. The default is 0.5.
+         leaf_order : bool, optional
+             Indicates if the clusters are ordered so that the distance between
+             successive leaves is minimal. The default is True.
+         dict_mu : dict
+             Other variables related to the mean vector estimation method.
+         dict_cov : dict
+             Other variables related to the covariance estimation method.
+
+         Returns
+         -------
+         w : DataFrame
+             The weights of the optimal portfolio.
+
+         See Also
+         --------
+         riskfolio.src.ParamsEstimation.mean_vector
+         riskfolio.src.ParamsEstimation.covar_matrix
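+
+         Example
+         -------
+         A minimal sketch, assuming the object was created from a returns
+         DataFrame::
+
+             w = port.optimization(model='HRP', codependence='pearson',
+                                   rm='MV', rf=0, linkage='single', max_k=10,
+                                   leaf_order=True)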
+         """
+
+         # Covariance matrix
+         if method_cov == "custom_cov":
+             if isinstance(custom_cov, pd.DataFrame):
+                 if custom_cov.shape[0] != custom_cov.shape[1]:
+                     raise NameError("custom_cov must be a square DataFrame")
+                 else:
+                     self.cov = custom_cov.copy()
+         else:
+             self.cov = pe.covar_matrix(self.returns, method=method_cov, **dict_cov)
+
+         # Mean vector
+         if method_mu == "custom_mu":
+             if isinstance(custom_mu, pd.Series):
+                 self.mu = custom_mu.to_frame().T
+             elif isinstance(custom_mu, pd.DataFrame):
+                 if custom_mu.shape[0] > 1 and custom_mu.shape[1] == 1:
+                     self.mu = custom_mu.T.copy()
+                 elif custom_mu.shape[0] == 1 and custom_mu.shape[1] > 1:
+                     self.mu = custom_mu.copy()
+                 else:
+                     raise NameError(
+                         "custom_mu must be a one-row or one-column DataFrame"
+                     )
+             else:
+                 raise NameError(
+                     "custom_mu must be a one-row or one-column DataFrame or a Series"
+                 )
+         else:
+             self.mu = pe.mean_vector(self.returns, method=method_mu, **dict_mu)
+         if rm == "KT":
+             self.kurt, self.skurt = True, False
+         elif rm == "SKT":
+             self.kurt, self.skurt = False, True
+         else:
+             self.kurt, self.skurt = False, False
+
+         self.codependence = codependence
+         self.linkage = linkage
+         self.opt_k_method = opt_k_method
+         self.k = k
+         self.max_k = max_k
+         self.bins_info = bins_info
+         self.alpha_tail = alpha_tail
+         self.gs_threshold = gs_threshold
+         self.leaf_order = leaf_order
+
+         # Codependence matrix
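+         # The similarity matrix that drives the clustering; correlation-based
+         # options are computed from the returns, while 'custom_cov' converts
+         # the supplied covariance matrix into a correlation matrix.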
+         if self.codependence in {"pearson", "spearman", "kendall"}:
+             self.codep = self.returns.corr(method=self.codependence).astype(float)
+         elif self.codependence == "gerber1":
+             self.codep = gs.gerber_cov_stat1(self.returns, threshold=self.gs_threshold)
+             self.codep = af.cov2corr(self.codep).astype(float)
+         elif self.codependence == "gerber2":
+             self.codep = gs.gerber_cov_stat2(self.returns, threshold=self.gs_threshold)
+             self.codep = af.cov2corr(self.codep).astype(float)
+         elif self.codependence in {"abs_pearson", "abs_spearman", "abs_kendall"}:
+             self.codep = np.abs(self.returns.corr(method=self.codependence[4:])).astype(float)
+         elif self.codependence in {"distance"}:
+             self.codep = af.dcorr_matrix(self.returns).astype(float)
+         elif self.codependence in {"mutual_info"}:
+             self.codep = af.mutual_info_matrix(self.returns, self.bins_info).astype(float)
+         elif self.codependence in {"tail"}:
+             self.codep = af.ltdi_matrix(self.returns, alpha=self.alpha_tail).astype(float)
+         elif self.codependence in {"custom_cov"}:
+             self.codep = af.cov2corr(custom_cov).astype(float)
+
+         # Step-1: Tree clustering
+         self.clustering, self.k = self._hierarchical_clustering(
+             model,
+             self.codependence,
+             self.linkage,
+             self.opt_k_method,
+             self.k,
+             self.max_k,
+             self.leaf_order,
+         )
+         if k is not None:
+             self.k = int(k)
+
+         # Step-2: Seriation (Quasi-Diagonalization)
+         self.sort_order = self._seriation(self.clustering)
+         asset_order = [self.assetslist[i] for i in self.sort_order]
+         self.asset_order = asset_order.copy()
+         self.codep_sorted = self.codep.reindex(
+             index=self.asset_order, columns=self.asset_order
+         )
+
+         # Step-2.1: Bound creation
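+         # Scalar or per-asset bounds are broadcast to Series indexed by asset
+         # and clipped to [0, 1]; upper bounds must sum to at least 1 so a
+         # fully invested portfolio remains feasible.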
+         if self.w_max is None:
+             upper_bound = pd.Series(1.0, index=self.assetslist)
+         elif isinstance(self.w_max, (int, float)):
+             upper_bound = pd.Series(self.w_max, index=self.assetslist)
+             upper_bound = np.minimum(1.0, upper_bound).loc[self.assetslist]
+             if upper_bound.sum() < 1:
+                 raise NameError("The sum of upper bounds must be greater than or equal to 1")
+         elif isinstance(self.w_max, pd.Series):
+             upper_bound = np.minimum(1.0, self.w_max).loc[self.assetslist]
+             if upper_bound.sum() < 1.0:
+                 raise NameError("The sum of upper bounds must be greater than or equal to 1")
+
+         if self.w_min is None:
+             lower_bound = pd.Series(0.0, index=self.assetslist)
+         elif isinstance(self.w_min, (int, float)):
+             lower_bound = pd.Series(self.w_min, index=self.assetslist)
+             lower_bound = np.maximum(0.0, lower_bound).loc[self.assetslist]
+         elif isinstance(self.w_min, pd.Series):
+             lower_bound = np.maximum(0.0, self.w_min).loc[self.assetslist]
+
+         if not (upper_bound >= lower_bound).all():
+             raise NameError("All upper bounds must be greater than or equal to their lower bounds")
+
+         # Step-3: Recursive bisection
+         if model == "HRP":
+             # Recursive bisection
+             weights = self._recursive_bisection(
+                 self.sort_order,
+                 rm=rm,
+                 rf=rf,
+                 upper_bound=upper_bound,
+                 lower_bound=lower_bound,
+             )
+         elif model in ["HERC", "HERC2"]:
+             # Cluster-based Recursive bisection
+             weights = self._hierarchical_recursive_bisection(
+                 self.clustering,
+                 rm=rm,
+                 rf=rf,
+                 linkage=linkage,
+                 model=model,
+                 upper_bound=upper_bound,
+                 lower_bound=lower_bound,
+             )
+         elif model == "NCO":
+             # Step-3.1: Determine intra-cluster weights
+             intra_weights = self._intra_weights(
+                 self.clustering,
+                 obj=obj,
+                 rm=rm,
+                 rf=rf,
+                 l=l,
+             )
+
+             # Step-3.2: Determine inter-cluster weights and multiply with
+             # intra-cluster weights
+             weights = self._inter_weights(intra_weights, obj=obj, rm=rm, rf=rf, l=l)
+
+         weights = weights.loc[self.assetslist]
+
+         # Step-4: Fit weights to constraints
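+         # Iteratively clip weights to their bounds and redistribute the
+         # clipped excess pro-rata among assets still strictly inside their
+         # bounds, stopping after at most 100 passes.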
+         if (upper_bound < weights).any().item() or (lower_bound > weights).any().item():
+             max_iter = 100
+             j = 0
+             while (
+                 (upper_bound < weights).any().item()
+                 or (lower_bound > weights).any().item()
+             ) and (j < max_iter):
+                 weights_original = weights.copy()
+                 weights = np.maximum(np.minimum(weights, upper_bound), lower_bound)
+                 tickers_mod = weights[
+                     (weights < upper_bound) & (weights > lower_bound)
+                 ].index.tolist()
+
+                 weights_add = np.maximum(weights_original - upper_bound, 0).sum()
+                 weights_sub = np.minimum(weights_original - lower_bound, 0).sum()
+                 delta = weights_add + weights_sub
+
+                 if delta != 0:
+                     weights[tickers_mod] += (
+                         delta * weights[tickers_mod] / weights[tickers_mod].sum()
+                     )
+
+                 j += 1
+
+         weights = weights.loc[self.assetslist].to_frame()
+         weights.columns = ["weights"]
+
+         return weights