riskfolio_lib-7.1.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- riskfolio/__init__.py +14 -0
- riskfolio/external/__init__.py +10 -0
- riskfolio/external/cppfunctions.py +300 -0
- riskfolio/external/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- riskfolio/src/AuxFunctions.py +1488 -0
- riskfolio/src/ConstraintsFunctions.py +2210 -0
- riskfolio/src/DBHT.py +1089 -0
- riskfolio/src/GerberStatistic.py +240 -0
- riskfolio/src/HCPortfolio.py +1102 -0
- riskfolio/src/OwaWeights.py +433 -0
- riskfolio/src/ParamsEstimation.py +1943 -0
- riskfolio/src/PlotFunctions.py +5052 -0
- riskfolio/src/Portfolio.py +6100 -0
- riskfolio/src/Reports.py +692 -0
- riskfolio/src/RiskFunctions.py +3195 -0
- riskfolio/src/__init__.py +20 -0
- riskfolio/version.py +4 -0
- riskfolio_lib-7.1.0.dist-info/LICENSE.txt +27 -0
- riskfolio_lib-7.1.0.dist-info/METADATA +377 -0
- riskfolio_lib-7.1.0.dist-info/RECORD +22 -0
- riskfolio_lib-7.1.0.dist-info/WHEEL +6 -0
- riskfolio_lib-7.1.0.dist-info/top_level.txt +1 -0
riskfolio/src/HCPortfolio.py

@@ -0,0 +1,1102 @@
""""""  #
"""
Copyright (c) 2020-2025, Dany Cajas
All rights reserved.
This work is licensed under BSD 3-Clause "New" or "Revised" License.
License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
"""

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hr
from scipy.spatial.distance import squareform
import riskfolio as rp
import riskfolio.src.RiskFunctions as rk
import riskfolio.src.AuxFunctions as af
import riskfolio.src.ParamsEstimation as pe
import riskfolio.src.DBHT as db
import riskfolio.src.GerberStatistic as gs


__all__ = [
    "HCPortfolio",
]


class HCPortfolio(object):
    r"""
    Class that creates a portfolio object with all properties needed to
    calculate optimal portfolios.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets), optional
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
        The default is None.
    alpha : float, optional
        Significance level of VaR, CVaR, EVaR, RLVaR, DaR, CDaR, EDaR, RLDaR and Tail Gini of losses.
        The default is 0.05.
    a_sim : int, optional
        Number of CVaRs used to approximate Tail Gini of losses. The default is 100.
    beta : float, optional
        Significance level of CVaR and Tail Gini of gains. If None it duplicates alpha value.
        The default is None.
    b_sim : int, optional
        Number of CVaRs used to approximate Tail Gini of gains. If None it duplicates a_sim value.
        The default is None.
    kappa : float, optional
        Deformation parameter of RLVaR and RLDaR for losses, must be between 0 and 1.
        The default is 0.3.
    kappa_g : float, optional
        Deformation parameter of RLVaR and RLDaR for gains, must be between 0 and 1.
        The default is None.
    solver_rl : str, optional
        Solver available for CVXPY that supports power cone programming. Used to calculate RLVaR and RLDaR.
        The default value is 'CLARABEL'.
    solvers : list, optional
        List of solvers available for CVXPY used for the selected NCO method.
        The default value is ['CLARABEL', 'SCS', 'ECOS'].
    w_max : pd.Series or float, optional
        Upper bound constraint for hierarchical risk parity weights :cite:`c-Pfitzinger`.
    w_min : pd.Series or float, optional
        Lower bound constraint for hierarchical risk parity weights :cite:`c-Pfitzinger`.
    """

    def __init__(
        self,
        returns=None,
        alpha=0.05,
        a_sim=100,
        beta=None,
        b_sim=None,
        kappa=0.30,
        kappa_g=None,
        solver_rl="CLARABEL",
        solvers=["CLARABEL", "SCS", "ECOS"],
        w_max=None,
        w_min=None,
    ):
        self._returns = returns
        self.alpha = alpha
        self.a_sim = a_sim
        self.beta = beta
        self.b_sim = b_sim
        self._kappa = kappa
        self._kappa_g = kappa_g
        self.solver_rl = solver_rl
        self.solvers = solvers
        self.asset_order = None
        self.clustering = None
        self.cov = None
        self.mu = None
        self.kurt = False
        self.skurt = False
        self.codep = None
        self.codep_sorted = None
        self.w_max = w_max
        self.w_min = w_min
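
Nothing is estimated at construction time; the object only stores its inputs until optimization() is called. A minimal construction sketch, with hypothetical tickers and toy returns:

    import pandas as pd
    import riskfolio as rp

    # Hypothetical returns: 4 observations x 2 assets.
    returns = pd.DataFrame({
        "AAA": [0.010, -0.020, 0.005, 0.012],
        "BBB": [0.008, -0.010, 0.000, 0.020],
    })
    port = rp.HCPortfolio(returns=returns, alpha=0.05, w_max=0.7, w_min=0.05)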

    @property
    def returns(self):
        if self._returns is not None and isinstance(self._returns, pd.DataFrame):
            return self._returns
        else:
            raise NameError("returns must be a DataFrame")

    @returns.setter
    def returns(self, value):
        if value is not None and isinstance(value, pd.DataFrame):
            self._returns = value
        else:
            raise NameError("returns must be a DataFrame")

    @property
    def assetslist(self):
        if self._returns is not None and isinstance(self._returns, pd.DataFrame):
            return self._returns.columns.tolist()

    @property
    def kappa(self):
        return self._kappa

    @kappa.setter
    def kappa(self, value):
        a = value
        if a >= 1:
            print(
                "kappa must be between 0 and 1, values higher than or equal to 1 are set to 0.99"
            )
            self._kappa = 0.99
        elif a <= 0:
            print(
                "kappa must be between 0 and 1, values lower than or equal to 0 are set to 0.01"
            )
            self._kappa = 0.01
        else:
            self._kappa = a

    @property
    def kappa_g(self):
        return self._kappa_g

    @kappa_g.setter
    def kappa_g(self, value):
        a = value
        if a >= 1:
            print(
                "kappa_g must be between 0 and 1, values higher than or equal to 1 are set to 0.99"
            )
            self._kappa_g = 0.99
        elif a <= 0:
            print(
                "kappa_g must be between 0 and 1, values lower than or equal to 0 are set to 0.01"
            )
            self._kappa_g = 0.01
        else:
            self._kappa_g = a
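
Both setters clamp out-of-range values instead of raising, so an assignment never fails. Continuing the construction sketch above:

    port.kappa = 1.5    # prints a warning and stores 0.99
    port.kappa = -0.2   # prints a warning and stores 0.01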

    # get naive-risk weights
    def _naive_risk(self, returns, cov, rm="MV", rf=0):
        assets = returns.columns.tolist()
        n = len(assets)

        if rm == "equal":
            weights = np.ones((n, 1)) * 1 / n
        else:
            inv_risk = np.zeros((n, 1))
            for i in assets:
                k = assets.index(i)
                w = np.zeros((n, 1))
                w[k, 0] = 1
                w = pd.DataFrame(w, columns=["weights"], index=assets)
                if rm == "vol":
                    risk = rk.Sharpe_Risk(
                        returns=returns,
                        w=w,
                        cov=cov,
                        rm="MV",
                        rf=rf,
                        alpha=self.alpha,
                        a_sim=self.a_sim,
                        beta=self.beta,
                        b_sim=self.b_sim,
                        kappa=self.kappa,
                        kappa_g=self.kappa_g,
                        solver=self.solver_rl,
                    )
                else:
                    risk = rk.Sharpe_Risk(
                        returns=returns,
                        w=w,
                        cov=cov,
                        rm=rm,
                        rf=rf,
                        alpha=self.alpha,
                        a_sim=self.a_sim,
                        beta=self.beta,
                        b_sim=self.b_sim,
                        kappa=self.kappa,
                        kappa_g=self.kappa_g,
                        solver=self.solver_rl,
                    )
                inv_risk[k, 0] = risk

            if rm == "MV":
                inv_risk = 1 / np.power(inv_risk, 2)
            else:
                inv_risk = 1 / inv_risk
            weights = inv_risk * (1 / np.sum(inv_risk))

        weights = weights.reshape(-1, 1)

        return weights
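
For rm="MV" the method allocates proportionally to inverse variance; for any other measure, to the inverse of the stand-alone risk. A self-contained numeric sketch of that normalization, using hypothetical stand-alone volatilities:

    import numpy as np

    vol = np.array([0.10, 0.20, 0.40])   # hypothetical stand-alone volatilities
    inv_var = 1 / vol**2                 # rm="MV" inverts the squared risk
    w = inv_var / inv_var.sum()          # normalize to sum to 1
    print(w.round(4))                    # [0.7619 0.1905 0.0476]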

    # get optimal weights
    def _opt_w(
        self,
        returns,
        mu,
        cov,
        obj="MinRisk",
        rm="MV",
        rf=0,
        l=2,
    ):
        if returns.shape[1] == 1:
            weights = np.array([1]).reshape(-1, 1)
        else:
            if obj in {"MinRisk", "Utility", "Sharpe"}:
                port = rp.Portfolio(
                    returns=returns,
                    alpha=self.alpha,
                    a_sim=self.a_sim,
                    beta=self.beta,
                    b_sim=self.b_sim,
                    kappa=self.kappa,
                    kappa_g=self.kappa_g,
                )

                if self.kurt:
                    method_kurt = "hist"
                elif self.skurt:
                    method_kurt = "hist"
                else:
                    method_kurt = None

                port.assets_stats(
                    method_mu="hist", method_cov="hist", method_kurt=method_kurt
                )
                if self.solvers is not None:
                    port.solvers = self.solvers
                if mu is not None:
                    port.mu = mu
                if cov is not None:
                    port.cov = cov
                weights = port.optimization(
                    model="Classic", rm=rm, obj=obj, rf=rf, l=l, hist=True
                ).to_numpy()
            elif obj in {"ERC"}:
                port = rp.Portfolio(
                    returns=returns,
                    alpha=self.alpha,
                    a_sim=self.a_sim,
                    beta=self.beta,
                    b_sim=self.b_sim,
                    kappa=self.kappa,
                    kappa_g=self.kappa_g,
                )

                if self.kurt:
                    method_kurt = "hist"
                elif self.skurt:
                    method_kurt = "hist"
                else:
                    method_kurt = None
                port.assets_stats(
                    method_mu="hist", method_cov="hist", method_kurt=method_kurt
                )
                if self.solvers is not None:
                    port.solvers = self.solvers
                if mu is not None:
                    port.mu = mu
                if cov is not None:
                    port.cov = cov
                weights = port.rp_optimization(
                    model="Classic", rm=rm, rf=rf, b=None, hist=True
                ).to_numpy()

            weights = weights.reshape(-1, 1)

        return weights
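
Each sub-problem above is an ordinary classic mean-risk optimization delegated to rp.Portfolio. A standalone sketch of the equivalent 'MinRisk' call, assuming returns holds one cluster's return columns:

    import riskfolio as rp

    port = rp.Portfolio(returns=returns)
    port.assets_stats(method_mu="hist", method_cov="hist")
    w = port.optimization(model="Classic", rm="MV", obj="MinRisk", rf=0, l=2, hist=True)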

    # Create hierarchical clustering
    def _hierarchical_clustering(
        self,
        model="HRP",
        codependence="pearson",
        linkage="ward",
        opt_k_method="twodiff",
        k=None,
        max_k=10,
        leaf_order=True,
    ):
        # Calculating distance
        if codependence in {
            "pearson",
            "spearman",
            "kendall",
            "gerber1",
            "gerber2",
            "custom_cov",
        }:
            dist = np.sqrt(np.clip((1 - self.codep) / 2, a_min=0.0, a_max=1.0))
        elif codependence in {"abs_pearson", "abs_spearman", "abs_kendall", "distance"}:
            dist = np.sqrt(np.clip((1 - self.codep), a_min=0.0, a_max=1.0))
        elif codependence in {"mutual_info"}:
            dist = af.var_info_matrix(self.returns, self.bins_info).astype(float)
        elif codependence in {"tail"}:
            dist = -np.log(self.codep).astype(float)

        # Hierarchical clustering
        dist = dist.to_numpy()
        dist = pd.DataFrame(dist, columns=self.codep.columns, index=self.codep.index)
        if linkage == "DBHT":
            # different choices for D, S give different outputs!
            D = dist.to_numpy()  # dissimilarity matrix
            if codependence in {
                "pearson",
                "spearman",
                "kendall",
                "gerber1",
                "gerber2",
                "custom_cov",
            }:
                codep = 1 - dist**2
                S = codep.to_numpy()  # similarity matrix
            else:
                S = self.codep.to_numpy()  # similarity matrix
            (_, _, _, _, _, clustering) = db.DBHTs(
                D, S, leaf_order=leaf_order
            )  # DBHT clustering
        else:
            p_dist = squareform(dist, checks=False)
            clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)

        if model in {"HERC", "HERC2", "NCO"}:
            # optimal number of clusters
            if k is None:
                if opt_k_method == "twodiff":
                    k, _ = af.two_diff_gap_stat(dist, clustering, max_k)
                elif opt_k_method == "stdsil":
                    k, _ = af.std_silhouette_score(dist, clustering, max_k)
                else:
                    raise ValueError(
                        "The only opt_k_method values available are 'twodiff' and 'stdsil'"
                    )
        else:
            k = None

        return clustering, k
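
For the correlation-like codependence measures, the transform above maps a correlation in [-1, 1] to a proper distance D = sqrt((1 - rho) / 2) in [0, 1] before linkage. A standalone sketch with a hypothetical 3x3 correlation matrix:

    import numpy as np
    import pandas as pd
    import scipy.cluster.hierarchy as hr
    from scipy.spatial.distance import squareform

    corr = pd.DataFrame(
        [[1.0, 0.8, 0.2], [0.8, 1.0, 0.1], [0.2, 0.1, 1.0]],
        index=["A", "B", "C"], columns=["A", "B", "C"],
    )
    dist = np.sqrt(np.clip((1 - corr) / 2, 0.0, 1.0))
    p_dist = squareform(dist.to_numpy(), checks=False)  # condensed distance vector
    Z = hr.linkage(p_dist, method="ward", optimal_ordering=True)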

    # sort clustered items by distance
    def _seriation(self, clusters):
        return hr.leaves_list(clusters)

    # compute HRP weight allocation through recursive bisection
    def _recursive_bisection(
        self,
        sort_order,
        rm="MV",
        rf=0,
        upper_bound=None,
        lower_bound=None,
    ):
        weights = pd.Series(1.0, index=self.assetslist)  # set initial weights to 1
        items = [sort_order]

        while len(items) > 0:  # loop while there are clusters left to bisect
            items = [
                i[j:k]
                for i in items
                for j, k in (
                    (0, len(i) // 2),
                    (len(i) // 2, len(i)),
                )  # get cluster indices
                if len(i) > 1
            ]

            # allocate weight to left and right cluster
            for i in range(0, len(items), 2):
                left_cluster = items[i]
                right_cluster = items[i + 1]

                # Left cluster
                left_cov = self.cov.iloc[left_cluster, left_cluster]
                left_returns = self.returns.iloc[:, left_cluster]
                left_weights = self._naive_risk(left_returns, left_cov, rm=rm, rf=rf)

                if rm == "vol":
                    left_risk = rk.Sharpe_Risk(
                        returns=left_returns,
                        w=left_weights,
                        cov=left_cov,
                        rm="MV",
                        rf=rf,
                        alpha=self.alpha,
                        a_sim=self.a_sim,
                        beta=self.beta,
                        b_sim=self.b_sim,
                        kappa=self.kappa,
                        solver=self.solver_rl,
                    )
                else:
                    left_risk = rk.Sharpe_Risk(
                        returns=left_returns,
                        w=left_weights,
                        cov=left_cov,
                        rm=rm,
                        rf=rf,
                        alpha=self.alpha,
                        a_sim=self.a_sim,
                        beta=self.beta,
                        b_sim=self.b_sim,
                        kappa=self.kappa,
                        solver=self.solver_rl,
                    )
                if rm == "MV":
                    left_risk = np.power(left_risk, 2)

                # Right cluster
                right_cov = self.cov.iloc[right_cluster, right_cluster]
                right_returns = self.returns.iloc[:, right_cluster]
                right_weights = self._naive_risk(right_returns, right_cov, rm=rm, rf=rf)

                if rm == "vol":
                    right_risk = rk.Sharpe_Risk(
                        returns=right_returns,
                        w=right_weights,
                        cov=right_cov,
                        rm="MV",
                        rf=rf,
                        alpha=self.alpha,
                        a_sim=self.a_sim,
                        beta=self.beta,
                        b_sim=self.b_sim,
                        kappa=self.kappa,
                        solver=self.solver_rl,
                    )
                else:
                    right_risk = rk.Sharpe_Risk(
                        returns=right_returns,
                        w=right_weights,
                        cov=right_cov,
                        rm=rm,
                        rf=rf,
                        alpha=self.alpha,
                        a_sim=self.a_sim,
                        beta=self.beta,
                        b_sim=self.b_sim,
                        kappa=self.kappa,
                        solver=self.solver_rl,
                    )
                if rm == "MV":
                    right_risk = np.power(right_risk, 2)

                # Allocate weight to clusters
                alpha_1 = 1 - left_risk / (left_risk + right_risk)

                weights.iloc[left_cluster] *= alpha_1  # weight 1
                weights.iloc[right_cluster] *= 1 - alpha_1  # weight 2

        return weights
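
The split rule is the standard HRP one: each parent node passes the fraction alpha_1 = 1 - R_left / (R_left + R_right) of its weight to the left child, so the less risky side receives more. A one-line numeric check with hypothetical cluster variances:

    left_risk, right_risk = 0.02, 0.06
    alpha_1 = 1 - left_risk / (left_risk + right_risk)
    print(alpha_1, 1 - alpha_1)  # 0.75 for the left cluster, 0.25 for the right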

    # compute HERC weight allocation through cluster-based bisection
    def _hierarchical_recursive_bisection(
        self,
        Z,
        rm="MV",
        rf=0,
        linkage="ward",
        model="HERC",
        upper_bound=None,
        lower_bound=None,
    ):
        # Transform linkage to tree and reverse order
        root, nodes = hr.to_tree(Z, rd=True)
        nodes = np.array(nodes)
        nodes_1 = np.array([i.dist for i in nodes])
        idx = np.argsort(nodes_1)
        nodes = nodes[idx][::-1].tolist()
        weights = pd.Series(1.0, index=self.assetslist)  # Set initial weights to 1

        clustering_inds = hr.fcluster(Z, self.k, criterion="maxclust")
        clusters = {
            i: [] for i in range(min(clustering_inds), max(clustering_inds) + 1)
        }
        for i, v in enumerate(clustering_inds):
            clusters[v].append(i)

        # Loop through k clusters
        for i in nodes[: self.k - 1]:
            if i.is_leaf() == False:  # skip leaf-nodes
                left = i.get_left().pre_order()  # get left cluster
                right = i.get_right().pre_order()  # get right cluster
                left_set = set(left)
                right_set = set(right)
                left_risk = 0
                right_risk = 0
                left_cluster = []
                right_cluster = []

                # Allocate weight to clusters
                if rm == "equal":
                    alpha_1 = 0.5

                else:
                    for j in clusters.keys():
                        if set(clusters[j]).issubset(left_set):
                            # Left cluster
                            left_cov = self.cov.iloc[clusters[j], clusters[j]]
                            left_returns = self.returns.iloc[:, clusters[j]]
                            left_weights = self._naive_risk(
                                left_returns, left_cov, rm=rm, rf=rf
                            )

                            if rm == "vol":
                                left_risk_ = rk.Sharpe_Risk(
                                    returns=left_returns,
                                    w=left_weights,
                                    cov=left_cov,
                                    rm="MV",
                                    rf=rf,
                                    alpha=self.alpha,
                                    a_sim=self.a_sim,
                                    beta=self.beta,
                                    b_sim=self.b_sim,
                                    kappa=self.kappa,
                                    kappa_g=self.kappa_g,
                                    solver=self.solver_rl,
                                )
                            else:
                                left_risk_ = rk.Sharpe_Risk(
                                    returns=left_returns,
                                    w=left_weights,
                                    cov=left_cov,
                                    rm=rm,
                                    rf=rf,
                                    alpha=self.alpha,
                                    a_sim=self.a_sim,
                                    beta=self.beta,
                                    b_sim=self.b_sim,
                                    kappa=self.kappa,
                                    kappa_g=self.kappa_g,
                                    solver=self.solver_rl,
                                )
                            if rm == "MV":
                                left_risk_ = np.power(left_risk_, 2)

                            left_risk += left_risk_
                            left_cluster += clusters[j]

                        elif set(clusters[j]).issubset(right_set):
                            # Right cluster
                            right_cov = self.cov.iloc[clusters[j], clusters[j]]
                            right_returns = self.returns.iloc[:, clusters[j]]
                            right_weights = self._naive_risk(
                                right_returns, right_cov, rm=rm, rf=rf
                            )

                            if rm == "vol":
                                right_risk_ = rk.Sharpe_Risk(
                                    returns=right_returns,
                                    w=right_weights,
                                    cov=right_cov,
                                    rm="MV",
                                    rf=rf,
                                    alpha=self.alpha,
                                    a_sim=self.a_sim,
                                    beta=self.beta,
                                    b_sim=self.b_sim,
                                    kappa=self.kappa,
                                    kappa_g=self.kappa_g,
                                    solver=self.solver_rl,
                                )
                            else:
                                right_risk_ = rk.Sharpe_Risk(
                                    returns=right_returns,
                                    w=right_weights,
                                    cov=right_cov,
                                    rm=rm,
                                    rf=rf,
                                    alpha=self.alpha,
                                    a_sim=self.a_sim,
                                    beta=self.beta,
                                    b_sim=self.b_sim,
                                    kappa=self.kappa,
                                    kappa_g=self.kappa_g,
                                    solver=self.solver_rl,
                                )
                            if rm == "MV":
                                right_risk_ = np.power(right_risk_, 2)

                            right_risk += right_risk_
                            right_cluster += clusters[j]

                    alpha_1 = 1 - left_risk / (left_risk + right_risk)

                weights.iloc[left] *= alpha_1  # weight 1
                weights.iloc[right] *= 1 - alpha_1  # weight 2

        # Get constituents of k clusters
        clustered_assets = pd.Series(
            hr.cut_tree(Z, n_clusters=self.k).flatten(), index=self.cov.index
        )
        # Multiply within-cluster weight with inter-cluster weight
        for i in range(self.k):
            cluster = clustered_assets.loc[clustered_assets == i]
            cluster_cov = self.cov.loc[cluster.index, cluster.index]
            cluster_returns = self.returns.loc[:, cluster.index]
            if model == "HERC":
                cluster_weights = pd.Series(
                    self._naive_risk(
                        cluster_returns, cluster_cov, rm=rm, rf=rf
                    ).flatten(),
                    index=cluster_cov.index,
                )

            elif model == "HERC2":
                cluster_weights = pd.Series(
                    self._naive_risk(
                        cluster_returns, cluster_cov, rm="equal", rf=rf
                    ).flatten(),
                    index=cluster_cov.index,
                )
            weights.loc[cluster_weights.index] *= cluster_weights

        return weights

    # compute intra-cluster weights
    def _intra_weights(
        self,
        Z,
        obj="MinRisk",
        rm="MV",
        rf=0,
        l=2,
    ):
        # Get constituents of k clusters
        clustered_assets = pd.Series(
            hr.cut_tree(Z, n_clusters=self.k).flatten(), index=self.cov.index
        )

        # get covariance matrices for each cluster
        intra_weights = pd.DataFrame(index=clustered_assets.index)
        for i in range(self.k):
            cluster = clustered_assets.loc[clustered_assets == i]
            if self.mu is not None:
                cluster_mu = self.mu.loc[:, cluster.index]
            else:
                cluster_mu = None
            cluster_cov = self.cov.loc[cluster.index, cluster.index]
            cluster_returns = self.returns.loc[:, cluster.index]
            weights = self._opt_w(
                cluster_returns,
                cluster_mu,
                cluster_cov,
                obj=obj,
                rm=rm,
                rf=rf,
                l=l,
            )
            weights = pd.Series(
                weights.flatten(),
                index=cluster_cov.index,
            )
            intra_weights[i] = weights

        intra_weights = intra_weights.fillna(0)
        return intra_weights

    def _inter_weights(
        self,
        intra_weights,
        obj="MinRisk",
        rm="MV",
        rf=0,
        l=2,
    ):
        # inter-cluster mean vector
        if self.mu is not None:
            tot_mu = self.mu @ intra_weights
        else:
            tot_mu = None
        # inter-cluster covariance matrix
        tot_cov = intra_weights.T.dot(np.dot(self.cov, intra_weights))
        # inter-cluster returns matrix
        tot_ret = self.returns @ intra_weights

        # inter-cluster weights
        inter_weights = self._opt_w(
            tot_ret,
            tot_mu,
            tot_cov,
            obj=obj,
            rm=rm,
            rf=rf,
            l=l,
        )
        inter_weights = pd.Series(inter_weights.flatten(), index=intra_weights.columns)
        # determine the weight on each cluster by multiplying the intra-cluster weight with the inter-cluster weight
        weights = intra_weights.mul(inter_weights, axis=1).sum(axis=1).sort_index()

        return weights
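
The final NCO weights are the intra-cluster weights scaled by their cluster's inter-cluster weight, exactly the mul/sum in the last step above. A toy check with two hypothetical clusters of two assets each:

    import pandas as pd

    intra = pd.DataFrame(
        [[0.6, 0.0], [0.4, 0.0], [0.0, 0.7], [0.0, 0.3]],
        index=["A", "B", "C", "D"], columns=[0, 1],
    )
    inter = pd.Series([0.5, 0.5], index=[0, 1])
    w = intra.mul(inter, axis=1).sum(axis=1)
    print(w.tolist())  # [0.3, 0.2, 0.35, 0.15]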

    # Allocate weights
    def optimization(
        self,
        model="HRP",
        codependence="pearson",
        obj="MinRisk",
        rm="MV",
        rf=0,
        l=2,
        method_mu="hist",
        method_cov="hist",
        custom_mu=None,
        custom_cov=None,
        linkage="single",
        opt_k_method="twodiff",
        k=None,
        max_k=10,
        bins_info="KN",
        alpha_tail=0.05,
        gs_threshold=0.5,
        leaf_order=True,
        dict_mu={},
        dict_cov={},
    ):
        r"""
        This method calculates the optimal portfolio according to the
        optimization model selected by the user.

        Parameters
        ----------
        model : str, optional
            The hierarchical cluster portfolio model used to optimize the
            portfolio. The default is 'HRP'. Possible values are:

            - 'HRP': Hierarchical Risk Parity.
            - 'HERC': Hierarchical Equal Risk Contribution.
            - 'HERC2': HERC but splitting weights equally within clusters.
            - 'NCO': Nested Clustered Optimization.

        codependence : str, optional
            The codependence or similarity matrix used to build the distance
            metric and clusters. The default is 'pearson'. Possible values are:

            - 'pearson': Pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
            - 'spearman': Spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
            - 'kendall': Kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
            - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
            - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
            - 'abs_pearson': absolute value of the Pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{pearson}_{i,j}|)}`.
            - 'abs_spearman': absolute value of the Spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{spearman}_{i,j}|)}`.
            - 'abs_kendall': absolute value of the Kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
            - 'distance': distance correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
            - 'mutual_info': mutual information matrix. The distance used is the variation of information matrix.
            - 'tail': lower tail dependence index matrix. Dissimilarity formula: :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
            - 'custom_cov': custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

        obj : str, optional
            Objective function used by the NCO model.
            The default is 'MinRisk'. Possible values are:

            - 'MinRisk': Minimize the selected risk measure.
            - 'Utility': Maximize the Utility function :math:`\mu w - l \phi_{i}(w)`.
            - 'Sharpe': Maximize the risk adjusted return ratio based on the selected risk measure.
            - 'ERC': Equal risk contribution portfolio of the selected risk measure.

        rm : str, optional
            The risk measure used to optimize the portfolio. If model is 'NCO',
            the risk measures available depend on the objective function.
            The default is 'MV'. Possible values are:

            - 'equal': Equally weighted.
            - 'vol': Standard Deviation.
            - 'MV': Variance.
            - 'KT': Square Root Kurtosis.
            - 'MAD': Mean Absolute Deviation.
            - 'MSV': Semi Standard Deviation.
            - 'SKT': Square Root Semi Kurtosis.
            - 'FLPM': First Lower Partial Moment (Omega Ratio).
            - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
            - 'VaR': Value at Risk.
            - 'CVaR': Conditional Value at Risk.
            - 'TG': Tail Gini.
            - 'EVaR': Entropic Value at Risk.
            - 'RLVaR': Relativistic Value at Risk. It is recommended to use this risk measure only with the MOSEK solver.
            - 'WR': Worst Realization (Minimax).
            - 'VRG': VaR range of returns.
            - 'CVRG': CVaR range of returns.
            - 'TGRG': Tail Gini range of returns.
            - 'EVRG': EVaR range of returns.
            - 'RVRG': RLVaR range of returns. It is recommended to use this risk measure only with the MOSEK solver.
            - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
            - 'ADD': Average Drawdown of uncompounded cumulative returns.
            - 'DaR': Drawdown at Risk of uncompounded cumulative returns.
            - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
            - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
            - 'RLDaR': Relativistic Drawdown at Risk of uncompounded cumulative returns. It is recommended to use this risk measure only with the MOSEK solver.
            - 'UCI': Ulcer Index of uncompounded cumulative returns.
            - 'MDD_Rel': Maximum Drawdown of compounded cumulative returns (Calmar Ratio).
            - 'ADD_Rel': Average Drawdown of compounded cumulative returns.
            - 'DaR_Rel': Drawdown at Risk of compounded cumulative returns.
            - 'CDaR_Rel': Conditional Drawdown at Risk of compounded cumulative returns.
            - 'EDaR_Rel': Entropic Drawdown at Risk of compounded cumulative returns.
            - 'RLDaR_Rel': Relativistic Drawdown at Risk of compounded cumulative returns. It is recommended to use this risk measure only with the MOSEK solver.
            - 'UCI_Rel': Ulcer Index of compounded cumulative returns.

        rf : float, optional
            Risk free rate, must be in the same period as the assets returns.
            The default is 0.
        l : scalar, optional
            Risk aversion factor of the 'Utility' objective function.
            The default is 2.
        method_mu : str, optional
            The method used to estimate the expected returns vector.
            The default value is 'hist'. Possible values are:

            - 'hist': use the historical estimator.
            - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
            - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
            - 'JS': James-Stein estimator. For more information see :cite:`c-Meucci2005` and :cite:`c-Feng2016`.
            - 'BS': Bayes-Stein estimator. For more information see :cite:`c-Jorion1986`.
            - 'BOP': BOP estimator. For more information see :cite:`c-Bodnar2019`.
            - 'custom_mu': use a custom expected returns vector.

        method_cov : str, optional
            The method used to estimate the covariance matrix.
            The default is 'hist'. Possible values are:

            - 'hist': use historical estimates.
            - 'ewma1': use ewma with adjust=True. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
            - 'ewma2': use ewma with adjust=False. For more information see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/window.html#exponentially-weighted-window>`_.
            - 'ledoit': use the Ledoit and Wolf shrinkage method.
            - 'oas': use the Oracle Approximating Shrinkage method.
            - 'shrunk': use the basic Shrunk Covariance method.
            - 'gl': use the basic Graphical Lasso Covariance method.
            - 'jlogo': use the j-LoGo Covariance method. For more information see: :cite:`c-jLogo`.
            - 'fixed': denoise using the fixed method. For more information see chapter 2 of :cite:`c-MLforAM`.
            - 'spectral': denoise using the spectral method. For more information see chapter 2 of :cite:`c-MLforAM`.
            - 'shrink': denoise using the shrink method. For more information see chapter 2 of :cite:`c-MLforAM`.
            - 'gerber1': use the Gerber statistic 1. For more information see: :cite:`c-Gerber2021`.
            - 'gerber2': use the Gerber statistic 2. For more information see: :cite:`c-Gerber2021`.
            - 'custom_cov': use a custom covariance matrix.

        custom_mu : DataFrame or None, optional
            Custom mean vector used when the NCO objective is 'Utility' or 'Sharpe'.
            The default is None.
        custom_cov : DataFrame or None, optional
            Custom covariance matrix, used when the codependence or covariance
            parameters have the value 'custom_cov'. The default is None.
        linkage : str, optional
            Linkage method of hierarchical clustering. For more information see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html>`_.
            The default is 'single'. Possible values are:

            - 'single'.
            - 'complete'.
            - 'average'.
            - 'weighted'.
            - 'centroid'.
            - 'median'.
            - 'ward'.
            - 'DBHT': Direct Bubble Hierarchical Tree.

        opt_k_method : str, optional
            Method used to calculate the optimal number of clusters.
            The default is 'twodiff'. Possible values are:

            - 'twodiff': two difference gap statistic.
            - 'stdsil': standardized silhouette score.

        k : int, optional
            Number of clusters. If provided, this value is used instead of the
            optimal number of clusters calculated with the selected opt_k_method.
            The default is None.
        max_k : int, optional
            Maximum number of clusters used by the two difference gap statistic
            to find the optimal number of clusters. The default is 10.
        bins_info : int or str, optional
            Number of bins used to calculate the variation of information. The
            default value is 'KN'. Possible values are:

            - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
            - 'FD': Freedman–Diaconis choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
            - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
            - 'HGR': Hacine-Gharbi and Ravier's choice method.
            - int: an integer number of bins chosen by the user.

        alpha_tail : float, optional
            Significance level for the lower tail dependence index. The default is 0.05.
        gs_threshold : float, optional
            Gerber statistic threshold. The default is 0.5.
        leaf_order : bool, optional
            Indicates whether the leaves are ordered so that the distance between
            successive leaves is minimal. The default is True.
        dict_mu : dict, optional
            Other parameters passed to the mean vector estimation method.
        dict_cov : dict, optional
            Other parameters passed to the covariance estimation method.

        Returns
        -------
        w : DataFrame
            The weights of the optimal portfolio.

        See Also
        --------
        riskfolio.src.ParamsEstimation.mean_vector
        riskfolio.src.ParamsEstimation.covar_matrix
        """

        # Covariance matrix
        if method_cov == "custom_cov":
            if isinstance(custom_cov, pd.DataFrame) == True:
                if custom_cov.shape[0] != custom_cov.shape[1]:
                    raise NameError("custom_cov must be a square DataFrame")
                else:
                    self.cov = custom_cov.copy()
        else:
            self.cov = pe.covar_matrix(self.returns, method=method_cov, **dict_cov)

        # Mean vector
        if method_mu == "custom_mu":
            if isinstance(custom_mu, pd.Series) == True:
                self.mu = custom_mu.to_frame().T
            elif isinstance(custom_mu, pd.DataFrame) == True:
                if custom_mu.shape[0] > 1 and custom_mu.shape[1] == 1:
                    self.mu = custom_mu.copy()
                elif custom_mu.shape[0] == 1 and custom_mu.shape[1] > 1:
                    self.mu = custom_mu.copy()
                else:
                    raise NameError("custom_mu must be a column DataFrame")
            else:
                raise NameError("custom_mu must be a column DataFrame or Series")
        else:
            self.mu = pe.mean_vector(self.returns, method=method_mu, **dict_mu)
        if rm == "KT":
            self.kurt, self.skurt = True, False
        elif rm == "SKT":
            self.kurt, self.skurt = False, True
        else:
            self.kurt, self.skurt = False, False

        self.codependence = codependence
        self.linkage = linkage
        self.opt_k_method = opt_k_method
        self.k = k
        self.max_k = max_k
        self.bins_info = bins_info
        self.alpha_tail = alpha_tail
        self.gs_threshold = gs_threshold
        self.leaf_order = leaf_order

        # Codependence matrix
        if self.codependence in {"pearson", "spearman", "kendall"}:
            self.codep = self.returns.corr(method=self.codependence).astype(float)
        elif self.codependence == "gerber1":
            self.codep = gs.gerber_cov_stat1(self.returns, threshold=self.gs_threshold)
            self.codep = af.cov2corr(self.codep).astype(float)
        elif self.codependence == "gerber2":
            self.codep = gs.gerber_cov_stat2(self.returns, threshold=self.gs_threshold)
            self.codep = af.cov2corr(self.codep).astype(float)
        elif self.codependence in {"abs_pearson", "abs_spearman", "abs_kendall"}:
            self.codep = np.abs(self.returns.corr(method=self.codependence[4:])).astype(
                float
            )
        elif self.codependence in {"distance"}:
            self.codep = af.dcorr_matrix(self.returns).astype(float)
        elif self.codependence in {"mutual_info"}:
            self.codep = af.mutual_info_matrix(self.returns, self.bins_info).astype(
                float
            )
        elif self.codependence in {"tail"}:
            self.codep = af.ltdi_matrix(self.returns, alpha=self.alpha_tail).astype(
                float
            )
        elif self.codependence in {"custom_cov"}:
            self.codep = af.cov2corr(custom_cov).astype(float)

        # Step-1: Tree clustering
        self.clustering, self.k = self._hierarchical_clustering(
            model,
            self.codependence,
            self.linkage,
            self.opt_k_method,
            self.k,
            self.max_k,
            self.leaf_order,
        )
        if k is not None:
            self.k = int(k)

        # Step-2: Seriation (Quasi-Diagonalization)
        self.sort_order = self._seriation(self.clustering)
        # asset_order = self.assetslist
        asset_order = [self.assetslist[i] for i in self.sort_order]
        self.asset_order = asset_order.copy()
        self.codep_sorted = self.codep.reindex(
            index=self.asset_order, columns=self.asset_order
        )

        # Step-2.1: Bound creation
        if self.w_max is None:
            upper_bound = pd.Series(1.0, index=self.assetslist)
        elif isinstance(self.w_max, int) or isinstance(self.w_max, float):
            upper_bound = pd.Series(self.w_max, index=self.assetslist)
            upper_bound = np.minimum(1.0, upper_bound).loc[self.assetslist]
            if upper_bound.sum() < 1:
                raise NameError("The sum of upper bounds must be greater than or equal to 1")
        elif isinstance(self.w_max, pd.Series):
            upper_bound = np.minimum(1.0, self.w_max).loc[self.assetslist]
            if upper_bound.sum() < 1.0:
                raise NameError("The sum of upper bounds must be greater than or equal to 1")

        if self.w_min is None:
            lower_bound = pd.Series(0.0, index=self.assetslist)
        elif isinstance(self.w_min, int) or isinstance(self.w_min, float):
            lower_bound = pd.Series(self.w_min, index=self.assetslist)
            lower_bound = np.maximum(0.0, lower_bound).loc[self.assetslist]
        elif isinstance(self.w_min, pd.Series):
            lower_bound = np.maximum(0.0, self.w_min).loc[self.assetslist]

        if (upper_bound >= lower_bound).all().item() is False:
            raise NameError("All upper bounds must be greater than or equal to the lower bounds")

        # Step-3: Recursive bisection
        if model == "HRP":
            # Recursive bisection
            weights = self._recursive_bisection(
                self.sort_order,
                rm=rm,
                rf=rf,
                upper_bound=upper_bound,
                lower_bound=lower_bound,
            )
        elif model in ["HERC", "HERC2"]:
            # Cluster-based Recursive bisection
            weights = self._hierarchical_recursive_bisection(
                self.clustering,
                rm=rm,
                rf=rf,
                linkage=linkage,
                model=model,
                upper_bound=upper_bound,
                lower_bound=lower_bound,
            )
        elif model == "NCO":
            # Step-3.1: Determine intra-cluster weights
            intra_weights = self._intra_weights(
                self.clustering,
                obj=obj,
                rm=rm,
                rf=rf,
                l=l,
            )

            # Step-3.2: Determine inter-cluster weights and multiply with intra-cluster weights
            weights = self._inter_weights(intra_weights, obj=obj, rm=rm, rf=rf, l=l)

            weights = weights.loc[self.assetslist]

        # Step-4: Fit weights to constraints
        if (upper_bound < weights).any().item() or (lower_bound > weights).any().item():
            max_iter = 100
            j = 0
            while (
                (upper_bound < weights).any().item()
                or (lower_bound > weights).any().item()
            ) and (j < max_iter):
                weights_original = weights.copy()
                weights = np.maximum(np.minimum(weights, upper_bound), lower_bound)
                tickers_mod = weights[
                    (weights < upper_bound) & (weights > lower_bound)
                ].index.tolist()

                weights_add = np.maximum(weights_original - upper_bound, 0).sum()
                weights_sub = np.minimum(weights_original - lower_bound, 0).sum()
                delta = weights_add + weights_sub

                if delta != 0:
                    weights[tickers_mod] += (
                        delta * weights[tickers_mod] / weights[tickers_mod].sum()
                    )

                j += 1

        weights = weights.loc[self.assetslist].to_frame()
        weights.columns = ["weights"]

        return weights
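
Putting the four steps together, a typical end-to-end call, assuming returns is a DataFrame of asset returns:

    import riskfolio as rp

    port = rp.HCPortfolio(returns=returns)
    w = port.optimization(
        model="HRP",             # or "HERC", "HERC2", "NCO"
        codependence="pearson",  # similarity used to build the clusters
        rm="MV",                 # risk measure
        rf=0,                    # risk-free rate
        linkage="single",        # clustering linkage; "DBHT" is also supported
        max_k=10,                # cap for the optimal-k search
        leaf_order=True,         # seriate leaves for minimal successive distance
    )
    print(w)                     # one-column DataFrame named "weights"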