riskfolio-lib 7.2.0 (riskfolio_lib-7.2.0-cp313-cp313-macosx_10_13_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- riskfolio/__init__.py +14 -0
- riskfolio/external/__init__.py +10 -0
- riskfolio/external/cppfunctions.py +376 -0
- riskfolio/external/functions.cpython-313-darwin.so +0 -0
- riskfolio/src/AuxFunctions.py +1488 -0
- riskfolio/src/ConstraintsFunctions.py +2210 -0
- riskfolio/src/DBHT.py +1089 -0
- riskfolio/src/GerberStatistic.py +240 -0
- riskfolio/src/HCPortfolio.py +1102 -0
- riskfolio/src/OwaWeights.py +433 -0
- riskfolio/src/ParamsEstimation.py +1989 -0
- riskfolio/src/PlotFunctions.py +5052 -0
- riskfolio/src/Portfolio.py +6164 -0
- riskfolio/src/Reports.py +692 -0
- riskfolio/src/RiskFunctions.py +3195 -0
- riskfolio/src/__init__.py +20 -0
- riskfolio/version.py +4 -0
- riskfolio_lib-7.2.0.dist-info/LICENSE.txt +27 -0
- riskfolio_lib-7.2.0.dist-info/METADATA +386 -0
- riskfolio_lib-7.2.0.dist-info/RECORD +22 -0
- riskfolio_lib-7.2.0.dist-info/WHEEL +6 -0
- riskfolio_lib-7.2.0.dist-info/top_level.txt +1 -0
riskfolio/src/ConstraintsFunctions.py
@@ -0,0 +1,2210 @@
""""""  #

"""
Copyright (c) 2020-2026, Dany Cajas
All rights reserved.
This work is licensed under BSD 3-Clause "New" or "Revised" License.
License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
"""

import numpy as np
import pandas as pd
import networkx as nx
import scipy.cluster.hierarchy as hr
from scipy.spatial.distance import squareform
import riskfolio.src.AuxFunctions as af
import riskfolio.src.DBHT as db


__all__ = [
    "assets_constraints",
    "factors_constraints",
    "integer_constraints",
    "assets_views",
    "factors_views",
    "assets_clusters",
    "hrp_constraints",
    "risk_constraint",
    "connection_matrix",
    "centrality_vector",
    "clusters_matrix",
    "average_centrality",
    "connected_assets",
    "related_assets",
]


def assets_constraints(constraints, asset_classes):
    r"""
    Create the linear constraints matrices A and B of the constraint
    :math:`Aw \leq B`.

    Parameters
    ----------
    constraints : DataFrame of shape (n_constraints, n_fields)
        Constraints DataFrame, where n_constraints is the number of constraints
        and n_fields is the number of fields of the constraints DataFrame. The
        fields are:

        - Disabled: (bool) indicates if the constraint is enabled.
        - Type: (str) can be 'Assets', 'Classes', 'All Assets', 'Each asset in a class' and 'All Classes'.
        - Set: (str) if Type is 'Classes', 'Each asset in a class' or 'All Classes' specifies the name of the asset's classes set.
        - Position: (str) the name of the asset or asset class of the constraint.
        - Sign: (str) can be '>=' or '<='.
        - Weight: (scalar) is the maximum or minimum weight of the absolute constraint.
        - Type Relative: (str) can be 'Assets' or 'Classes'.
        - Relative Set: (str) if Type Relative is 'Classes' specifies the name of the set of asset classes.
        - Relative: (str) the name of the asset or asset class of the relative constraint.
        - Factor: (scalar) is the factor of the relative constraint.

    asset_classes : DataFrame of shape (n_assets, n_cols)
        Asset's classes matrix, where n_assets is the number of assets and
        n_cols is the number of columns of the matrix where the first column
        is the asset list and the next columns are the different asset's
        classes sets.

    Returns
    -------
    A : nd-array
        The matrix A of :math:`Aw \leq B`.

    B : nd-array
        The matrix B of :math:`Aw \leq B`.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------
    ::

        import riskfolio as rp

        asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
                         'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                                     'Fixed Income', 'Fixed Income'],
                         'Class 2': ['Technology', 'Technology', 'Technology',
                                     'Financial', 'Financial', 'Treasury', 'Treasury'],}

        asset_classes = pd.DataFrame(asset_classes)
        asset_classes = asset_classes.sort_values(by=['Assets'])

        constraints = {'Disabled': [False, False, False, False, False, False, False],
                       'Type': ['Classes', 'Classes', 'Assets', 'Assets', 'Classes',
                                'All Assets', 'Each asset in a class'],
                       'Set': ['Class 1', 'Class 1', '', '', 'Class 2', '', 'Class 1'],
                       'Position': ['Equity', 'Fixed Income', 'BAC', 'WFC', 'Financial',
                                    '', 'Equity'],
                       'Sign': ['<=', '<=', '<=', '<=', '>=', '>=', '>='],
                       'Weight': [0.6, 0.5, 0.1, '', '', 0.02, ''],
                       'Type Relative': ['', '', '', 'Assets', 'Classes', '', 'Assets'],
                       'Relative Set': ['', '', '', '', 'Class 1', '', ''],
                       'Relative': ['', '', '', 'FB', 'Fixed Income', '', 'TLT'],
                       'Factor': ['', '', '', 1.2, 0.5, '', 0.4]}

        constraints = pd.DataFrame(constraints)

    The constraints look like the following image:

    .. image:: images/Constraints.png

    It is easier to construct the constraints in Excel and then upload them to
    a dataframe.

    To create the matrices A and B we use the following command:

    ::

        A, B = rp.assets_constraints(constraints, asset_classes)

    The matrices A and B look like this (all constraints were converted to a
    linear constraint):

    .. image:: images/AxB.png

    """

    if not isinstance(constraints, pd.DataFrame) and not isinstance(
        asset_classes, pd.DataFrame
    ):
        raise ValueError("constraints and asset_classes must be DataFrames")

    if constraints.shape[1] != 10:
        raise ValueError("constraints must have ten columns")

    constraints0 = constraints.fillna("")
    constraints0 = constraints0[constraints0["Disabled"] == False]
    data = constraints0.values.tolist()
    assetslist = asset_classes.iloc[:, 0].values.tolist()

    n, m = len(constraints0), len(asset_classes)

    A = []
    B = []
    for i in range(0, n):
        if data[i][1] == "Assets":
            item = assetslist.index(data[i][3])
            if data[i][4] == ">=":
                d = 1
            elif data[i][4] == "<=":
                d = -1
            if data[i][5] != "":
                A1 = [0] * m
                A1[item] = d
                A.append(A1)
                B.append([data[i][5] * d])
            else:
                A1 = [0] * m
                A1[item] = 1
                if data[i][6] == "Assets":
                    item2 = assetslist.index(data[i][8])
                    A2 = [0] * m
                    A2[item2] = 1
                elif data[i][6] == "Classes":
                    A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
                A1 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
                A.append(A1)
                B.append([0])
        elif data[i][1] == "All Assets":
            item = len(assetslist)
            if data[i][4] == ">=":
                d = 1
            elif data[i][4] == "<=":
                d = -1
            if data[i][5] != "":
                A1 = np.identity(item) * d
                A1 = A1.tolist()
                B1 = np.ones((item, 1)) * d * data[i][5]
                for i in range(0, item):
                    A.append(A1[i])
                    B.append(B1.tolist()[0])
            else:
                A1 = np.identity(item)
                if data[i][6] == "Assets":
                    item2 = assetslist.index(data[i][8])
                    A2 = np.zeros((item, item - 1))
                    A2 = np.insert(A2, item2 - 1, 1, axis=1)
                elif data[i][6] == "Classes":
                    A1 = np.identity(item)
                    A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
                    A2 = np.ones((item, item)) * np.array(A2)
                A1 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
                for i in range(0, item):
                    A.append(A1[i])
                    B.append([0])
        elif data[i][1] == "Classes":
            if data[i][4] == ">=":
                d = 1
            elif data[i][4] == "<=":
                d = -1
            if data[i][5] != "":
                A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
                A1 = np.array(A1) * d
                A1 = A1.tolist()
                A.append(A1)
                B.append([data[i][5] * d])
            else:
                A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
                if data[i][6] == "Assets":
                    item2 = assetslist.index(data[i][8])
                    A2 = [0] * m
                    A2[item2] = 1
                elif data[i][6] == "Classes":
                    A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
                A1 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
                A.append(A1)
                B.append([0])
        elif data[i][1] == "Each asset in a class":
            if data[i][4] == ">=":
                d = 1
            elif data[i][4] == "<=":
                d = -1
            if data[i][5] != "":
                A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
                l = 0
                for k in A1:
                    if k == 1:
                        A3 = [0] * m
                        A3[l] = 1 * d
                        A.append(A3)
                        B.append([data[i][5] * d])
                    l = l + 1
            else:
                A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
                l = 0
                for k in A1:
                    if k == 1:
                        A3 = [0] * m
                        A3[l] = 1
                        if data[i][6] == "Assets":
                            item2 = assetslist.index(data[i][8])
                            A2 = [0] * m
                            A2[item2] = 1
                        elif data[i][6] == "Classes":
                            A2 = np.where(
                                asset_classes[data[i][7]].values == data[i][8], 1, 0
                            )
                        A3 = (
                            (np.array(A3) + np.array(A2) * data[i][9] * -1) * d
                        ).tolist()
                        A.append(A3)
                        B.append([0])
                    l = l + 1
        elif data[i][1] == "All Classes":
            if data[i][4] == ">=":
                d = 1
            elif data[i][4] == "<=":
                d = -1
            if data[i][5] != "":
                for k in np.unique(asset_classes[data[i][2]].values):
                    A1 = np.where(asset_classes[data[i][2]].values == k, 1, 0) * d
                    A1 = A1.tolist()
                    A.append(A1)
                    B.append([data[i][5] * d])
            else:
                for k in np.unique(asset_classes[data[i][2]].values):
                    A1 = np.where(asset_classes[data[i][2]].values == k, 1, 0)
                    if data[i][6] == "Assets":
                        item2 = assetslist.index(data[i][8])
                        A2 = [0] * m
                        A2[item2] = 1
                    elif data[i][6] == "Classes":
                        A2 = np.where(
                            asset_classes[data[i][7]].values == data[i][8], 1, 0
                        )
                    A3 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
                    A.append(A3)
                    B.append([0])

    A = -np.array(A, ndmin=2, dtype=float)
    B = -np.array(B, ndmin=2, dtype=float)

    return A, B
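
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. Every
# rule above is reduced to one (or several) rows of the system A @ w <= B, so
# a candidate weight vector can be checked numerically. The equal-weight
# vector below is a hypothetical example:
#
#     A, B = assets_constraints(constraints, asset_classes)
#     w = np.full((len(asset_classes), 1), 1 / len(asset_classes))
#     feasible = bool(np.all(A @ w <= B + 1e-12))  # small numerical tolerance
# ---------------------------------------------------------------------------
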
def factors_constraints(constraints, loadings):
    r"""
    Create the factors constraints matrices C and D of the constraint
    :math:`Cw \leq D`.

    Parameters
    ----------
    constraints : DataFrame of shape (n_constraints, n_fields)
        Constraints DataFrame, where n_constraints is the number of constraints
        and n_fields is the number of fields of the constraints DataFrame. The
        fields are:

        - Disabled: (bool) indicates if the constraint is enabled.
        - Factor: (str) the name of the factor of the constraint.
        - Sign: (str) can be '>=' or '<='.
        - Value: (scalar) is the maximum or minimum value of the factor.
        - Relative Factor: (str) the name of the factor of the relative constraint (can be left empty).

    loadings : DataFrame of shape (n_assets, n_features)
        The loadings matrix.

    Returns
    -------
    C : nd-array
        The matrix C of :math:`Cw \leq D`.

    D : nd-array
        The matrix D of :math:`Cw \leq D`.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------
    ::

        loadings = {'const': [0.0004, 0.0002, 0.0000, 0.0006, 0.0001, 0.0003, -0.0003],
                    'MTUM': [0.1916, 1.0061, 0.8695, 1.9996, 0.0000, 0.0000, 0.0000],
                    'QUAL': [0.0000, 2.0129, 1.4301, 0.0000, 0.0000, 0.0000, 0.0000],
                    'SIZE': [0.0000, 0.0000, 0.0000, 0.4717, 0.0000, -0.1857, 0.0000],
                    'USMV': [-0.7838, -1.6439, -1.0176, -1.4407, 0.0055, 0.5781, 0.0000],
                    'VLUE': [1.4772, -0.7590, -0.4090, 0.0000, -0.0054, -0.4844, 0.9435]}

        loadings = pd.DataFrame(loadings)

        constraints = {'Disabled': [False, False, False],
                       'Factor': ['MTUM', 'USMV', 'VLUE'],
                       'Sign': ['<=', '<=', '>='],
                       'Value': [0.9, -1.2, 0.3],
                       'Relative Factor': ['USMV', '', '']}

        constraints = pd.DataFrame(constraints)

    The constraints look like the following image:

    .. image:: images/Constraints2.png

    It is easier to construct the constraints in Excel and then upload them to
    a dataframe.

    To create the matrices C and D we use the following command:

    ::

        C, D = rp.factors_constraints(constraints, loadings)

    The matrices C and D look like this (all constraints were converted to a
    linear constraint):

    .. image:: images/CxD.png

    """

    if not isinstance(constraints, pd.DataFrame) and not isinstance(
        loadings, pd.DataFrame
    ):
        raise ValueError("constraints and loadings must be DataFrames")

    if constraints.shape[1] != 5:
        raise ValueError("constraints must have five columns")

    constraints0 = constraints.fillna("")
    constraints0 = constraints0[constraints0["Disabled"] == False]
    data = constraints0.values.tolist()

    n = len(constraints0)

    C = []
    D = []
    for i in range(0, n):
        if data[i][2] == ">=":
            d = 1
        elif data[i][2] == "<=":
            d = -1
        C1 = loadings[data[i][1]].values
        if data[i][4] != "":
            C2 = loadings[data[i][4]].values
            C1 = C2 - C1
        C.append(C1 * d)
        D.append([data[i][3] * d])

    C = -np.array(C, ndmin=2, dtype=float)
    D = -np.array(D, ndmin=2, dtype=float)

    return C, D
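
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. The rows
# of C are (signed) columns of the loadings matrix, so C @ w <= D bounds the
# portfolio's factor exposures. A quick feasibility check with a hypothetical
# equal-weight portfolio:
#
#     C, D = factors_constraints(constraints, loadings)
#     w = np.full((loadings.shape[0], 1), 1 / loadings.shape[0])
#     feasible = bool(np.all(C @ w <= D + 1e-12))
# ---------------------------------------------------------------------------
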
def integer_constraints(constraints, asset_classes):
    r"""
    Create the integer constraints matrices A, B, C, D, E, F associated with
    the constraints :math:`Ak \leq B`, :math:`Ck \leq D \odot k_{s}` and
    :math:`E k_{s}\leq F`.

    Parameters
    ----------
    constraints : DataFrame of shape (n_constraints, n_fields)
        Constraints DataFrame, where n_constraints is the number of constraints
        and n_fields is the number of fields of the constraints DataFrame. The
        fields are:

        - Disabled: (bool) indicates if the constraint is enabled.
        - Type: (str) can be 'Assets' and 'Classes'.
        - Set: (str) if Type is 'Classes' specifies the name of the asset's classes set.
        - Position: (str) the name of the asset or asset class of the constraint, or 'All' for all categories.
        - Kind: (str) can be 'CardUp' (Upper Cardinality), 'CardLow' (Lower Cardinality), 'MuEx' (Mutually Exclusive) and 'Join' (Join Investments).
        - Value: (int or None) is the maximum or minimum value of cardinality constraints.
        - Type Relative: (str) can be: 'Assets' or 'Classes'.
        - Relative Set: (str) if Type Relative is 'Classes' specifies the name of the set of asset classes.
        - Relative: (str) the name of the asset or asset class of the relative constraint.

    asset_classes : DataFrame of shape (n_assets, n_cols)
        Asset's classes matrix, where n_assets is the number of assets and
        n_cols is the number of columns of the matrix where the first column
        is the asset list and the next columns are the different asset's
        classes sets.

    Returns
    -------
    A : dict
        The dictionary that contains the matrices A of :math:`Ak \leq B`.

    B : dict
        The dictionary that contains the matrices B of :math:`Ak \leq B`.

    C : dict
        The dictionary that contains the matrices C of :math:`Ck \leq D \odot k_{s}`.

    D : dict
        The dictionary that contains the matrices D of :math:`Ck \leq D \odot k_{s}`.

    E : dict
        The dictionary that contains the matrices E of :math:`E k_{s}\leq F`.

    F : dict
        The dictionary that contains the matrices F of :math:`E k_{s}\leq F`.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------
    ::

        import riskfolio as rp

        asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
                         'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                                     'Fixed Income', 'Fixed Income'],
                         'Class 2': ['Technology', 'Technology', 'Technology',
                                     'Financial', 'Financial', 'Treasury', 'Treasury'],}

        asset_classes = pd.DataFrame(asset_classes)
        asset_classes = asset_classes.sort_values(by=['Assets'])

        constraints = {'Disabled': [True, True, True, True, True, True, True, True, True, True, True, False],
                       'Type': ['Assets', 'Assets', 'Assets', 'Assets', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes'],
                       'Set': ['', '', '', '', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry'],
                       'Position': ['', '', 'PCAR', 'PSA', '', '', 'Financials', 'Energy', 'Financials', 'Financials', 'Industrials', 'Financials'],
                       'Kind': ['CardUp', 'CardLow', 'MuEx', 'Join', 'CardUp', 'CardLow', 'CardUp', 'CardLow', 'MuEx', 'MuEx', 'Join', 'Join'],
                       'Value': [7.0, 16.0, '', '', 4.0, 9.0, 1.0, 1.0, '', '', '', ''],
                       'Type Relative': ['', '', 'Assets', 'Assets', '', '', '', '', 'Assets', 'Classes', 'Assets', 'Classes'],
                       'Relative Set': ['', '', '', '', '', '', '', '', '', 'Industry', '', 'Industry'],
                       'Relative': ['', '', 'CPB', 'MMC', '', '', '', '', 'BAX', 'Consumer Staples', 'PSA', 'Information Technology']}

        constraints = pd.DataFrame(constraints)

    The constraints look like the following image:

    .. image:: images/Constraints_int.png

    It is easier to construct the constraints in Excel and then upload them to
    a dataframe.

    To create the dictionaries A, B, C, D, E, and F we use the following command:

    ::

        A, B, C, D, E, F = rp.integer_constraints(constraints, asset_classes)

    The dictionaries A and B look like the following image:

    .. image:: images/AxB_int.png

    The dictionaries C and D look like the following image:

    .. image:: images/CxD_int.png

    The dictionaries E and F look like the following image:

    .. image:: images/ExF_int.png

    """

    if not isinstance(constraints, pd.DataFrame) and not isinstance(
        asset_classes, pd.DataFrame
    ):
        raise ValueError("constraints and asset_classes must be DataFrames")

    if constraints.shape[1] != 9:
        raise ValueError("constraints must have nine columns")

    constraints0 = constraints.fillna("")
    constraints0 = constraints0[constraints0["Disabled"] == False]
    data = constraints0.values.tolist()
    assetslist = asset_classes.iloc[:, 0].values.tolist()
    groups = constraints0["Set"].unique().tolist()

    n = len(constraints0)
    m = len(asset_classes)

    A, B, C, D, E, F, G, H, I = {}, {}, {}, {}, {}, {}, {}, {}, {}
    for i, j in enumerate(groups):
        A[i], B[i], C[i], D[i], E[i], F[i], I[i] = [], [], [], [], [], [], False

    for group in groups:
        G[group] = []
        if group != "":
            H[group] = asset_classes[group].unique().tolist()
            for i in H[group]:
                G1 = np.where(asset_classes[group].values == i, 1, 0).tolist()
                G[group].append(G1)
            G[group] = np.array(G[group])

    for i in range(0, n):
        key = groups.index(data[i][2])
        if data[i][1].lower() == "assets":
            if data[i][4].lower()[:4] == "card":
                if (
                    data[i][6].lower() != ""
                    or data[i][7].lower() != ""
                    or data[i][8].lower() != ""
                ):
                    raise ValueError(
                        "Cardinality constraints don't require Type Relative, Relative Set and Relative columns."
                    )

                if data[i][5] != "":
                    d = int(data[i][5])
                    if data[i][4].lower()[4:] == "up":
                        A1 = np.ones((m,)).tolist()
                        B1 = [d]
                    elif data[i][4].lower()[4:] == "low":
                        A1 = (-np.ones((m,))).tolist()
                        B1 = [-d]
                    else:
                        raise ValueError("Only CardLow and CardUp values are allowed.")
                    A[key].append(A1)
                    B[key].append(B1)
                elif data[i][5] == "":
                    raise ValueError("Cardinality constraints require a Value column.")

            elif data[i][4].lower() in ["muex", "join"]:
                item = assetslist.index(data[i][3])
                A1 = [0] * m
                A1[item] = 1
                if data[i][6].lower() == "assets":
                    item2 = assetslist.index(data[i][8])
                    A2 = [0] * m
                    A2[item2] = 1
                elif data[i][6].lower() == "classes":
                    A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
                if data[i][4].lower() == "muex":
                    A1 = (np.sum(A2) * np.array(A1) + np.array(A2)).tolist()
                    A[key].append(A1)
                    B[key].append([np.sum(A2)])
                elif data[i][4].lower() == "join":
                    A1 = (np.sum(A2) * np.array(A1) - np.array(A2)).tolist()
                    A[key].append(A1)
                    B[key].append([0])

        elif data[i][1].lower() == "classes":
            C0 = G[data[i][2]]
            I_m = np.identity(m)

            if data[i][4].lower()[:4] == "card":
                if (
                    data[i][6].lower() != ""
                    or data[i][7].lower() != ""
                    or data[i][8].lower() != ""
                ):
                    raise ValueError(
                        "Cardinality constraints don't require Type Relative, Relative Set and Relative columns."
                    )

                if data[i][5] != "":
                    d = int(data[i][5])
                    if data[i][3].lower() == "":
                        A1, B1 = [], []
                        C1 = np.vstack([C0, -C0])
                        D1 = C0.sum(axis=1).reshape((-1, 1))
                        D1 = np.vstack([D1, -np.ones((D1.shape[0], 1))])
                        E1 = np.ones((C0.shape[0],))
                        C1, D1 = C1.tolist(), D1.tolist()
                    elif data[i][3].lower() == "all":
                        A1 = C0
                        B1, C1, D1, E1, F1 = [], [], [], [], []
                    else:
                        A1 = np.where(
                            asset_classes[data[i][2]].values == data[i][3], 1, 0
                        )
                        B1, C1, D1, E1, F1 = [], [], [], [], []

                    if data[i][4].lower()[4:] == "up":
                        if len(A1) == 0:
                            E1 = E1.tolist()
                            F1 = [d]
                        else:
                            if data[i][3].lower() == "all":
                                A1 = A1.tolist()
                                B1 = (np.ones((C0.shape[0], 1)) * d).tolist()
                            else:
                                A1 = A1.tolist()
                                B1 = [d]
                    elif data[i][4].lower()[4:] == "low":
                        if len(A1) == 0:
                            E1 = (-E1).tolist()
                            F1 = [-d]
                        else:
                            if data[i][3].lower() == "all":
                                A1 = (-A1).tolist()
                                B1 = (np.ones((C0.shape[0], 1)) * -d).tolist()
                            else:
                                A1 = (-A1).tolist()
                                B1 = [-d]
                    else:
                        raise ValueError("Only CardLow and CardUp values are allowed.")

                    if A1 != []:
                        if data[i][3].lower() == "all":
                            for row1 in A1:
                                A[key].append(row1)
                        else:
                            A[key].append(A1)
                    if B1 != []:
                        if data[i][3].lower() == "all":
                            for row1 in B1:
                                B[key].append(row1)
                        else:
                            B[key].append(B1)
                    if E1 != []:
                        E[key].append(E1)
                    if F1 != []:
                        F[key].append(F1)
                    if I[key] == False:
                        if C1 != []:
                            for row1 in C1:
                                C[key].append(row1)
                        if D1 != []:
                            for row1 in D1:
                                D[key].append(row1)
                        I[key] = True

                elif data[i][5] == "":
                    raise ValueError("Cardinality constraints require a Value column.")

            elif data[i][4].lower() in ["muex", "join"]:
                if data[i][3].lower() != "":
                    A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
                    if data[i][6].lower() == "assets":
                        item = assetslist.index(data[i][8])
                        A2 = [0] * m
                        A2[item] = 1
                        if data[i][4].lower() == "muex":
                            A3 = np.sum(A1) * np.array(A2, ndmin=2) + np.array(
                                A1, ndmin=2
                            )
                            B1 = [np.sum(A1)]
                        elif data[i][4].lower() == "join":
                            A3 = -np.array(A2) - np.array(A1, ndmin=2)
                            B1 = [-np.sum(A1) - 1]

                        A[key].append(A3.flatten().tolist())
                        B[key].append(B1)

                    elif data[i][6].lower() == "classes":
                        A2 = np.where(
                            asset_classes[data[i][7]].values == data[i][8], 1, 0
                        )
                        A3 = I_m[np.array(A2, dtype=bool)]
                        A4 = np.repeat(np.array(A1, ndmin=2), A3.shape[0], axis=0)
                        if data[i][4].lower() == "muex":
                            A5 = np.sum(A1) * np.array(A3, ndmin=2) + np.array(
                                A4, ndmin=2
                            )
                            B1 = np.ones((A3.shape[0], 1)) * np.sum(A1)
                        elif data[i][4].lower() == "join":
                            A5 = -np.array(A3, ndmin=2) - np.array(A4, ndmin=2)
                            B1 = np.ones((A3.shape[0], 1)) * (-np.sum(A1) - 1)

                        for row1 in A5:
                            A[key].append(row1.tolist())
                        for row1 in B1:
                            B[key].append(row1.tolist())

    for i in A.keys():
        A[i] = np.array(A[i], ndmin=2, dtype=float)
        B[i] = np.array(B[i], ndmin=2, dtype=float)
        C[i] = np.array(C[i], ndmin=2, dtype=float)
        D[i] = np.array(D[i], ndmin=2, dtype=float)
        E[i] = np.array(E[i], ndmin=2, dtype=float)
        F[i] = np.array(F[i], ndmin=2, dtype=float)

    return A, B, C, D, E, F
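
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. Reading
# the formulas in the docstring, k is the vector of per-asset integer
# (buy-in) variables and k_s the per-class ones, so e.g. a 'CardUp' row of A
# simply counts how many assets are selected. A hypothetical check with an
# all-zeros selection vector:
#
#     A, B, C, D, E, F = integer_constraints(constraints, asset_classes)
#     k = np.zeros((len(asset_classes), 1))  # nothing selected
#     ok = all(np.all(A[g] @ k <= B[g]) for g in A.keys() if A[g].size > 0)
# ---------------------------------------------------------------------------
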
def assets_views(views, asset_classes):
    r"""
    Create the assets views matrices P and Q of the views :math:`Pw = Q`.

    Parameters
    ----------
    views : DataFrame of shape (n_views, n_fields)
        Views DataFrame, where n_views is the number of views and n_fields is
        the number of fields of the views DataFrame. The fields are:

        - Disabled: (bool) indicates if the view is enabled.
        - Type: (str) can be: 'Assets' or 'Classes'.
        - Set: (str) if Type is 'Classes' specifies the name of the set of asset classes.
        - Position: (str) the name of the asset or asset class of the view.
        - Sign: (str) can be '>=' or '<='.
        - Return: (scalar) is the return of the view.
        - Type Relative: (str) can be: 'Assets' or 'Classes'.
        - Relative Set: (str) if Type Relative is 'Classes' specifies the name of the set of asset classes.
        - Relative: (str) the name of the asset or asset class of the relative view.

    asset_classes : DataFrame of shape (n_assets, n_cols)
        Asset's classes matrix, where n_assets is the number of assets and
        n_cols is the number of columns of the matrix where the first column
        is the asset list and the next columns are the different asset's
        classes sets.

    Returns
    -------
    P : nd-array
        The matrix P that shows the relation among assets in each view.

    Q : nd-array
        The matrix Q that shows the expected return of each view.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------
    ::

        asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
                         'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                                     'Fixed Income', 'Fixed Income'],
                         'Class 2': ['Technology', 'Technology', 'Technology',
                                     'Financial', 'Financial', 'Treasury', 'Treasury'],}

        asset_classes = pd.DataFrame(asset_classes)
        asset_classes = asset_classes.sort_values(by=['Assets'])

        views = {'Disabled': [False, False, False, False],
                 'Type': ['Assets', 'Classes', 'Classes', 'Assets'],
                 'Set': ['', 'Class 2', 'Class 1', ''],
                 'Position': ['WFC', 'Financial', 'Equity', 'FB'],
                 'Sign': ['<=', '>=', '>=', '>='],
                 'Return': [0.3, 0.1, 0.05, 0.03],
                 'Type Relative': ['Assets', 'Classes', 'Assets', ''],
                 'Relative Set': ['', 'Class 1', '', ''],
                 'Relative': ['FB', 'Fixed Income', 'TLT', '']}

        views = pd.DataFrame(views)

    The views look like the following image:

    .. image:: images/Views.png

    It is easier to construct the views in Excel and then upload them to a
    dataframe.

    To create the matrices P and Q we use the following command:

    ::

        P, Q = rp.assets_views(views, asset_classes)

    The matrices P and Q look like the following image:

    .. image:: images/PxQ.png

    """

    if not isinstance(views, pd.DataFrame) and not isinstance(
        asset_classes, pd.DataFrame
    ):
        raise ValueError("constraints and asset_classes must be DataFrames")

    if views.shape[1] != 9:
        raise ValueError("constraints must have nine columns")

    n = len(views)
    m = len(asset_classes)
    views0 = views.fillna("")
    views0 = views0[views0["Disabled"] == False]
    data = views0.values.tolist()
    assetslist = asset_classes.iloc[:, 0].values.tolist()

    P = []
    Q = []
    for i in range(0, n):
        valid = False
        if data[i][1] == "Assets":
            item = assetslist.index(data[i][3])
            if data[i][4] == ">=":
                d = 1
            elif data[i][4] == "<=":
                d = -1
            if data[i][5] != "":
                P1 = [0] * m
                P1[item] = 1
                if data[i][6] == "Assets" and data[i][8] != "":
                    item2 = assetslist.index(data[i][8])
                    P2 = [0] * m
                    P2[item2] = 1
                    valid = True
                elif data[i][6] == "Classes" and data[i][7] != "" and data[i][8] != "":
                    P2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
                    P2 = P2 / np.sum(P2)
                    valid = True
                elif data[i][6] == "" and data[i][7] == "" and data[i][8] == "":
                    P2 = [0] * m
                    valid = True
                if valid == True:
                    P1 = ((np.array(P1) - np.array(P2)) * d).tolist()
                    P.append(P1)
                    Q.append([data[i][5] * d])
        elif data[i][1] == "Classes":
            if data[i][4] == ">=":
                d = 1
            else:
                d = -1
            if data[i][5] != "":
                P1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
                P1 = P1 / np.sum(P1)
                if data[i][6] == "Assets" and data[i][8] != "":
                    item2 = assetslist.index(data[i][8])
                    P2 = [0] * m
                    P2[item2] = 1
                    valid = True
                elif data[i][6] == "Classes" and data[i][7] != "" and data[i][8] != "":
                    P2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
                    P2 = P2 / np.sum(P2)
                    valid = True
                elif data[i][6] == "" and data[i][7] == "" and data[i][8] == "":
                    P2 = [0] * m
                    valid = True
                if valid == True:
                    P1 = ((np.array(P1) - np.array(P2)) * d).tolist()
                    P.append(P1)
                    Q.append([data[i][5] * d])

    P = np.array(P, ndmin=2, dtype=float)
    Q = np.array(Q, ndmin=2, dtype=float)

    for i in range(len(Q)):
        if Q[i, 0] < 0:
            P[i, :] = -1.0 * P[i, :]
            Q[i, :] = -1.0 * Q[i, :]

    return P, Q
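
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. Each row
# of P is a (possibly relative) portfolio of assets and the matching entry of
# Q is the expected return of that portfolio, i.e. the views read P @ w = Q.
# A shape check for the hypothetical example in the docstring:
#
#     P, Q = assets_views(views, asset_classes)
#     n_views_built, n_assets = P.shape  # one column per asset in the universe
# ---------------------------------------------------------------------------
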
def factors_views(views, loadings, const=True):
    r"""
    Create the factors views matrices P and Q of the factor views
    :math:`Pf = Q`.

    Parameters
    ----------
    views : DataFrame of shape (n_views, n_fields)
        Views DataFrame, where n_views is the number of views and n_fields is
        the number of fields of the views DataFrame. The fields are:

        - Disabled: (bool) indicates if the view is enabled.
        - Factor: (str) the name of the factor of the view.
        - Sign: (str) can be '>=' or '<='.
        - Value: (scalar) is the maximum or minimum value of the factor.
        - Relative Factor: (str) the name of the factor of the relative view (can be left empty).

    loadings : DataFrame of shape (n_assets, n_features)
        The loadings matrix.
    const : bool, optional
        Indicates if the first column of the loadings matrix is a constant
        (intercept), in which case it is excluded from the factors list.
        The default is True.

    Returns
    -------
    P : nd-array
        The matrix P that shows the relation among factors in each factor view.

    Q : nd-array
        The matrix Q that shows the expected return of each factor view.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------
    ::

        loadings = {'const': [0.0004, 0.0002, 0.0000, 0.0006, 0.0001, 0.0003, -0.0003],
                    'MTUM': [0.1916, 1.0061, 0.8695, 1.9996, 0.0000, 0.0000, 0.0000],
                    'QUAL': [0.0000, 2.0129, 1.4301, 0.0000, 0.0000, 0.0000, 0.0000],
                    'SIZE': [0.0000, 0.0000, 0.0000, 0.4717, 0.0000, -0.1857, 0.0000],
                    'USMV': [-0.7838, -1.6439, -1.0176, -1.4407, 0.0055, 0.5781, 0.0000],
                    'VLUE': [1.4772, -0.7590, -0.4090, 0.0000, -0.0054, -0.4844, 0.9435]}

        loadings = pd.DataFrame(loadings)

        factorsviews = {'Disabled': [False, False, False],
                        'Factor': ['MTUM', 'USMV', 'VLUE'],
                        'Sign': ['<=', '<=', '>='],
                        'Value': [0.9, -1.2, 0.3],
                        'Relative Factor': ['USMV', '', '']}

        factorsviews = pd.DataFrame(factorsviews)

    The views look like the following image:

    .. image:: images/factorsviews.png

    It is easier to construct the views in Excel and then upload them to a
    dataframe.

    To create the matrices P and Q we use the following command:

    ::

        P, Q = rp.factors_views(factorsviews,
                                loadings,
                                const=True)

    The matrices P and Q look like the following image:

    .. image:: images/P_fxQ_f.png

    """

    if not isinstance(views, pd.DataFrame) and not isinstance(loadings, pd.DataFrame):
        raise ValueError("constraints and loadings must be DataFrames")

    if views.shape[1] != 5:
        raise ValueError("constraints must have five columns")

    n = len(views)
    views0 = views.fillna("")
    views0 = views0[views0["Disabled"] == False]
    data = views0.values.tolist()
    factorslist = loadings.columns.tolist()
    if const == True:
        factorslist = factorslist[1:]
    m = len(factorslist)

    P = []
    Q = []
    for i in range(0, n):
        if data[i][0] == False:
            item = factorslist.index(data[i][1])
            if data[i][2] == ">=":
                d = 1
            elif data[i][2] == "<=":
                d = -1
            P1 = [0] * m
            P1[item] = d
            if data[i][4] != "":
                item = factorslist.index(data[i][4])
                P1[item] = -d
            P.append(P1)
            Q.append([data[i][3] * d])

    P = np.array(P, ndmin=2, dtype=float)
    Q = np.array(Q, ndmin=2, dtype=float)

    return P, Q
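
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. With
# const=True the intercept column of the loadings is dropped, so P has one
# column per factor and each row encodes an absolute or relative factor view.
# For the hypothetical example above, assuming at least one enabled view:
#
#     P, Q = factors_views(factorsviews, loadings, const=True)
#     n_views_built, n_factors = P.shape  # n_factors == loadings.shape[1] - 1
# ---------------------------------------------------------------------------
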
def assets_clusters(
    returns,
    custom_cov=None,
    codependence="pearson",
    linkage="ward",
    opt_k_method="twodiff",
    k=None,
    max_k=10,
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
    leaf_order=True,
):
    r"""
    Create asset classes based on hierarchical clustering.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, can be {'pearson', 'spearman', 'abs_pearson', 'abs_spearman', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'ward'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.
        - 'DBHT'. Direct Bubble Hierarchical Tree.

    opt_k_method : str
        Method used to calculate the optimum number of clusters.
        The default is 'twodiff'. Possible values are:

        - 'twodiff': two difference gap statistic.
        - 'stdsil': standardized silhouette score.

    k : int, optional
        Number of clusters. This value is used instead of the optimal number
        of clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.
    leaf_order : bool, optional
        Indicates if the clusters are ordered so that the distance between
        successive leaves is minimal. The default is True.

    Returns
    -------
    clusters : DataFrame
        A dataframe with asset classes based on hierarchical clustering.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        clusters = rp.assets_clusters(returns,
                                      codependence='pearson',
                                      linkage='ward',
                                      k=None,
                                      max_k=10,
                                      alpha_tail=0.05,
                                      leaf_order=True)

    The clusters dataframe looks like the following image:

    .. image:: images/clusters_df.png

    """

    if not isinstance(returns, pd.DataFrame):
        raise ValueError("returns must be a DataFrame")

    # Calculating codependence matrix and distance metric
    codep, dist = af.codep_dist(
        returns=returns,
        custom_cov=custom_cov,
        codependence=codependence,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
    )
    # Hierarchical clustering
    dist = dist.to_numpy()
    dist = pd.DataFrame(dist, columns=codep.columns, index=codep.index)
    if linkage == "DBHT":
        # different choices for D, S give different outputs!
        D = dist.to_numpy()  # dissimilarity matrix
        if codependence in {"pearson", "spearman"}:
            S = (1 - dist**2).to_numpy()
        else:
            S = codep.copy().to_numpy()  # similarity matrix
        (_, _, _, _, _, clustering) = db.DBHTs(
            D, S, leaf_order=leaf_order
        )  # DBHT clustering
    else:
        p_dist = squareform(dist, checks=False)
        clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)

    # optimal number of clusters
    if k is None:
        if opt_k_method == "twodiff":
            k, clustering_inds = af.two_diff_gap_stat(dist, clustering, max_k)
        elif opt_k_method == "stdsil":
            k, clustering_inds = af.std_silhouette_score(dist, clustering, max_k)
        else:
            raise ValueError("The only opt_k_method available are twodiff and stdsil")
    else:
        clustering_inds = hr.fcluster(clustering, k, criterion="maxclust")

    # Building clusters
    labels = np.array(returns.columns.tolist())
    clusters = {"Assets": [], "Clusters": []}

    for i, v in enumerate(clustering_inds):
        clusters["Assets"].append(labels[i])
        clusters["Clusters"].append("Cluster " + str(v))

    clusters = pd.DataFrame(clusters)
    clusters = clusters.sort_values(by=["Assets"])

    return clusters
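
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. The
# returned dataframe has the same layout as an asset_classes matrix, so it
# can be fed straight back into assets_constraints, e.g. to cap every asset
# inside each discovered cluster (hypothetical returns DataFrame assumed):
#
#     clusters = assets_clusters(returns, codependence="pearson", linkage="ward")
#     constraints = pd.DataFrame({'Disabled': [False], 'Type': ['All Classes'],
#                                 'Set': ['Clusters'], 'Position': [''],
#                                 'Sign': ['<='], 'Weight': [0.3],
#                                 'Type Relative': [''], 'Relative Set': [''],
#                                 'Relative': [''], 'Factor': ['']})
#     A, B = assets_constraints(constraints, clusters)
# ---------------------------------------------------------------------------
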
def hrp_constraints(constraints, asset_classes):
    r"""
    Create the upper and lower bounds constraints for the hierarchical risk
    parity model.

    Parameters
    ----------
    constraints : DataFrame of shape (n_constraints, n_fields)
        Constraints DataFrame, where n_constraints is the number of constraints
        and n_fields is the number of fields of the constraints DataFrame. The
        fields are:

        - Disabled: (bool) indicates if the constraint is enabled.
        - Type: (str) can be: 'Assets', 'All Assets' and 'Each asset in a class'.
        - Set: (str) if Type is 'Each asset in a class' specifies the name of the asset's classes set.
        - Position: (str) the name of the asset or asset class of the constraint.
        - Sign: (str) can be '>=' or '<='.
        - Weight: (scalar) is the maximum or minimum weight of the absolute constraint.

    asset_classes : DataFrame of shape (n_assets, n_cols)
        Asset's classes DataFrame, where n_assets is the number of assets and
        n_cols is the number of columns of the DataFrame where the first column
        is the asset list and the next columns are the different asset's
        classes sets.

    Returns
    -------
    w_max : pd.Series
        The upper bound of hierarchical risk parity weights constraints.

    w_min : pd.Series
        The lower bound of hierarchical risk parity weights constraints.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------
    ::

        asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
                         'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                                     'Fixed Income', 'Fixed Income'],
                         'Class 2': ['Technology', 'Technology', 'Technology',
                                     'Financial', 'Financial', 'Treasury', 'Treasury'],}

        asset_classes = pd.DataFrame(asset_classes)
        asset_classes = asset_classes.sort_values(by=['Assets'])

        constraints = {'Disabled': [False, False, False, False, False, False],
                       'Type': ['Assets', 'Assets', 'All Assets', 'All Assets',
                                'Each asset in a class', 'Each asset in a class'],
                       'Set': ['', '', '', '', 'Class 1', 'Class 2'],
                       'Position': ['BAC', 'FB', '', '', 'Equity', 'Treasury'],
                       'Sign': ['>=', '<=', '<=', '>=', '<=', '<='],
                       'Weight': [0.02, 0.085, 0.09, 0.01, 0.07, 0.06]}

        constraints = pd.DataFrame(constraints)

    The constraints look like the following image:

    .. image:: images/HRPConstraints.png

    It is easier to construct the constraints in Excel and then upload them to
    a dataframe.

    To create the pd.Series w_max and w_min we use the following command:

    ::

        w_max, w_min = rp.hrp_constraints(constraints, asset_classes)

    The pd.Series w_max and w_min look like this (all constraints were merged
    into a single upper and lower bound for each asset):

    .. image:: images/HRP_Bounds.png

    """

    if not isinstance(constraints, pd.DataFrame) and not isinstance(
        asset_classes, pd.DataFrame
    ):
        raise ValueError("constraints and asset_classes must be DataFrames")

    if constraints.shape[1] != 6:
        raise ValueError("constraints must have six columns")

    n = len(constraints)
    data = constraints.fillna("").copy()
    assetslist = asset_classes.iloc[:, 0].values.tolist()

    w_max = pd.Series(1.0, index=assetslist)
    w_min = pd.Series(0.0, index=assetslist)

    for i in range(0, n):
        if data.loc[i, "Disabled"] == False:
            if data.loc[i, "Type"] == "Assets":
                assets = data.loc[i, "Position"]
                if data.loc[i, "Sign"] == ">=":
                    if w_min.loc[assets] <= data.loc[i, "Weight"]:
                        w_min.loc[assets] = data.loc[i, "Weight"]
                elif data.loc[i, "Sign"] == "<=":
                    if w_max.loc[assets] >= data.loc[i, "Weight"]:
                        w_max.loc[assets] = data.loc[i, "Weight"]
            elif data.loc[i, "Type"] == "All Assets":
                if data.loc[i, "Sign"] == ">=":
                    if w_min[w_min <= data.loc[i, "Weight"]].shape[0] != 0:
                        w_min[w_min <= data.loc[i, "Weight"]] = data.loc[i, "Weight"]
                elif data.loc[i, "Sign"] == "<=":
                    if w_max[w_max >= data.loc[i, "Weight"]].shape[0] != 0:
                        w_max[w_max >= data.loc[i, "Weight"]] = data.loc[i, "Weight"]
            elif data.loc[i, "Type"] == "Each asset in a class":
                label_0 = asset_classes.columns.tolist()[0]
                label_1 = data.loc[i, "Set"]
                label_2 = data.loc[i, "Position"]
                assets = asset_classes[[label_0, label_1]][
                    asset_classes[label_1] == label_2
                ]
                assets = assets["Assets"].tolist()
                if data.loc[i, "Sign"] == ">=":
                    if (
                        w_min.loc[assets][
                            w_min.loc[assets] <= data.loc[i, "Weight"]
                        ].shape[0]
                        != 0
                    ):
                        w_min.loc[assets] = np.where(
                            w_min.loc[assets] <= data.loc[i, "Weight"],
                            data.loc[i, "Weight"],
                            w_min.loc[assets],
                        )
                elif data.loc[i, "Sign"] == "<=":
                    if (
                        w_max.loc[assets][
                            w_max.loc[assets] >= data.loc[i, "Weight"]
                        ].shape[0]
                        != 0
                    ):
                        w_max.loc[assets] = np.where(
                            w_max.loc[assets] >= data.loc[i, "Weight"],
                            data.loc[i, "Weight"],
                            w_max.loc[assets],
                        )

    return w_max, w_min
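
# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the package source. The two
# Series are per-asset box bounds, with overlapping rules collapsed to the
# tightest one; a quick check that the merged bounds are mutually consistent:
#
#     w_max, w_min = hrp_constraints(constraints, asset_classes)
#     consistent = bool((w_min <= w_max).all())
# ---------------------------------------------------------------------------
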
def risk_constraint(asset_classes, kind="vanilla", classes_col=None):
|
|
1314
|
+
r"""
|
|
1315
|
+
Create the risk contribution constraint vector for the risk parity model.
|
|
1316
|
+
|
|
1317
|
+
Parameters
|
|
1318
|
+
----------
|
|
1319
|
+
asset_classes : DataFrame of shape (n_assets, n_cols)
|
|
1320
|
+
Asset's classes DataFrame, where n_assets is the number of assets and
|
|
1321
|
+
n_cols is the number of columns of the DataFrame where the first column
|
|
1322
|
+
is the asset list and the next columns are the different asset's
|
|
1323
|
+
classes sets. It is only used when kind value is 'classes'. The default
|
|
1324
|
+
value is None.
|
|
1325
|
+
|
|
1326
|
+
kind : str
|
|
1327
|
+
Kind of risk contribution constraint vector. The default value is 'vanilla'.
|
|
1328
|
+
Possible values are:
|
|
1329
|
+
|
|
1330
|
+
- 'vanilla': vector of equal risk contribution per asset.
|
|
1331
|
+
- 'classes': vector of equal risk contribution per class.
|
|
1332
|
+
|
|
1333
|
+
classes_col : str or int
|
|
1334
|
+
If value is str, it is the column name of the set of classes from
|
|
1335
|
+
asset_classes dataframe. If value is int, it is the column number of
|
|
1336
|
+
the set of classes from asset_classes dataframe. The default
|
|
1337
|
+
value is None.
|
|
1338
|
+
|
|
1339
|
+
Returns
|
|
1340
|
+
-------
|
|
1341
|
+
rb : nd-array
|
|
1342
|
+
The risk contribution constraint vector.
|
|
1343
|
+
|
|
1344
|
+
Raises
|
|
1345
|
+
------
|
|
1346
|
+
ValueError when the value cannot be calculated.
|
|
1347
|
+
|
|
1348
|
+
Examples
|
|
1349
|
+
--------
|
|
1350
|
+
::
|
|
1351
|
+
|
|
1352
|
+
asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
|
|
1353
|
+
'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
|
|
1354
|
+
'Fixed Income', 'Fixed Income'],
|
|
1355
|
+
'Class 2': ['Technology', 'Technology', 'Technology',
|
|
1356
|
+
'Financial', 'Financial', 'Treasury', 'Treasury'],}
|
|
1357
|
+
|
|
1358
|
+
asset_classes = pd.DataFrame(asset_classes)
|
|
1359
|
+
asset_classes = asset_classes.sort_values(by=['Assets'])
|
|
1360
|
+
asset_classes.reset_index(inplace=True, drop=True)
|
|
1361
|
+
|
|
1362
|
+
rb = rp.risk_constraint(asset_classes
|
|
1363
|
+
kind='classes',
|
|
1364
|
+
classes_col='Class 1')
|
|
1365
|
+
|
|
1366
|
+
|
|
1367
|
+
"""
|
|
1368
|
+
if not isinstance(asset_classes, pd.DataFrame):
|
|
1369
|
+
raise ValueError("asset_classes must be a DataFrame")
|
|
1370
|
+
|
|
1371
|
+
if kind == "vanilla":
|
|
1372
|
+
if asset_classes.shape[1] < 1:
|
|
1373
|
+
raise ValueError("asset_classes must have at least one column")
|
|
1374
|
+
|
|
1375
|
+
assetslist = asset_classes.iloc[:, 0].values.tolist()
|
|
1376
|
+
rb = np.ones((len(assetslist), 1))
|
|
1377
|
+
rb /= len(assetslist)
|
|
1378
|
+
|
|
1379
|
+
elif kind == "classes":
|
|
1380
|
+
if asset_classes.shape[1] < 2:
|
|
1381
|
+
raise ValueError("asset_classes must have at least two columns")
|
|
1382
|
+
|
|
1383
|
+
classes = asset_classes.columns.tolist()
|
|
1384
|
+
|
|
1385
|
+
if isinstance(classes_col, str) and classes_col in classes:
|
|
1386
|
+
A = asset_classes.loc[:, classes_col].to_frame()
|
|
1387
|
+
col = A.columns.to_list()[0]
|
|
1388
|
+
elif isinstance(classes_col, int) and classes[classes_col] in classes:
|
|
1389
|
+
A = asset_classes.iloc[:, classes_col].to_frame()
|
|
1390
|
+
col = A.columns.to_list()[0]
|
|
1391
|
+
else:
|
|
1392
|
+
raise ValueError(
|
|
1393
|
+
"classes_col must be a valid column or column position of asset_classes"
|
|
1394
|
+
)
|
|
1395
|
+
|
|
1396
|
+
A["rb"] = 1
|
|
1397
|
+
B = A.groupby([col]).count()
|
|
1398
|
+
A = pd.merge(A, B, left_on=col, right_index=True, how="left")
|
|
1399
|
+
A["rb"] = A["rb_x"] / A["rb_y"]
|
|
1400
|
+
A["rb"] /= A["rb"].sum()
|
|
1401
|
+
|
|
1402
|
+
rb = A["rb"].to_numpy().reshape(-1, 1)
|
|
1403
|
+
|
|
1404
|
+
else:
|
|
1405
|
+
raise ValueError(
|
|
1406
|
+
"The only available values for kind parameter are 'vanilla' and 'classes'"
|
|
1407
|
+
)
|
|
1408
|
+
|
|
1409
|
+
return rb
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
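# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). For kind='classes', risk_constraint splits the total risk budget
# equally across classes and then equally across the members of each class.
# With a 'Class 1' set of 5 Equity assets and 2 Fixed Income assets, each
# Equity asset receives (1/2)/5 = 0.1 and each Fixed Income asset
# (1/2)/2 = 0.25 of the budget, so the vector sums to one:
def _sketch_risk_constraint():
    asset_classes = pd.DataFrame(
        {
            "Assets": ["BAC", "FB", "GOOGL", "NTFX", "SHV", "TLT", "WFC"],
            "Class 1": ["Equity", "Equity", "Equity", "Equity",
                        "Fixed Income", "Fixed Income", "Equity"],
        }
    )
    rb = risk_constraint(asset_classes, kind="classes", classes_col="Class 1")
    assert abs(rb.sum() - 1.0) < 1e-12
    return rb

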
def connection_matrix(
    returns,
    custom_cov=None,
    codependence="pearson",
    graph="MST",
    walk_size=1,
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
):
    r"""
    Create a connection matrix of walks of a specific size based on :cite:`e-Cajas10` formula.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, can be {'pearson', 'spearman', 'kendall', 'gerber1', 'gerber2', 'abs_pearson', 'abs_spearman', 'abs_kendall', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    graph : string, optional
        Graph used to build the adjacency matrix. The default is 'MST'.
        Possible values are:

        - 'MST': Minimum Spanning Tree.
        - 'TMFG': Planar Maximally Filtered Graph.

    walk_size : int, optional
        Size of the walk represented by the adjacency matrix. The default is 1.
    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.

    Returns
    -------
    A_p : ndarray
        Adjacency matrix of walks of size lower than or equal to 'walk_size'.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        A_p = rp.connection_matrix(returns,
                                   codependence="pearson",
                                   graph="MST",
                                   walk_size=1)

    The connection matrix looks like the following image:

    .. image:: images/Connection_df.png

    """

    if not isinstance(returns, pd.DataFrame):
        raise ValueError("returns must be a DataFrame")

    assets = returns.columns.tolist()

    # Calculating codependence matrix and distance metric
    codep, dist = af.codep_dist(
        returns=returns,
        custom_cov=custom_cov,
        codependence=codependence,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
    )

    # Adjacency matrix construction
    dist = dist.to_numpy()
    dist = pd.DataFrame(dist, columns=codep.columns, index=codep.index)
    if graph == "TMFG":
        # Different choices for D, S give different outputs!
        D = dist.to_numpy()  # dissimilarity matrix
        if codependence in {"pearson", "spearman"}:
            S = (1 - dist**2).to_numpy()
        else:
            S = codep.copy().to_numpy()
        (_, Rpm, _, _, _, clustering) = db.DBHTs(D, S)  # DBHT clustering
        MAdj = pd.DataFrame(Rpm, index=assets, columns=assets)
        G = nx.from_pandas_adjacency(MAdj)
    elif graph == "MST":
        MAdj = nx.from_pandas_adjacency(dist)
        G = nx.minimum_spanning_tree(MAdj)
    else:
        raise ValueError("Only TMFG or MST graphs are available")

    A = nx.adjacency_matrix(G).toarray()
    A = np.where(A != 0, 1, 0)

    # The sum of the powers A^0 ... A^walk_size flags the pairs joined by a
    # walk of length at most walk_size; clipping and removing the diagonal
    # turns the walk counts back into a 0/1 adjacency matrix.
    A_p = np.zeros_like(A)
    for i in range(int(walk_size) + 1):
        A_p += np.linalg.matrix_power(A, i)

    n, n = A.shape
    A_p = np.clip(A_p, 0, 1) - np.identity(n)
    A_p = np.ceil(A_p)

    return A_p

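# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). A standalone numpy demonstration of the walk-reachability trick at
# the tail of connection_matrix, on a hypothetical 4-node path graph: with
# walk_size=2 the pairs (0, 2) and (1, 3) become linked, while (0, 3) needs a
# walk of length 3 and stays 0.
def _sketch_walk_reachability(walk_size=2):
    A = np.array(
        [[0, 1, 0, 0],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [0, 0, 1, 0]]
    )  # path graph 0-1-2-3
    A_p = np.zeros_like(A)
    for i in range(int(walk_size) + 1):
        A_p += np.linalg.matrix_power(A, i)  # A^i counts walks of length i
    n = A.shape[0]
    # Clip the counts to {0, 1} and drop the self-links on the diagonal.
    A_p = np.ceil(np.clip(A_p, 0, 1) - np.identity(n))
    return A_p

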
def centrality_vector(
    returns,
    measure="Degree",
    custom_cov=None,
    codependence="pearson",
    graph="MST",
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
):
    r"""
    Create a centrality vector from the adjacency matrix of an asset network based on :cite:`e-Cajas10` formula.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    measure : str, optional
        Centrality measure. The default is 'Degree'. Possible values are:

        - 'Degree': Node's degree centrality. Number of edges connected to a node.
        - 'Eigenvector': Eigenvector centrality. See more in `eigenvector_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality_numpy.html#eigenvector-centrality-numpy>`_.
        - 'Katz': Katz centrality. See more in `katz_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.katz_centrality_numpy.html#katz-centrality-numpy>`_.
        - 'Closeness': Closeness centrality. See more in `closeness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.closeness_centrality.html#closeness-centrality>`_.
        - 'Betweeness': Betweenness centrality. See more in `betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html#betweenness-centrality>`_.
        - 'Communicability': Communicability betweenness centrality. See more in `communicability_betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.communicability_betweenness_centrality.html#communicability-betweenness-centrality>`_.
        - 'Subgraph': Subgraph centrality. See more in `subgraph_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.subgraph_centrality.html#subgraph-centrality>`_.
        - 'Laplacian': Laplacian centrality. See more in `laplacian_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.laplacian_centrality.html#laplacian-centrality>`_.

    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, can be {'pearson', 'spearman', 'kendall', 'gerber1', 'gerber2', 'abs_pearson', 'abs_spearman', 'abs_kendall', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    graph : string, optional
        Graph used to build the adjacency matrix. The default is 'MST'.
        Possible values are:

        - 'MST': Minimum Spanning Tree.
        - 'TMFG': Planar Maximally Filtered Graph.

    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.

    Returns
    -------
    CM : ndarray of shape (1, n_assets)
        The centrality vector of the assets.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        C_v = rp.centrality_vector(returns,
                                   measure='Degree',
                                   codependence="pearson",
                                   graph="MST")

    The centrality vector looks like the following image:

    .. image:: images/Centrality_df.png

    """

    Adj = connection_matrix(
        returns=returns,
        custom_cov=custom_cov,
        codependence=codependence,
        graph=graph,
        walk_size=1,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
    )

    n, n = Adj.shape
    G = nx.from_numpy_array(Adj)
    if measure == "Degree":
        CM = np.ones((1, n)) @ Adj
    elif measure == "Eigenvector":
        CM = nx.eigenvector_centrality_numpy(G)
    elif measure == "Katz":
        CM = nx.katz_centrality_numpy(G)
    elif measure == "Closeness":
        CM = nx.closeness_centrality(G)
    elif measure == "Betweeness":
        CM = nx.betweenness_centrality(G)
    elif measure == "Communicability":
        CM = nx.communicability_betweenness_centrality(G)
    elif measure == "Subgraph":
        CM = nx.subgraph_centrality(G)
    elif measure == "Laplacian":
        CM = nx.laplacian_centrality(G)
    else:
        raise ValueError(
            "measure must be one of 'Degree', 'Eigenvector', 'Katz', "
            "'Closeness', 'Betweeness', 'Communicability', 'Subgraph' or "
            "'Laplacian'"
        )

    if measure != "Degree":
        # networkx returns a dict keyed by node; convert it to a row vector.
        CM = pd.Series(CM).to_numpy().reshape(1, -1)

    return CM

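# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). For measure='Degree', centrality_vector reduces to a row vector of
# node degrees: left-multiplying the 0/1 adjacency matrix by a row of ones
# sums each column. A standalone sketch on the 4-node path graph used above:
def _sketch_degree_centrality():
    Adj = np.array(
        [[0, 1, 0, 0],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [0, 0, 1, 0]]
    )
    n = Adj.shape[0]
    CM = np.ones((1, n)) @ Adj
    return CM  # array([[1., 2., 2., 1.]]) -- the end nodes have degree 1

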
def clusters_matrix(
    returns,
    custom_cov=None,
    codependence="pearson",
    linkage="ward",
    opt_k_method="twodiff",
    k=None,
    max_k=10,
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
    leaf_order=True,
):
    r"""
    Creates an adjacency matrix that represents the clusters from the hierarchical
    clustering process based on :cite:`e-Cajas11` formula.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, can be {'pearson', 'spearman', 'kendall', 'gerber1', 'gerber2', 'abs_pearson', 'abs_spearman', 'abs_kendall', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'ward'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.
        - 'DBHT': Direct Bubble Hierarchical Tree.

    opt_k_method : str
        Method used to calculate the optimum number of clusters.
        The default is 'twodiff'. Possible values are:

        - 'twodiff': two difference gap statistic.
        - 'stdsil': standardized silhouette score.

    k : int, optional
        Number of clusters. This value is used instead of the optimal number
        of clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.
    leaf_order : bool, optional
        Indicates if the clusters are ordered so that the distance between
        successive leaves is minimal. The default is True.

    Returns
    -------
    A_c : ndarray
        Adjacency matrix of clusters.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        C_M = rp.clusters_matrix(returns,
                                 codependence='pearson',
                                 linkage='ward',
                                 k=None,
                                 max_k=10)

    The clusters matrix looks like the following image:

    .. image:: images/Clusters_matrix_df.png

    """

    assets = returns.columns.tolist()
    n = len(assets)
    clusters = assets_clusters(
        returns=returns,
        custom_cov=custom_cov,
        codependence=codependence,
        linkage=linkage,
        opt_k_method=opt_k_method,
        k=k,
        max_k=max_k,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
        leaf_order=leaf_order,
    )

    # Build the n_assets x n_clusters 0/1 membership matrix, one column per
    # cluster.
    df = pd.DataFrame([], index=assets)

    for i in clusters["Clusters"].unique():
        labels = clusters[clusters["Clusters"] == i]["Assets"].tolist()
        df1 = pd.Series(np.zeros((n,)), index=assets)
        df1[labels] = 1
        df = pd.concat([df, df1], axis=1)

    # M @ M.T is 1 exactly where two assets share a cluster; subtracting the
    # identity removes the self-links.
    A_c = df.to_numpy()
    A_c = A_c @ A_c.T - np.identity(n)

    return A_c

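# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). A standalone demonstration of the membership-matrix trick used at
# the end of clusters_matrix, with four hypothetical assets split into two
# clusters:
def _sketch_clusters_matrix():
    M = np.array(
        [[1, 0],
         [1, 0],
         [0, 1],
         [0, 1]]
    )  # assets 0, 1 in cluster 0; assets 2, 3 in cluster 1
    n = M.shape[0]
    A_c = M @ M.T - np.identity(n)
    return A_c  # 1 only for the (0, 1) and (2, 3) pairs, 0 elsewhere

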
def average_centrality(
    returns,
    w,
    measure="Degree",
    custom_cov=None,
    codependence="pearson",
    graph="MST",
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
):
    r"""
    Calculates the average centrality of the assets of the portfolio based on :cite:`e-Cajas10` formula.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame or Series of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
    measure : str, optional
        Centrality measure. The default is 'Degree'. Possible values are:

        - 'Degree': Node's degree centrality. Number of edges connected to a node.
        - 'Eigenvector': Eigenvector centrality. See more in `eigenvector_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality_numpy.html#eigenvector-centrality-numpy>`_.
        - 'Katz': Katz centrality. See more in `katz_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.katz_centrality_numpy.html#katz-centrality-numpy>`_.
        - 'Closeness': Closeness centrality. See more in `closeness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.closeness_centrality.html#closeness-centrality>`_.
        - 'Betweeness': Betweenness centrality. See more in `betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html#betweenness-centrality>`_.
        - 'Communicability': Communicability betweenness centrality. See more in `communicability_betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.communicability_betweenness_centrality.html#communicability-betweenness-centrality>`_.
        - 'Subgraph': Subgraph centrality. See more in `subgraph_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.subgraph_centrality.html#subgraph-centrality>`_.
        - 'Laplacian': Laplacian centrality. See more in `laplacian_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.laplacian_centrality.html#laplacian-centrality>`_.

    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, optional
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    graph : string, optional
        Graph used to build the adjacency matrix. The default is 'MST'.
        Possible values are:

        - 'MST': Minimum Spanning Tree.
        - 'TMFG': Planar Maximally Filtered Graph.

    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.

    Returns
    -------
    AC : float
        Average centrality of assets.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        ac = rp.average_centrality(returns,
                                   w,
                                   measure="Degree",
                                   codependence="pearson",
                                   graph="MST")

    """

    w_ = np.array(w, ndmin=2)
    if w_.shape[0] == 1 and w_.shape[1] > 1:
        w_ = w_.T
    if w_.shape[0] > 1 and w_.shape[1] > 1:
        raise ValueError("w must have n_assets x 1 size")
    if w.index.tolist() != returns.columns.tolist():
        raise ValueError("w and returns must have the same columns.")
    if codependence == "custom_cov" and custom_cov is None:
        raise ValueError(
            "custom_cov value of codependence parameter requires a custom_cov parameter."
        )

    CM = centrality_vector(
        returns=returns,
        measure=measure,
        custom_cov=custom_cov,
        codependence=codependence,
        graph=graph,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
    )

    # Use the normalized column vector w_ built above, so a (1, n_assets)
    # input is handled consistently.
    ac = CM @ w_

    return ac.item()

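# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). average_centrality is the dot product of the centrality row
# vector with the weight column vector, i.e. a weight-weighted average of the
# assets' centrality scores when the weights sum to one. A standalone sketch
# using the degree centralities of the path graph above:
def _sketch_average_centrality():
    CM = np.array([[1.0, 2.0, 2.0, 1.0]])  # centralities, shape (1, n)
    w = np.array([[0.4], [0.1], [0.1], [0.4]])  # weights, shape (n, 1)
    return (CM @ w).item()  # 0.4*1 + 0.1*2 + 0.1*2 + 0.4*1 = 1.2

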
def connected_assets(
    returns,
    w,
    custom_cov=None,
    codependence="pearson",
    graph="MST",
    walk_size=1,
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
):
    r"""
    Calculates the percentage invested in connected assets of the portfolio based on :cite:`e-Cajas10` formula.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame or Series of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, optional
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    graph : string, optional
        Graph used to build the adjacency matrix. The default is 'MST'.
        Possible values are:

        - 'MST': Minimum Spanning Tree.
        - 'TMFG': Planar Maximally Filtered Graph.

    walk_size : int, optional
        Size of the walk represented by the adjacency matrix. The default is 1.
    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.

    Returns
    -------
    CA : float
        Percentage invested in connected assets.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        ca = rp.connected_assets(returns,
                                 w,
                                 codependence="pearson",
                                 graph="MST",
                                 walk_size=1)

    """

    w_ = np.array(w, ndmin=2)
    if w_.shape[0] == 1 and w_.shape[1] > 1:
        w_ = w_.T
    if w_.shape[0] > 1 and w_.shape[1] > 1:
        raise ValueError("w must have n_assets x 1 size")
    if w.index.tolist() != returns.columns.tolist():
        raise ValueError("w and returns must have the same columns.")
    if codependence == "custom_cov" and custom_cov is None:
        raise ValueError(
            "custom_cov value of codependence parameter requires a custom_cov parameter."
        )

    A_p = connection_matrix(
        returns=returns,
        custom_cov=custom_cov,
        codependence=codependence,
        graph=graph,
        walk_size=walk_size,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
    )

    # Share of the total pairwise weight mass |w_i w_j| that falls on pairs
    # joined by a walk of length at most walk_size.
    n, n = A_p.shape
    ones = np.ones((n, 1))
    wwt = np.abs(w_ @ w_.T)
    ca = ones.T @ (A_p * wwt) @ ones
    ca /= ones.T @ wwt @ ones

    return ca.item()

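# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). connected_assets evaluates 1'(A_p * |w w'|)1 / (1' |w w'| 1): the
# share of the total pairwise weight mass |w_i w_j| that falls on connected
# pairs. A standalone sketch with two connected assets and one isolated one:
def _sketch_connected_share():
    A_p = np.array(
        [[0, 1, 0],
         [1, 0, 0],
         [0, 0, 0]]
    )  # only assets 0 and 1 are connected
    w_ = np.array([[0.5], [0.3], [0.2]])
    ones = np.ones((3, 1))
    wwt = np.abs(w_ @ w_.T)
    ca = (ones.T @ (A_p * wwt) @ ones) / (ones.T @ wwt @ ones)
    return ca.item()  # 2*0.15 / 1.0 = 0.3, since the weights sum to one

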
def related_assets(
    returns,
    w,
    custom_cov=None,
    codependence="pearson",
    linkage="ward",
    opt_k_method="twodiff",
    k=None,
    max_k=10,
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
    leaf_order=True,
):
    r"""
    Calculates the percentage invested in related assets of the portfolio based on :cite:`e-Cajas11` formula.

    Parameters
    ----------
    returns : DataFrame of shape (n_samples, n_assets)
        Assets returns DataFrame, where n_samples is the number of
        observations and n_assets is the number of assets.
    w : DataFrame or Series of shape (n_assets, 1)
        Portfolio weights, where n_assets is the number of assets.
    custom_cov : DataFrame or None, optional
        Custom covariance matrix, used when codependence parameter has value
        'custom_cov'. The default is None.
    codependence : str, can be {'pearson', 'spearman', 'kendall', 'gerber1', 'gerber2', 'abs_pearson', 'abs_spearman', 'abs_kendall', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
        The codependence or similarity matrix used to build the distance
        metric and clusters. The default is 'pearson'. Possible values are:

        - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
        - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
        - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
        - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
        - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
        - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
        - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
        - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
        - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
        - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
        - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.

    linkage : string, optional
        Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
        The default is 'ward'. Possible values are:

        - 'single'.
        - 'complete'.
        - 'average'.
        - 'weighted'.
        - 'centroid'.
        - 'median'.
        - 'ward'.
        - 'DBHT': Direct Bubble Hierarchical Tree.

    opt_k_method : str
        Method used to calculate the optimum number of clusters.
        The default is 'twodiff'. Possible values are:

        - 'twodiff': two difference gap statistic.
        - 'stdsil': standardized silhouette score.

    k : int, optional
        Number of clusters. This value is used instead of the optimal number
        of clusters calculated with the two difference gap statistic.
        The default is None.
    max_k : int, optional
        Max number of clusters used by the two difference gap statistic
        to find the optimal number of clusters. The default is 10.
    bins_info : int or str
        Number of bins used to calculate variation of information. The default
        value is 'KN'. Possible values are:

        - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
        - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
        - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
        - 'HGR': Hacine-Gharbi and Ravier's choice method.
        - int: integer value chosen by the user.

    alpha_tail : float, optional
        Significance level for lower tail dependence index. The default is 0.05.
    gs_threshold : float, optional
        Gerber statistic threshold. The default is 0.5.
    leaf_order : bool, optional
        Indicates if the clusters are ordered so that the distance between
        successive leaves is minimal. The default is True.

    Returns
    -------
    RA : float
        Percentage invested in related assets.

    Raises
    ------
    ValueError when the value cannot be calculated.

    Examples
    --------

    ::

        ra = rp.related_assets(returns,
                               w,
                               codependence="pearson",
                               linkage="ward",
                               k=None,
                               max_k=10)

    """

    w_ = np.array(w, ndmin=2)
    if w_.shape[0] == 1 and w_.shape[1] > 1:
        w_ = w_.T
    if w_.shape[0] > 1 and w_.shape[1] > 1:
        raise ValueError("w must have n_assets x 1 size")
    if w.index.tolist() != returns.columns.tolist():
        raise ValueError("w and returns must have the same columns.")
    if codependence == "custom_cov" and custom_cov is None:
        raise ValueError(
            "custom_cov value of codependence parameter requires a custom_cov parameter."
        )

    L_a = clusters_matrix(
        returns,
        custom_cov=custom_cov,
        codependence=codependence,
        linkage=linkage,
        opt_k_method=opt_k_method,
        k=k,
        max_k=max_k,
        bins_info=bins_info,
        alpha_tail=alpha_tail,
        gs_threshold=gs_threshold,
        leaf_order=leaf_order,
    )

    # Share of the total pairwise weight mass |w_i w_j| that falls on pairs
    # of assets that share a cluster.
    n, n = L_a.shape
    ones = np.ones((n, 1))
    wwt = np.abs(w_ @ w_.T)
    ra = ones.T @ (L_a * wwt) @ ones
    ra /= ones.T @ wwt @ ones

    return ra.item()
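
# Illustrative sketch (not part of the Riskfolio-Lib API, never called by the
# library). related_assets applies the same quotient as connected_assets but
# with the cluster co-membership matrix L_a in place of the walk matrix A_p,
# so it measures the share of pairwise weight mass invested inside clusters
# rather than along graph links. A usage sketch, assuming 'returns' is an
# assets returns DataFrame and 'w' a weights Series indexed like its columns:
def _sketch_network_diagnostics(returns, w):
    ca = connected_assets(returns, w, codependence="pearson",
                          graph="MST", walk_size=1)
    ra = related_assets(returns, w, codependence="pearson",
                        linkage="ward", k=None, max_k=10)
    return ca, ra  # both lie in [0, 1]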