riskfolio-lib 7.2.0__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2210 @@
1
+ """""" #
2
+
3
+ """
4
+ Copyright (c) 2020-2026, Dany Cajas
5
+ All rights reserved.
6
+ This work is licensed under BSD 3-Clause "New" or "Revised" License.
7
+ License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import networkx as nx
13
+ import scipy.cluster.hierarchy as hr
14
+ from scipy.spatial.distance import squareform
15
+ import riskfolio.src.AuxFunctions as af
16
+ import riskfolio.src.DBHT as db
17
+
18
+
19
+ __all__ = [
20
+ "assets_constraints",
21
+ "factors_constraints",
22
+ "integer_constraints",
23
+ "assets_views",
24
+ "factors_views",
25
+ "assets_clusters",
26
+ "hrp_constraints",
27
+ "risk_constraint",
28
+ "connection_matrix",
29
+ "centrality_vector",
30
+ "clusters_matrix",
31
+ "average_centrality",
32
+ "connected_assets",
33
+ "related_assets",
34
+ ]
35
+
36
+
37
+ def assets_constraints(constraints, asset_classes):
38
+ r"""
39
+ Create the linear constraints matrices A and B of the constraint
40
+ :math:`Aw \leq B`.
41
+
42
+ Parameters
43
+ ----------
44
+ constraints : DataFrame of shape (n_constraints, n_fields)
45
+ Constraints DataFrame, where n_constraints is the number of constraints
46
+ and n_fields is the number of fields of constraints DataFrame, the fields
47
+ are:
48
+
49
+ - Disabled: (bool) indicates if the constraint is enabled.
50
+ - Type: (str) can be 'Assets', 'Classes', 'All Assets', 'Each asset in a class' and 'All Classes'.
51
+ - Set: (str) if Type is 'Classes', 'Each asset in a class' or 'All Classes', specifies the name of the asset's classes set.
52
+ - Position: (str) the name of the asset or asset class of the constraint.
53
+ - Sign: (str) can be '>=' or '<='.
54
+ - Weight: (scalar) is the maximum or minimum weight of the absolute constraint.
55
+ - Type Relative: (str) can be 'Assets' or 'Classes'.
56
+ - Relative Set: (str) if Type Relative is 'Classes', specifies the name of the set of asset classes.
57
+ - Relative: (str) the name of the asset or asset class of the relative constraint.
58
+ - Factor: (scalar) is the factor of the relative constraint.
59
+
60
+ asset_classes : DataFrame of shape (n_assets, n_cols)
61
+ Asset's classes matrix, where n_assets is the number of assets and
62
+ n_cols is the number of columns of the matrix where the first column
63
+ is the asset list and the next columns are the different asset's
64
+ classes sets.
65
+
66
+ Returns
67
+ -------
68
+ A : nd-array
69
+ The matrix A of :math:`Aw \leq B`.
70
+
71
+ B : nd-array
72
+ The matrix B of :math:`Aw \leq B`.
73
+
74
+ Raises
75
+ ------
76
+ ValueError when the value cannot be calculated.
77
+
78
+ Examples
79
+ --------
80
+ ::
81
+
82
+ import pandas as pd
+ import riskfolio as rp
83
+
84
+ asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
85
+ 'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
86
+ 'Fixed Income', 'Fixed Income'],
87
+ 'Class 2': ['Technology', 'Technology', 'Technology',
88
+ 'Financial', 'Financial', 'Treasury', 'Treasury'],}
89
+
90
+ asset_classes = pd.DataFrame(asset_classes)
91
+ asset_classes = asset_classes.sort_values(by=['Assets'])
92
+
93
+ constraints = {'Disabled': [False, False, False, False, False, False, False],
94
+ 'Type': ['Classes', 'Classes', 'Assets', 'Assets', 'Classes',
95
+ 'All Assets', 'Each asset in a class'],
96
+ 'Set': ['Class 1', 'Class 1', '', '', 'Class 2', '', 'Class 1'],
97
+ 'Position': ['Equity', 'Fixed Income', 'BAC', 'WFC', 'Financial',
98
+ '', 'Equity'],
99
+ 'Sign': ['<=', '<=', '<=', '<=', '>=', '>=', '>='],
100
+ 'Weight': [0.6, 0.5, 0.1, '', '', 0.02, ''],
101
+ 'Type Relative': ['', '', '', 'Assets', 'Classes', '', 'Assets'],
102
+ 'Relative Set': ['', '', '', '', 'Class 1', '', ''],
103
+ 'Relative': ['', '', '', 'FB', 'Fixed Income', '', 'TLT'],
104
+ 'Factor': ['', '', '', 1.2, 0.5, '', 0.4]}
105
+
106
+ constraints = pd.DataFrame(constraints)
107
+
108
+
109
+ The constraints look like the following image:
110
+
111
+ .. image:: images/Constraints.png
112
+
113
+ It is easier to construct the constraints in Excel and then upload them to a
114
+ DataFrame.
115
+
116
+ To create the matrices A and B we use the following command:
117
+
118
+ ::
119
+
120
+ A, B = rp.assets_constraints(constraints, asset_classes)
121
+
122
+
123
+ The matrices A and B look like this (all constraints were converted to a linear
124
+ constraint):
125
+
126
+ .. image:: images/AxB.png
127
+
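+ As a quick sanity check (a minimal sketch, not part of the library's
+ documented examples; the equally weighted vector w below is hypothetical),
+ a candidate portfolio can be tested against the returned matrices with
+ plain NumPy:
+
+ ::
+
+ import numpy as np
+
+ w = np.ones((7, 1)) / 7  # hypothetical equally weighted portfolio
+ print(bool(np.all(A @ w <= B)))  # A and B encode A @ w <= B row by row
+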
128
+ """
129
+
130
+ if not isinstance(constraints, pd.DataFrame) or not isinstance(
131
+ asset_classes, pd.DataFrame
132
+ ):
133
+ raise ValueError("constraints and asset_classes must be DataFrames")
134
+
135
+ if constraints.shape[1] != 10:
136
+ raise ValueError("constraints must have ten columns")
137
+
138
+ constraints0 = constraints.fillna("")
139
+ constraints0 = constraints0[constraints0["Disabled"] == False]
140
+ data = constraints0.values.tolist()
141
+ assetslist = asset_classes.iloc[:, 0].values.tolist()
142
+
143
+ n, m = len(constraints0), len(asset_classes)
144
+
145
+ A = []
146
+ B = []
147
+ for i in range(0, n):
148
+ if data[i][1] == "Assets":
149
+ item = assetslist.index(data[i][3])
150
+ if data[i][4] == ">=":
151
+ d = 1
152
+ elif data[i][4] == "<=":
153
+ d = -1
154
+ if data[i][5] != "":
155
+ A1 = [0] * m
156
+ A1[item] = d
157
+ A.append(A1)
158
+ B.append([data[i][5] * d])
159
+ else:
160
+ A1 = [0] * m
161
+ A1[item] = 1
162
+ if data[i][6] == "Assets":
163
+ item2 = assetslist.index(data[i][8])
164
+ A2 = [0] * m
165
+ A2[item2] = 1
166
+ elif data[i][6] == "Classes":
167
+ A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
168
+ A1 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
169
+ A.append(A1)
170
+ B.append([0])
171
+ elif data[i][1] == "All Assets":
172
+ item = len(assetslist)
173
+ if data[i][4] == ">=":
174
+ d = 1
175
+ elif data[i][4] == "<=":
176
+ d = -1
177
+ if data[i][5] != "":
178
+ A1 = np.identity(item) * d
179
+ A1 = A1.tolist()
180
+ B1 = np.ones((item, 1)) * d * data[i][5]
181
+ for i in range(0, item):
182
+ A.append(A1[i])
183
+ B.append(B1.tolist()[0])
184
+ else:
185
+ A1 = np.identity(item)
186
+ if data[i][6] == "Assets":
187
+ item2 = assetslist.index(data[i][8])
188
+ A2 = np.zeros((item, item - 1))
189
+ A2 = np.insert(A2, item2 - 1, 1, axis=1)
190
+ elif data[i][6] == "Classes":
191
+ A1 = np.identity(item)
192
+ A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
193
+ A2 = np.ones((item, item)) * np.array(A2)
194
+ A1 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
195
+ for i in range(0, item):
196
+ A.append(A1[i])
197
+ B.append([0])
198
+ elif data[i][1] == "Classes":
199
+ if data[i][4] == ">=":
200
+ d = 1
201
+ elif data[i][4] == "<=":
202
+ d = -1
203
+ if data[i][5] != "":
204
+ A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
205
+ A1 = np.array(A1) * d
206
+ A1 = A1.tolist()
207
+ A.append(A1)
208
+ B.append([data[i][5] * d])
209
+ else:
210
+ A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
211
+ if data[i][6] == "Assets":
212
+ item2 = assetslist.index(data[i][8])
213
+ A2 = [0] * m
214
+ A2[item2] = 1
215
+ elif data[i][6] == "Classes":
216
+ A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
217
+ A1 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
218
+ A.append(A1)
219
+ B.append([0])
220
+ elif data[i][1] == "Each asset in a class":
221
+ if data[i][4] == ">=":
222
+ d = 1
223
+ elif data[i][4] == "<=":
224
+ d = -1
225
+ if data[i][5] != "":
226
+ A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
227
+ l = 0
228
+ for k in A1:
229
+ if k == 1:
230
+ A3 = [0] * m
231
+ A3[l] = 1 * d
232
+ A.append(A3)
233
+ B.append([data[i][5] * d])
234
+ l = l + 1
235
+ else:
236
+ A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
237
+ l = 0
238
+ for k in A1:
239
+ if k == 1:
240
+ A3 = [0] * m
241
+ A3[l] = 1
242
+ if data[i][6] == "Assets":
243
+ item2 = assetslist.index(data[i][8])
244
+ A2 = [0] * m
245
+ A2[item2] = 1
246
+ elif data[i][6] == "Classes":
247
+ A2 = np.where(
248
+ asset_classes[data[i][7]].values == data[i][8], 1, 0
249
+ )
250
+ A3 = (
251
+ (np.array(A3) + np.array(A2) * data[i][9] * -1) * d
252
+ ).tolist()
253
+ A.append(A3)
254
+ B.append([0])
255
+ l = l + 1
256
+ elif data[i][1] == "All Classes":
257
+ if data[i][4] == ">=":
258
+ d = 1
259
+ elif data[i][4] == "<=":
260
+ d = -1
261
+ if data[i][5] != "":
262
+ for k in np.unique(asset_classes[data[i][2]].values):
263
+ A1 = np.where(asset_classes[data[i][2]].values == k, 1, 0) * d
264
+ A1 = A1.tolist()
265
+ A.append(A1)
266
+ B.append([data[i][5] * d])
267
+ else:
268
+ for k in np.unique(asset_classes[data[i][2]].values):
269
+ A1 = np.where(asset_classes[data[i][2]].values == k, 1, 0)
270
+ if data[i][6] == "Assets":
271
+ item2 = assetslist.index(data[i][8])
272
+ A2 = [0] * m
273
+ A2[item2] = 1
274
+ elif data[i][6] == "Classes":
275
+ A2 = np.where(
276
+ asset_classes[data[i][7]].values == data[i][8], 1, 0
277
+ )
278
+ A3 = ((np.array(A1) + np.array(A2) * data[i][9] * -1) * d).tolist()
279
+ A.append(A3)
280
+ B.append([0])
281
+
282
+ A = -np.array(A, ndmin=2, dtype=float)
283
+ B = -np.array(B, ndmin=2, dtype=float)
284
+
285
+ return A, B
286
+
287
+
288
+ def factors_constraints(constraints, loadings):
289
+ r"""
290
+ Create the factors constraints matrices C and D of the constraint
291
+ :math:`Cw \leq D`.
292
+
293
+ Parameters
294
+ ----------
295
+ constraints : DataFrame of shape (n_constraints, n_fields)
296
+ Constraints DataFrame, where n_constraints is the number of constraints
297
+ and n_fields is the number of fields of constraints DataFrame, the fields
298
+ are:
299
+
300
+ - Disabled: (bool) indicates if the constraint is enabled.
301
+ - Factor: (str) the name of the factor of the constraint.
302
+ - Sign: (str) can be '>=' or '<='.
303
+ - Value: (scalar) is the maximum or minimum value of the factor.
+ - Relative Factor: (str) the name of the factor of the relative constraint (optional).
304
+
305
+ loadings : DataFrame of shape (n_assets, n_features)
306
+ The loadings matrix.
307
+
308
+ Returns
309
+ -------
310
+ C : nd-array
311
+ The matrix C of :math:`Cw \leq D`.
312
+
313
+ D : nd-array
314
+ The matrix D of :math:`Cw \leq D`.
315
+
316
+ Raises
317
+ ------
318
+ ValueError when the value cannot be calculated.
319
+
320
+ Examples
321
+ --------
322
+ ::
323
+
324
+ loadings = {'const': [0.0004, 0.0002, 0.0000, 0.0006, 0.0001, 0.0003, -0.0003],
325
+ 'MTUM': [0.1916, 1.0061, 0.8695, 1.9996, 0.0000, 0.0000, 0.0000],
326
+ 'QUAL': [0.0000, 2.0129, 1.4301, 0.0000, 0.0000, 0.0000, 0.0000],
327
+ 'SIZE': [0.0000, 0.0000, 0.0000, 0.4717, 0.0000, -0.1857, 0.0000],
328
+ 'USMV': [-0.7838, -1.6439, -1.0176, -1.4407, 0.0055, 0.5781, 0.0000],
329
+ 'VLUE': [1.4772, -0.7590, -0.4090, 0.0000, -0.0054, -0.4844, 0.9435]}
330
+
331
+ loadings = pd.DataFrame(loadings)
332
+
333
+ constraints = {'Disabled': [False, False, False],
334
+ 'Factor': ['MTUM', 'USMV', 'VLUE'],
335
+ 'Sign': ['<=', '<=', '>='],
336
+ 'Value': [0.9, -1.2, 0.3],
337
+ 'Relative Factor': ['USMV', '', '']}
338
+
339
+ constraints = pd.DataFrame(constraints)
340
+
341
+
342
+ The constraints look like the following image:
343
+
344
+ .. image:: images/Constraints2.png
345
+
346
+ It is easier to construct the constraints in Excel and then upload them to a
347
+ DataFrame.
348
+
349
+ To create the matrixes C and D we use the following command:
350
+
351
+ ::
352
+
353
+ C, D = rp.factors_constraints(constraints, loadings)
354
+
355
+
356
+ The matrices C and D look like this (all constraints were converted to a linear
357
+ constraint):
358
+
359
+ .. image:: images/CxD.png
360
+
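+ As a quick sanity check (a minimal sketch outside the library's documented
+ examples; the weight vector w is hypothetical), the factor exposures implied
+ by the loadings can be tested against the returned matrices:
+
+ ::
+
+ import numpy as np
+
+ w = np.ones((7, 1)) / 7  # hypothetical equally weighted portfolio
+ print(bool(np.all(C @ w <= D)))  # C and D encode C @ w <= D row by row
+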
361
+ """
362
+
363
+ if not isinstance(constraints, pd.DataFrame) or not isinstance(
364
+ loadings, pd.DataFrame
365
+ ):
366
+ raise ValueError("constraints and loadings must be DataFrames")
367
+
368
+ if constraints.shape[1] != 5:
369
+ raise ValueError("constraints must have five columns")
370
+
371
+ constraints0 = constraints.fillna("")
372
+ constraints0 = constraints0[constraints0["Disabled"] == False]
373
+ data = constraints0.values.tolist()
374
+
375
+ n = len(constraints0)
376
+
377
+ C = []
378
+ D = []
379
+ for i in range(0, n):
380
+ if data[i][2] == ">=":
381
+ d = 1
382
+ elif data[i][2] == "<=":
383
+ d = -1
384
+ C1 = loadings[data[i][1]].values
385
+ if data[i][4] != "":
386
+ C2 = loadings[data[i][4]].values
387
+ C1 = C2 - C1
388
+ C.append(C1 * d)
389
+ D.append([data[i][3] * d])
390
+
391
+ C = -np.array(C, ndmin=2, dtype=float)
392
+ D = -np.array(D, ndmin=2, dtype=float)
393
+
394
+ return C, D
395
+
396
+
397
+ def integer_constraints(constraints, asset_classes):
398
+ r"""
399
+ Create the integer constraints matrices A, B, C, D, E, F associated with the
400
+ constraints :math:`Ak \leq B`, :math:`Ck \leq D \odot k_{s}` and
401
+ :math:`E k_{s}\leq F`.
402
+
403
+ Parameters
404
+ ----------
405
+ constraints : DataFrame of shape (n_constraints, n_fields)
406
+ Constraints DataFrame, where n_constraints is the number of constraints
407
+ and n_fields is the number of fields of constraints DataFrame, the fields
408
+ are:
409
+
410
+ - Disabled: (bool) indicates if the constraint is enabled.
411
+ - Type: (str) can be 'Assets' or 'Classes'.
412
+ - Set: (str) if Type is 'Classes', specifies the name of the asset's classes set.
413
+ - Position: (str) the name of the asset or asset class of the constraint, or 'All' for all categories.
414
+ - Kind: (str) can be 'CardUp' (Upper Cardinality), 'CardLow' (Lower Cardinality), 'MuEx' (Mutually Exclusive) and 'Join' (Join Investments).
415
+ - Value: (int or None) is the maximum or minimum value of cardinality constraints.
416
+ - Type Relative: (str) can be: 'Assets' or 'Classes'.
417
+ - Relative Set: (str) if Type Relative is 'Classes', specifies the name of the set of asset classes.
418
+ - Relative: (str) the name of the asset or asset class of the relative constraint.
419
+
420
+ asset_classes : DataFrame of shape (n_assets, n_cols)
421
+ Asset's classes matrix, where n_assets is the number of assets and
422
+ n_cols is the number of columns of the matrix where the first column
423
+ is the asset list and the next columns are the different asset's
424
+ classes sets.
425
+
426
+ Returns
427
+ -------
428
+ A : dict
429
+ The dictionary that contains the matrices A of :math:`Ak \leq B`.
430
+
431
+ B : dict
432
+ The dictionary that contains the matrices B of :math:`Ak \leq B`.
433
+
434
+ C : dict
435
+ The dictionary that contains the matrices C of :math:`Ck \leq D \odot k_{s}`.
436
+
437
+ D : dict
438
+ The dictionary that contains the matrices D of :math:`Ck \leq D \odot k_{s}`.
439
+
440
+ E : dict
441
+ The dictionary that contains the matrices E of :math:`E k_{s}\leq F`.
442
+
443
+ F : dict
444
+ The dictionary that contains the matrices F of :math:`E k_{s}\leq F`.
445
+
446
+ Raises
447
+ ------
448
+ ValueError when the value cannot be calculated.
449
+
450
+ Examples
451
+ --------
452
+ ::
453
+
454
+ import pandas as pd
+ import riskfolio as rp
455
+
456
+ asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
457
+ 'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
458
+ 'Fixed Income', 'Fixed Income'],
459
+ 'Class 2': ['Technology', 'Technology', 'Technology',
460
+ 'Financial', 'Financial', 'Treasury', 'Treasury'],}
461
+
462
+ asset_classes = pd.DataFrame(asset_classes)
463
+ asset_classes = asset_classes.sort_values(by=['Assets'])
464
+
465
+ constraints = {'Disabled': [True, True, True, True, True, True, True, True, True, True, True, False],
466
+ 'Type': ['Assets', 'Assets', 'Assets', 'Assets', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes', 'Classes'],
467
+ 'Set': ['', '', '', '', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry', 'Industry'],
468
+ 'Position': ['', '', 'PCAR', 'PSA', '', '', 'Financials', 'Energy', 'Financials', 'Financials', 'Industrials', 'Financials'],
469
+ 'Kind': ['CardUp', 'CardLow', 'MuEx', 'Join', 'CardUp', 'CardLow', 'CardUp', 'CardLow', 'MuEx', 'MuEx', 'Join', 'Join'],
470
+ 'Value': [7.0, 16.0, '', '', 4.0, 9.0, 1.0, 1.0, '', '', '', ''],
471
+ 'Type Relative': ['', '', 'Assets', 'Assets', '', '', '', '', 'Assets', 'Classes', 'Assets', 'Classes'],
472
+ 'Relative Set': ['', '', '', '', '', '', '', '', '', 'Industry', '', 'Industry'],
473
+ 'Relative': ['', '', 'CPB', 'MMC', '', '', '', '', 'BAX', 'Consumer Staples', 'PSA', 'Information Technology']}
474
+ constraints = pd.DataFrame(constraints)
475
+
476
+
477
+ The constraints look like the following image:
478
+
479
+ .. image:: images/Constraints_int.png
480
+
481
+ It is easier to construct the constraints in Excel and then upload them to a
482
+ DataFrame.
483
+
484
+ To create the dictionaries A, B, C, D, E, and F we use the following command:
485
+
486
+ ::
487
+
488
+ A, B, C, D, E, F = rp.integer_constraints(constraints, asset_classes)
489
+
490
+
491
+ The dictionaries A and B look like the following image:
492
+
493
+ .. image:: images/AxB_int.png
494
+
495
+ The dictionaries C and D look like the following image:
496
+
497
+ .. image:: images/CxD_int.png
498
+
499
+ The dictionaries E and F look like the following image:
500
+
501
+ .. image:: images/ExF_int.png
502
+
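+ As a quick sanity check (a minimal sketch outside the library's documented
+ examples; the all-ones selection vector k is hypothetical), a candidate
+ binary asset selection can be tested against the A and B matrices of each
+ group:
+
+ ::
+
+ import numpy as np
+
+ m = next(a.shape[1] for a in A.values() if a.size > 0)  # number of assets
+ k = np.ones((m, 1))  # hypothetical: every asset selected
+ for key in A.keys():  # one entry per 'Set' used in the constraints
+     if A[key].size > 0:
+         print(key, bool(np.all(A[key] @ k <= B[key])))
+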
503
+ """
504
+
505
+ if not isinstance(constraints, pd.DataFrame) or not isinstance(
506
+ asset_classes, pd.DataFrame
507
+ ):
508
+ raise ValueError("constraints and asset_classes must be DataFrames")
509
+
510
+ if constraints.shape[1] != 9:
511
+ raise ValueError("constraints must have nine columns")
512
+
513
+ constraints0 = constraints.fillna("")
514
+ constraints0 = constraints0[constraints0["Disabled"] == False]
515
+ data = constraints0.values.tolist()
516
+ assetslist = asset_classes.iloc[:, 0].values.tolist()
517
+ groups = constraints0["Set"].unique().tolist()
518
+
519
+ n = len(constraints0)
520
+ m = len(asset_classes)
521
+
522
+ A, B, C, D, E, F, G, H, I = {}, {}, {}, {}, {}, {}, {}, {}, {}
523
+ for i, j in enumerate(groups):
524
+ A[i], B[i], C[i], D[i], E[i], F[i], I[i] = [], [], [], [], [], [], False
525
+
526
+ for group in groups:
527
+ G[group] = []
528
+ if group != "":
529
+ H[group] = asset_classes[group].unique().tolist()
530
+ for i in H[group]:
531
+ G1 = np.where(asset_classes[group].values == i, 1, 0).tolist()
532
+ G[group].append(G1)
533
+ G[group] = np.array(G[group])
534
+
535
+ for i in range(0, n):
536
+ key = groups.index(data[i][2])
537
+ if data[i][1].lower() == "assets":
538
+ if data[i][4].lower()[:4] == "card":
539
+ if (
540
+ data[i][6].lower() != ""
541
+ or data[i][7].lower() != ""
542
+ or data[i][8].lower() != ""
543
+ ):
544
+ raise ValueError(
545
+ "Cardinality constraints don't require Type Relative , Relative Set and Relative columns."
546
+ )
547
+
548
+ if data[i][5] != "":
549
+ d = int(data[i][5])
550
+ if data[i][4].lower()[4:] == "up":
551
+ A1 = np.ones((m,)).tolist()
552
+ B1 = [d]
553
+ elif data[i][4].lower()[4:] == "low":
554
+ A1 = (-np.ones((m,))).tolist()
555
+ B1 = [-d]
556
+ else:
557
+ raise ValueError("Only CardLow and CardUp values are allowed.")
558
+ A[key].append(A1)
559
+ B[key].append(B1)
560
+ elif data[i][5] == "":
561
+ raise ValueError("Cardinality constraints require a Value column.")
562
+
563
+ elif data[i][4].lower() in ["muex", "join"]:
564
+ item = assetslist.index(data[i][3])
565
+ A1 = [0] * m
566
+ A1[item] = 1
567
+ if data[i][6].lower() == "assets":
568
+ item2 = assetslist.index(data[i][8])
569
+ A2 = [0] * m
570
+ A2[item2] = 1
571
+ elif data[i][6].lower() == "classes":
572
+ A2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
573
+ if data[i][4].lower() == "muex":
574
+ A1 = (np.sum(A2) * np.array(A1) + np.array(A2)).tolist()
575
+ A[key].append(A1)
576
+ B[key].append([np.sum(A2)])
577
+ elif data[i][4].lower() == "join":
578
+ A1 = (np.sum(A2) * np.array(A1) - np.array(A2)).tolist()
579
+ A[key].append(A1)
580
+ B[key].append([0])
581
+
582
+ elif data[i][1].lower() == "classes":
583
+ C0 = G[data[i][2]]
584
+ I_m = np.identity(m)
585
+
586
+ if data[i][4].lower()[:4] == "card":
587
+ if (
588
+ data[i][6].lower() != ""
589
+ or data[i][7].lower() != ""
590
+ or data[i][8].lower() != ""
591
+ ):
592
+ raise ValueError(
593
+ "Cardinality constraints don't require Type Relative, Relative Set and Relative columns."
594
+ )
595
+
596
+ if data[i][5] != "":
597
+ d = int(data[i][5])
598
+ if data[i][3].lower() == "":
599
+ A1, B1 = [], []
600
+ C1 = np.vstack([C0, -C0])
601
+ D1 = C0.sum(axis=1).reshape((-1, 1))
602
+ D1 = np.vstack([D1, -np.ones((D1.shape[0], 1))])
603
+ E1 = np.ones((C0.shape[0],))
604
+ C1, D1 = C1.tolist(), D1.tolist()
605
+ elif data[i][3].lower() == "all":
606
+ A1 = C0
607
+ B1, C1, D1, E1, F1 = [], [], [], [], []
608
+ else:
609
+ A1 = np.where(
610
+ asset_classes[data[i][2]].values == data[i][3], 1, 0
611
+ )
612
+ B1, C1, D1, E1, F1 = [], [], [], [], []
613
+
614
+ if data[i][4].lower()[4:] == "up":
615
+ if len(A1) == 0:
616
+ E1 = E1.tolist()
617
+ F1 = [d]
618
+ else:
619
+ if data[i][3].lower() == "all":
620
+ A1 = A1.tolist()
621
+ B1 = (np.ones((C0.shape[0], 1)) * d).tolist()
622
+ else:
623
+ A1 = A1.tolist()
624
+ B1 = [d]
625
+ elif data[i][4].lower()[4:] == "low":
626
+ if len(A1) == 0:
627
+ E1 = (-E1).tolist()
628
+ F1 = [-d]
629
+ else:
630
+ if data[i][3].lower() == "all":
631
+ A1 = (-A1).tolist()
632
+ B1 = (np.ones((C0.shape[0], 1)) * -d).tolist()
633
+ else:
634
+ A1 = (-A1).tolist()
635
+ B1 = [-d]
636
+ else:
637
+ raise ValueError("Only CardLow and CardUp values are allowed.")
638
+
639
+ if A1 != []:
640
+ if data[i][3].lower() == "all":
641
+ for row1 in A1:
642
+ A[key].append(row1)
643
+ else:
644
+ A[key].append(A1)
645
+ if B1 != []:
646
+ if data[i][3].lower() == "all":
647
+ for row1 in B1:
648
+ B[key].append(row1)
649
+ else:
650
+ B[key].append(B1)
651
+ if E1 != []:
652
+ E[key].append(E1)
653
+ if F1 != []:
654
+ F[key].append(F1)
655
+ if I[key] == False:
656
+ if C1 != []:
657
+ for row1 in C1:
658
+ C[key].append(row1)
659
+ if D1 != []:
660
+ for row1 in D1:
661
+ D[key].append(row1)
662
+ I[key] = True
663
+
664
+ elif data[i][5] == "":
665
+ raise ValueError("Cardinality constraints require a Value column.")
666
+
667
+ elif data[i][4].lower() in ["muex", "join"]:
668
+ if data[i][3].lower() != "":
669
+ A1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
670
+ if data[i][6].lower() == "assets":
671
+ item = assetslist.index(data[i][8])
672
+ A2 = [0] * m
673
+ A2[item] = 1
674
+ if data[i][4].lower() == "muex":
675
+ A3 = np.sum(A1) * np.array(A2, ndmin=2) + np.array(
676
+ A1, ndmin=2
677
+ )
678
+ B1 = [np.sum(A1)]
679
+ elif data[i][4].lower() == "join":
680
+ A3 = -np.array(A2) - np.array(A1, ndmin=2)
681
+ B1 = [-np.sum(A1) - 1]
682
+
683
+ A[key].append(A3.flatten().tolist())
684
+ B[key].append(B1)
685
+
686
+ elif data[i][6].lower() == "classes":
687
+ A2 = np.where(
688
+ asset_classes[data[i][7]].values == data[i][8], 1, 0
689
+ )
690
+ A3 = I_m[np.array(A2, dtype=bool)]
691
+ A4 = np.repeat(np.array(A1, ndmin=2), A3.shape[0], axis=0)
692
+ if data[i][4].lower() == "muex":
693
+ A5 = np.sum(A1) * np.array(A3, ndmin=2) + np.array(
694
+ A4, ndmin=2
695
+ )
696
+ B1 = np.ones((A3.shape[0], 1)) * np.sum(A1)
697
+ elif data[i][4].lower() == "join":
698
+ A5 = -np.array(A3, ndmin=2) - np.array(A4, ndmin=2)
699
+ B1 = np.ones((A3.shape[0], 1)) * (-np.sum(A1) - 1)
700
+
701
+ for row1 in A5:
702
+ A[key].append(row1.tolist())
703
+ for row1 in B1:
704
+ B[key].append(row1.tolist())
705
+
706
+ for i in A.keys():
707
+ A[i] = np.array(A[i], ndmin=2, dtype=float)
708
+ B[i] = np.array(B[i], ndmin=2, dtype=float)
709
+ C[i] = np.array(C[i], ndmin=2, dtype=float)
710
+ D[i] = np.array(D[i], ndmin=2, dtype=float)
711
+ E[i] = np.array(E[i], ndmin=2, dtype=float)
712
+ F[i] = np.array(F[i], ndmin=2, dtype=float)
713
+
714
+ return A, B, C, D, E, F
715
+
716
+
717
+ def assets_views(views, asset_classes):
718
+ r"""
719
+ Create the assets views matrices P and Q of the views :math:`Pw = Q`.
720
+
721
+ Parameters
722
+ ----------
723
+ views : DataFrame of shape (n_views, n_fields)
724
+ views DataFrame, where n_views is the number of views
725
+ and n_fields is the number of fields of views DataFrame, the fields
726
+ are:
727
+
728
+ - Disabled: (bool) indicates if the view is enabled.
729
+ - Type: (str) can be: 'Assets' or 'Classes'.
730
+ - Set: (str) if Type is 'Classes', specifies the name of the set of asset classes.
731
+ - Position: (str) the name of the asset or asset class of the view.
732
+ - Sign: (str) can be '>=' or '<='.
733
+ - Return: (scalar) is the return of the view.
734
+ - Type Relative: (str) can be: 'Assets' or 'Classes'.
735
+ - Relative Set: (str) if Type Relative is 'Classes', specifies the name of the set of asset classes.
736
+ - Relative: (str) the name of the asset or asset class of the relative view.
737
+
738
+ asset_classes : DataFrame of shape (n_assets, n_cols)
739
+ Asset's classes matrix, where n_assets is the number of assets and
740
+ n_cols is the number of columns of the matrix where the first column
741
+ is the asset list and the next columns are the different asset's
742
+ classes sets.
743
+
744
+ Returns
745
+ -------
746
+ P : nd-array
747
+ The matrix P that shows the relation among assets in each view.
748
+
749
+ Q : nd-array
750
+ The matrix Q that shows the expected return of each view.
751
+
752
+ Raises
753
+ ------
754
+ ValueError when the value cannot be calculated.
755
+
756
+ Examples
757
+ --------
758
+ ::
759
+
760
+ asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
761
+ 'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
762
+ 'Fixed Income', 'Fixed Income'],
763
+ 'Class 2': ['Technology', 'Technology', 'Technology',
764
+ 'Financial', 'Financial', 'Treasury', 'Treasury'],}
765
+
766
+ asset_classes = pd.DataFrame(asset_classes)
767
+ asset_classes = asset_classes.sort_values(by=['Assets'])
768
+
769
+ views = {'Disabled': [False, False, False, False],
770
+ 'Type': ['Assets', 'Classes', 'Classes', 'Assets'],
771
+ 'Set': ['', 'Class 2','Class 1', ''],
772
+ 'Position': ['WFC', 'Financial', 'Equity', 'FB'],
773
+ 'Sign': ['<=', '>=', '>=', '>='],
774
+ 'Return': [ 0.3, 0.1, 0.05, 0.03 ],
775
+ 'Type Relative': [ 'Assets', 'Classes', 'Assets', ''],
776
+ 'Relative Set': [ '', 'Class 1', '', ''],
777
+ 'Relative': ['FB', 'Fixed Income', 'TLT', '']}
778
+
779
+ views = pd.DataFrame(views)
780
+
781
+
782
+ The views look like the following image:
783
+
784
+ .. image:: images/Views.png
785
+
786
+ It is easier to construct the views in Excel and then upload them to a
787
+ DataFrame.
788
+
789
+ To create the matrices P and Q we use the following command:
790
+
791
+ ::
792
+
793
+ P, Q = rp.assets_views(views, asset_classes)
794
+
795
+
796
+ The matrices P and Q look like the following image:
797
+
798
+ .. image:: images/PxQ.png
799
+
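+ P and Q are meant to be used as inputs of the Black Litterman model (a
+ sketch, assuming port is a rp.Portfolio built from the returns of the same
+ assets and w is a DataFrame of benchmark weights, as in the library's Black
+ Litterman examples):
+
+ ::
+
+ port.blacklitterman_stats(P=P, Q=Q, rf=0, w=w, delta=None, eq=True)
+ w_bl = port.optimization(model='BL', rm='MV', obj='Sharpe', rf=0, hist=False)
+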
800
+ """
801
+
802
+ if not isinstance(views, pd.DataFrame) or not isinstance(
803
+ asset_classes, pd.DataFrame
804
+ ):
805
+ raise ValueError("constraints and asset_classes must be DataFrames")
806
+
807
+ if views.shape[1] != 9:
808
+ raise ValueError("constraints must have nine columns")
809
+
810
+ m = len(asset_classes)
811
+ views0 = views.fillna("")
812
+ views0 = views0[views0["Disabled"] == False]
813
+ data = views0.values.tolist()
814
+ n = len(views0)  # count only the enabled views
815
+ assetslist = asset_classes.iloc[:, 0].values.tolist()
816
+
817
+ P = []
818
+ Q = []
819
+ for i in range(0, n):
820
+ valid = False
821
+ if data[i][1] == "Assets":
822
+ item = assetslist.index(data[i][3])
823
+ if data[i][4] == ">=":
824
+ d = 1
825
+ elif data[i][4] == "<=":
826
+ d = -1
827
+ if data[i][5] != "":
828
+ P1 = [0] * m
829
+ P1[item] = 1
830
+ if data[i][6] == "Assets" and data[i][8] != "":
831
+ item2 = assetslist.index(data[i][8])
832
+ P2 = [0] * m
833
+ P2[item2] = 1
834
+ valid = True
835
+ elif data[i][6] == "Classes" and data[i][7] != "" and data[i][8] != "":
836
+ P2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
837
+ P2 = P2 / np.sum(P2)
838
+ valid = True
839
+ elif data[i][6] == "" and data[i][7] == "" and data[i][8] == "":
840
+ P2 = [0] * m
841
+ valid = True
842
+ if valid == True:
843
+ P1 = ((np.array(P1) - np.array(P2)) * d).tolist()
844
+ P.append(P1)
845
+ Q.append([data[i][5] * d])
846
+ elif data[i][1] == "Classes":
847
+ if data[i][4] == ">=":
848
+ d = 1
849
+ else:
850
+ d = -1
851
+ if data[i][5] != "":
852
+ P1 = np.where(asset_classes[data[i][2]].values == data[i][3], 1, 0)
853
+ P1 = P1 / np.sum(P1)
854
+ if data[i][6] == "Assets" and data[i][8] != "":
855
+ item2 = assetslist.index(data[i][8])
856
+ P2 = [0] * m
857
+ P2[item2] = 1
858
+ valid = True
859
+ elif data[i][6] == "Classes" and data[i][7] != "" and data[i][8] != "":
860
+ P2 = np.where(asset_classes[data[i][7]].values == data[i][8], 1, 0)
861
+ P2 = P2 / np.sum(P2)
862
+ valid = True
863
+ elif data[i][6] == "" and data[i][7] == "" and data[i][8] == "":
864
+ P2 = [0] * m
865
+ valid = True
866
+ if valid == True:
867
+ P1 = ((np.array(P1) - np.array(P2)) * d).tolist()
868
+ P.append(P1)
869
+ Q.append([data[i][5] * d])
870
+
871
+ P = np.array(P, ndmin=2, dtype=float)
872
+ Q = np.array(Q, ndmin=2, dtype=float)
873
+
874
+ for i in range(len(Q)):
875
+ if Q[i, 0] < 0:
876
+ P[i, :] = -1.0 * P[i, :]
877
+ Q[i, :] = -1.0 * Q[i, :]
878
+
879
+ return P, Q
880
+
881
+
882
+ def factors_views(views, loadings, const=True):
883
+ r"""
884
+ Create the factor views matrices P and Q of the views on risk factors
885
+ :math:`Pf = Q`.
886
+
887
+ Parameters
888
+ ----------
889
+ views : DataFrame of shape (n_views, n_fields)
890
+ views DataFrame, where n_views is the number of views
891
+ and n_fields is the number of fields of views DataFrame, the fields
892
+ are:
893
+
894
+ - Disabled: (bool) indicates if the view is enabled.
895
+ - Factor: (str) the name of the factor of the view.
896
+ - Sign: (str) can be '>=' or '<='.
897
+ - Value: (scalar) is the maximum or minimum value of the factor.
+ - Relative Factor: (str) the name of the factor of the relative view (optional).
898
+
899
+ loadings : DataFrame of shape (n_assets, n_features)
900
+ The loadings matrix.
901
+
902
+ Returns
903
+ -------
904
+ P : nd-array
905
+ The matrix P that shows the relation among factors in each factor view.
906
+
907
+ Q : nd-array
908
+ The matrix Q that shows the expected return of each factor view.
909
+
910
+ Raises
911
+ ------
912
+ ValueError when the value cannot be calculated.
913
+
914
+ Examples
915
+ --------
916
+ ::
917
+
918
+ loadings = {'const': [0.0004, 0.0002, 0.0000, 0.0006, 0.0001, 0.0003, -0.0003],
919
+ 'MTUM': [0.1916, 1.0061, 0.8695, 1.9996, 0.0000, 0.0000, 0.0000],
920
+ 'QUAL': [0.0000, 2.0129, 1.4301, 0.0000, 0.0000, 0.0000, 0.0000],
921
+ 'SIZE': [0.0000, 0.0000, 0.0000, 0.4717, 0.0000, -0.1857, 0.0000],
922
+ 'USMV': [-0.7838, -1.6439, -1.0176, -1.4407, 0.0055, 0.5781, 0.0000],
923
+ 'VLUE': [1.4772, -0.7590, -0.4090, 0.0000, -0.0054, -0.4844, 0.9435]}
924
+
925
+ loadings = pd.DataFrame(loadings)
926
+
927
+ factorsviews = {'Disabled': [False, False, False],
928
+ 'Factor': ['MTUM', 'USMV', 'VLUE'],
929
+ 'Sign': ['<=', '<=', '>='],
930
+ 'Value': [0.9, -1.2, 0.3],
931
+ 'Relative Factor': ['USMV', '', '']}
932
+
933
+ factorsviews = pd.DataFrame(factorsviews)
934
+
935
+
936
+ The views look like the following image:
937
+
938
+ .. image:: images/factorsviews.png
939
+
940
+ It is easier to construct the views in Excel and then upload them to a
941
+ DataFrame.
942
+
943
+ To create the matrices P and Q we use the following command:
944
+
945
+ ::
946
+
947
+ P, Q = rp.factors_views(factorsviews,
948
+ loadings,
949
+ const=True)
950
+
951
+
952
+ The matrices P and Q look like the following image:
953
+
954
+ .. image:: images/P_fxQ_f.png
955
+
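+ A quick way to read the output (a minimal sketch outside the library's
+ documented examples): each row of P encodes one view over the factor
+ columns of the loadings (the 'const' column is excluded when const=True),
+ and Q holds the viewed value, so relative factor views appear as rows of P
+ that sum to zero:
+
+ ::
+
+ import numpy as np
+
+ print(np.isclose(P.sum(axis=1), 0))  # True only for relative factor views
+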
956
+ """
957
+
958
+ if not isinstance(views, pd.DataFrame) or not isinstance(loadings, pd.DataFrame):
959
+ raise ValueError("constraints and loadings must be DataFrames")
960
+
961
+ if views.shape[1] != 5:
962
+ raise ValueError("constraints must have five columns")
963
+
964
+ views0 = views.fillna("")
965
+ views0 = views0[views0["Disabled"] == False]
966
+ data = views0.values.tolist()
967
+ n = len(views0)  # count only the enabled views
968
+ factorslist = loadings.columns.tolist()
969
+ if const == True:
970
+ factorslist = factorslist[1:]
971
+ m = len(factorslist)
972
+
973
+ P = []
974
+ Q = []
975
+ for i in range(0, n):
976
+ if data[i][0] == False:
977
+ item = factorslist.index(data[i][1])
978
+ if data[i][2] == ">=":
979
+ d = 1
980
+ elif data[i][2] == "<=":
981
+ d = -1
982
+ P1 = [0] * m
983
+ P1[item] = d
984
+ if data[i][4] != "":
985
+ item = factorslist.index(data[i][4])
986
+ P1[item] = -d
987
+ P.append(P1)
988
+ Q.append([data[i][3] * d])
989
+
990
+ P = np.array(P, ndmin=2, dtype=float)
991
+ Q = np.array(Q, ndmin=2, dtype=float)
992
+
993
+ return P, Q
994
+
995
+
996
+ def assets_clusters(
997
+ returns,
998
+ custom_cov=None,
999
+ codependence="pearson",
1000
+ linkage="ward",
1001
+ opt_k_method="twodiff",
1002
+ k=None,
1003
+ max_k=10,
1004
+ bins_info="KN",
1005
+ alpha_tail=0.05,
1006
+ gs_threshold=0.5,
1007
+ leaf_order=True,
1008
+ ):
1009
+ r"""
1010
+ Create asset classes based on hierarchical clustering.
1011
+
1012
+ Parameters
1013
+ ----------
1014
+ returns : DataFrame of shape (n_samples, n_assets)
1015
+ Assets returns DataFrame, where n_samples is the number of
1016
+ observations and n_assets is the number of assets.
1017
+ custom_cov : DataFrame or None, optional
1018
+ Custom covariance matrix, used when codependence parameter has value
1019
+ 'custom_cov'. The default is None.
1020
+ codependence : str, can be {'pearson', 'spearman', 'kendall', 'gerber1', 'gerber2', 'abs_pearson', 'abs_spearman', 'abs_kendall', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
1021
+ The codependence or similarity matrix used to build the distance
1022
+ metric and clusters. The default is 'pearson'. Possible values are:
1023
+
1024
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1025
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
1026
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
1027
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
1028
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
1029
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1030
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1031
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
1032
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
1033
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
1034
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
1035
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1036
+
1037
+ linkage : string, optional
1038
+ Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
1039
+ The default is 'ward'. Possible values are:
1040
+
1041
+ - 'single'.
1042
+ - 'complete'.
1043
+ - 'average'.
1044
+ - 'weighted'.
1045
+ - 'centroid'.
1046
+ - 'median'.
1047
+ - 'ward'.
1048
+ - 'DBHT'. Direct Bubble Hierarchical Tree.
1049
+
1050
+ opt_k_method : str
1051
+ Method used to calculate the optimum number of clusters.
1052
+ The default is 'twodiff'. Possible values are:
1053
+
1054
+ - 'twodiff': two difference gap statistic.
1055
+ - 'stdsil': standardized silhouette score.
1056
+
1057
+ k : int, optional
1058
+ Number of clusters. This value is used instead of the optimal number
1059
+ of clusters calculated with the two difference gap statistic.
1060
+ The default is None.
1061
+ max_k : int, optional
1062
+ Max number of clusters used by the two difference gap statistic
1063
+ to find the optimal number of clusters. The default is 10.
1064
+ bins_info : int or str
1065
+ Number of bins used to calculate variation of information. The default
1066
+ value is 'KN'. Possible values are:
1067
+
1068
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
1069
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
1070
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
1071
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
1072
+ - int: integer value chosen by the user.
1073
+
1074
+ alpha_tail : float, optional
1075
+ Significance level for lower tail dependence index. The default is 0.05.
1076
+ gs_threshold : float, optional
1077
+ Gerber statistic threshold. The default is 0.5.
1078
+ leaf_order : bool, optional
1079
+ Indicates if the clusters are ordered so that the distance between
1080
+ successive leaves is minimal. The default is True.
1081
+
1082
+ Returns
1083
+ -------
1084
+ clusters : DataFrame
1085
+ A dataframe with asset classes based on hierarchical clustering.
1086
+
1087
+ Raises
1088
+ ------
1089
+ ValueError when the value cannot be calculated.
1090
+
1091
+ Examples
1092
+ --------
1093
+
1094
+ ::
1095
+
1096
+ clusters = rp.assets_clusters(returns,
1097
+ codependence='pearson',
1098
+ linkage='ward',
1099
+ k=None,
1100
+ max_k=10,
1101
+ alpha_tail=0.05,
1102
+ leaf_order=True)
1103
+
1104
+
1105
+ The clusters dataframe looks like the following image:
1106
+
1107
+ .. image:: images/clusters_df.png
1108
+
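+ The resulting classes can be fed straight back into the other helpers in
+ this module, for example to cap the weight of every cluster (a sketch that
+ reuses the clusters DataFrame built above; the 0.4 cap is arbitrary):
+
+ ::
+
+ constraints = {'Disabled': [False],
+ 'Type': ['All Classes'],
+ 'Set': ['Clusters'],
+ 'Position': [''],
+ 'Sign': ['<='],
+ 'Weight': [0.4],
+ 'Type Relative': [''],
+ 'Relative Set': [''],
+ 'Relative': [''],
+ 'Factor': ['']}
+ constraints = pd.DataFrame(constraints)
+
+ A, B = rp.assets_constraints(constraints, clusters)
+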
1109
+ """
1110
+
1111
+ if not isinstance(returns, pd.DataFrame):
1112
+ raise ValueError("returns must be a DataFrame")
1113
+
1114
+ # Calculating codependence matrix and distance metric
1115
+ codep, dist = af.codep_dist(
1116
+ returns=returns,
1117
+ custom_cov=custom_cov,
1118
+ codependence=codependence,
1119
+ bins_info=bins_info,
1120
+ alpha_tail=alpha_tail,
1121
+ gs_threshold=gs_threshold,
1122
+ )
1123
+ # Hierarchical clustering
1124
+ dist = dist.to_numpy()
1125
+ dist = pd.DataFrame(dist, columns=codep.columns, index=codep.index)
1126
+ if linkage == "DBHT":
1127
+ # different choices for D, S give different outputs!
1128
+ D = dist.to_numpy() # dissimilarity matrix
1129
+ if codependence in {"pearson", "spearman"}:
1130
+ S = (1 - dist**2).to_numpy()
1131
+ else:
1132
+ S = codep.copy().to_numpy() # similarity matrix
1133
+ (_, _, _, _, _, clustering) = db.DBHTs(
1134
+ D, S, leaf_order=leaf_order
1135
+ ) # DBHT clustering
1136
+ else:
1137
+ p_dist = squareform(dist, checks=False)
1138
+ clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)
1139
+
1140
+ # optimal number of clusters
1141
+ if k is None:
1142
+ if opt_k_method == "twodiff":
1143
+ k, clustering_inds = af.two_diff_gap_stat(dist, clustering, max_k)
1144
+ elif opt_k_method == "stdsil":
1145
+ k, clustering_inds = af.std_silhouette_score(dist, clustering, max_k)
1146
+ else:
1147
+ raise ValueError("The only opt_k_method available are twodiff and stdsil")
1148
+ else:
1149
+ clustering_inds = hr.fcluster(clustering, k, criterion="maxclust")
1150
+
1151
+ # Building clusters
1152
+ labels = np.array(returns.columns.tolist())
1153
+ clusters = {"Assets": [], "Clusters": []}
1154
+
1155
+ for i, v in enumerate(clustering_inds):
1156
+ clusters["Assets"].append(labels[i])
1157
+ clusters["Clusters"].append("Cluster " + str(v))
1158
+
1159
+ clusters = pd.DataFrame(clusters)
1160
+ clusters = clusters.sort_values(by=["Assets"])
1161
+
1162
+ return clusters
1163
+
1164
+
1165
+ def hrp_constraints(constraints, asset_classes):
1166
+ r"""
1167
+ Create the upper and lower bound constraints for the hierarchical risk parity
1168
+ model.
1169
+
1170
+ Parameters
1171
+ ----------
1172
+ constraints : DataFrame of shape (n_constraints, n_fields)
1173
+ Constraints DataFrame, where n_constraints is the number of constraints
1174
+ and n_fields is the number of fields of constraints DataFrame, the fields
1175
+ are:
1176
+
1177
+ - Disabled: (bool) indicates if the constraint is enabled.
1178
+ - Type: (str) can be: 'Assets', 'All Assets' and 'Each asset in a class'.
+ - Set: (str) if Type is 'Each asset in a class', specifies the name of the asset's classes set.
1179
+ - Position: (str) the name of the asset or asset class of the constraint.
1180
+ - Sign: (str) can be '>=' or '<='.
1181
+ - Weight: (scalar) is the maximum or minimum weight of the absolute constraint.
1182
+
1183
+ asset_classes : DataFrame of shape (n_assets, n_cols)
1184
+ Asset's classes DataFrame, where n_assets is the number of assets and
1185
+ n_cols is the number of columns of the DataFrame where the first column
1186
+ is the asset list and the next columns are the different asset's
1187
+ classes sets.
1188
+
1189
+ Returns
1190
+ -------
1191
+ w_max : pd.Series
1192
+ The upper bound of hierarchical risk parity weights constraints.
1193
+
1194
+ w_min : pd.Series
1195
+ The lower bound of hierarchical risk parity weights constraints.
1196
+
1197
+ Raises
1198
+ ------
1199
+ ValueError when the value cannot be calculated.
1200
+
1201
+ Examples
1202
+ --------
1203
+ ::
1204
+
1205
+ asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
1206
+ 'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
1207
+ 'Fixed Income', 'Fixed Income'],
1208
+ 'Class 2': ['Technology', 'Technology', 'Technology',
1209
+ 'Financial', 'Financial', 'Treasury', 'Treasury'],}
1210
+
1211
+ asset_classes = pd.DataFrame(asset_classes)
1212
+ asset_classes = asset_classes.sort_values(by=['Assets'])
1213
+
1214
+ constraints = {'Disabled': [False, False, False, False, False, False],
1215
+ 'Type': ['Assets', 'Assets', 'All Assets', 'All Assets',
1216
+ 'Each asset in a class', 'Each asset in a class'],
1217
+ 'Set': ['', '', '', '','Class 1', 'Class 2'],
1218
+ 'Position': ['BAC', 'FB', '', '', 'Equity', 'Treasury'],
1219
+ 'Sign': ['>=', '<=', '<=', '>=', '<=', '<='],
1220
+ 'Weight': [0.02, 0.085, 0.09, 0.01, 0.07, 0.06]}
1221
+
1222
+ constraints = pd.DataFrame(constraints)
1223
+
1224
+ The constraints look like the following image:
1225
+
1226
+ .. image:: images/HRPConstraints.png
1227
+
1228
+ It is easier to construct the constraints in Excel and then upload them to a
1229
+ DataFrame.
1230
+
1231
+ To create the pd.Series w_max and w_min we use the following command:
1232
+
1233
+ ::
1234
+
1235
+ w_max, w_min = rp.hrp_constraints(constraints, asset_classes)
1236
+
1237
+
1238
+ The pd.Series w_max and w_min look like this (all constraints were
1239
+ merged into a single upper and lower bound for each asset):
1240
+
1241
+ .. image:: images/HRP_Bounds.png
1242
+
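+ The bounds are meant to be passed to a hierarchical portfolio optimization
+ (a sketch, assuming Y is a returns DataFrame of the same assets, as in the
+ library's HRP examples):
+
+ ::
+
+ port = rp.HCPortfolio(returns=Y)
+ w = port.optimization(model='HRP', rm='MV', rf=0,
+                       linkage='ward', leaf_order=True,
+                       w_max=w_max, w_min=w_min)
+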
1243
+ """
1244
+
1245
+ if not isinstance(constraints, pd.DataFrame) or not isinstance(
1246
+ asset_classes, pd.DataFrame
1247
+ ):
1248
+ raise ValueError("constraints and asset_classes must be DataFrames")
1249
+
1250
+ if constraints.shape[1] != 6:
1251
+ raise ValueError("constraints must have six columns")
1252
+
1253
+ n = len(constraints)
1254
+ data = constraints.fillna("").copy()
1255
+ assetslist = asset_classes.iloc[:, 0].values.tolist()
1256
+
1257
+ w_max = pd.Series(1.0, index=assetslist)
1258
+ w_min = pd.Series(0.0, index=assetslist)
1259
+
1260
+ for i in range(0, n):
1261
+ if data.loc[i, "Disabled"] == False:
1262
+ if data.loc[i, "Type"] == "Assets":
1263
+ assets = data.loc[i, "Position"]
1264
+ if data.loc[i, "Sign"] == ">=":
1265
+ if w_min.loc[assets] <= data.loc[i, "Weight"]:
1266
+ w_min.loc[assets] = data.loc[i, "Weight"]
1267
+ elif data.loc[i, "Sign"] == "<=":
1268
+ if w_max.loc[assets] >= data.loc[i, "Weight"]:
1269
+ w_max.loc[assets] = data.loc[i, "Weight"]
1270
+ elif data.loc[i, "Type"] == "All Assets":
1271
+ if data.loc[i, "Sign"] == ">=":
1272
+ if w_min[w_min <= data.loc[i, "Weight"]].shape[0] != 0:
1273
+ w_min[w_min <= data.loc[i, "Weight"]] = data.loc[i, "Weight"]
1274
+ elif data.loc[i, "Sign"] == "<=":
1275
+ if w_max[w_max >= data.loc[i, "Weight"]].shape[0] != 0:
1276
+ w_max[w_max >= data.loc[i, "Weight"]] = data.loc[i, "Weight"]
1277
+ elif data.loc[i, "Type"] == "Each asset in a class":
1278
+ label_0 = asset_classes.columns.tolist()[0]
1279
+ label_1 = data.loc[i, "Set"]
1280
+ label_2 = data.loc[i, "Position"]
1281
+ assets = asset_classes[[label_0, label_1]][
1282
+ asset_classes[label_1] == label_2
1283
+ ]
1284
+ assets = assets["Assets"].tolist()
1285
+ if data.loc[i, "Sign"] == ">=":
1286
+ if (
1287
+ w_min.loc[assets][
1288
+ w_min.loc[assets] <= data.loc[i, "Weight"]
1289
+ ].shape[0]
1290
+ != 0
1291
+ ):
1292
+ w_min.loc[assets] = np.where(
1293
+ w_min.loc[assets] <= data.loc[i, "Weight"],
1294
+ data.loc[i, "Weight"],
1295
+ w_min.loc[assets],
1296
+ )
1297
+ elif data.loc[i, "Sign"] == "<=":
1298
+ if (
1299
+ w_max.loc[assets][
1300
+ w_max.loc[assets] >= data.loc[i, "Weight"]
1301
+ ].shape[0]
1302
+ != 0
1303
+ ):
1304
+ w_max.loc[assets] = np.where(
1305
+ w_max.loc[assets] >= data.loc[i, "Weight"],
1306
+ data.loc[i, "Weight"],
1307
+ w_max.loc[assets],
1308
+ )
1309
+
1310
+ return w_max, w_min
1311
+
1312
+
1313
+ def risk_constraint(asset_classes, kind="vanilla", classes_col=None):
1314
+ r"""
1315
+ Create the risk contribution constraint vector for the risk parity model.
1316
+
1317
+ Parameters
1318
+ ----------
1319
+ asset_classes : DataFrame of shape (n_assets, n_cols)
1320
+ Asset's classes DataFrame, where n_assets is the number of assets and
1321
+ n_cols is the number of columns of the DataFrame where the first column
1322
+ is the asset list and the next columns are the different asset's
1323
+ classes sets. The class columns are only used when the kind value is
1324
+ 'classes'.
1325
+
1326
+ kind : str
1327
+ Kind of risk contribution constraint vector. The default value is 'vanilla'.
1328
+ Possible values are:
1329
+
1330
+ - 'vanilla': vector of equal risk contribution per asset.
1331
+ - 'classes': vector of equal risk contribution per class.
1332
+
1333
+ classes_col : str or int
1334
+ If value is str, it is the column name of the set of classes from
1335
+ asset_classes dataframe. If value is int, it is the column number of
1336
+ the set of classes from asset_classes dataframe. The default
1337
+ value is None.
1338
+
1339
+ Returns
1340
+ -------
1341
+ rb : nd-array
1342
+ The risk contribution constraint vector.
1343
+
1344
+ Raises
1345
+ ------
1346
+ ValueError when the value cannot be calculated.
1347
+
1348
+ Examples
1349
+ --------
1350
+ ::
1351
+
1352
+ asset_classes = {'Assets': ['FB', 'GOOGL', 'NTFX', 'BAC', 'WFC', 'TLT', 'SHV'],
1353
+ 'Class 1': ['Equity', 'Equity', 'Equity', 'Equity', 'Equity',
1354
+ 'Fixed Income', 'Fixed Income'],
1355
+ 'Class 2': ['Technology', 'Technology', 'Technology',
1356
+ 'Financial', 'Financial', 'Treasury', 'Treasury'],}
1357
+
1358
+ asset_classes = pd.DataFrame(asset_classes)
1359
+ asset_classes = asset_classes.sort_values(by=['Assets'])
1360
+ asset_classes.reset_index(inplace=True, drop=True)
1361
+
1362
+ rb = rp.risk_constraint(asset_classes,
1363
+ kind='classes',
1364
+ classes_col='Class 1')
1365
+
1366
+
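+ The vector rb is meant to be used as the risk contribution target of a risk
+ parity optimization (a sketch, assuming Y is a returns DataFrame of the same
+ assets, as in the library's risk parity examples):
+
+ ::
+
+ port = rp.Portfolio(returns=Y)
+ port.assets_stats(method_mu='hist', method_cov='hist')
+ w = port.rp_optimization(model='Classic', rm='MV', rf=0, b=rb, hist=True)
+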
1367
+ """
1368
+ if not isinstance(asset_classes, pd.DataFrame):
1369
+ raise ValueError("asset_classes must be a DataFrame")
1370
+
1371
+ if kind == "vanilla":
1372
+ if asset_classes.shape[1] < 1:
1373
+ raise ValueError("asset_classes must have at least one column")
1374
+
1375
+ assetslist = asset_classes.iloc[:, 0].values.tolist()
1376
+ rb = np.ones((len(assetslist), 1))
1377
+ rb /= len(assetslist)
1378
+
1379
+ elif kind == "classes":
1380
+ if asset_classes.shape[1] < 2:
1381
+ raise ValueError("asset_classes must have at least two columns")
1382
+
1383
+ classes = asset_classes.columns.tolist()
1384
+
1385
+ if isinstance(classes_col, str) and classes_col in classes:
1386
+ A = asset_classes.loc[:, classes_col].to_frame()
1387
+ col = A.columns.to_list()[0]
1388
+ elif isinstance(classes_col, int) and classes[classes_col] in classes:
1389
+ A = asset_classes.iloc[:, classes_col].to_frame()
1390
+ col = A.columns.to_list()[0]
1391
+ else:
1392
+ raise ValueError(
1393
+ "classes_col must be a valid column or column position of asset_classes"
1394
+ )
1395
+
1396
+ A["rb"] = 1
1397
+ B = A.groupby([col]).count()
1398
+ A = pd.merge(A, B, left_on=col, right_index=True, how="left")
1399
+ A["rb"] = A["rb_x"] / A["rb_y"]
1400
+ A["rb"] /= A["rb"].sum()
1401
+
1402
+ rb = A["rb"].to_numpy().reshape(-1, 1)
1403
+
1404
+ else:
1405
+ raise ValueError(
1406
+ "The only available values for kind parameter are 'vanilla' and 'classes'"
1407
+ )
1408
+
1409
+ return rb
1410
+
1411
+
1412
+ def connection_matrix(
1413
+ returns,
1414
+ custom_cov=None,
1415
+ codependence="pearson",
1416
+ graph="MST",
1417
+ walk_size=1,
1418
+ bins_info="KN",
1419
+ alpha_tail=0.05,
1420
+ gs_threshold=0.5,
1421
+ ):
1422
+ r"""
1423
+ Create a connection matrix of walks of a specific size based on :cite:`e-Cajas10` formula.
1424
+
1425
+ Parameters
1426
+ ----------
1427
+ returns : DataFrame of shape (n_samples, n_assets)
1428
+ Assets returns DataFrame, where n_samples is the number of
1429
+ observations and n_assets is the number of assets.
1430
+ custom_cov : DataFrame or None, optional
1431
+ Custom covariance matrix, used when codependence parameter has value
1432
+ 'custom_cov'. The default is None.
1433
+ codependence : str, can be {'pearson', 'spearman', 'kendall', 'gerber1', 'gerber2', 'abs_pearson', 'abs_spearman', 'abs_kendall', 'distance', 'mutual_info', 'tail' or 'custom_cov'}
1434
+ The codependence or similarity matrix used to build the distance
1435
+ metric and clusters. The default is 'pearson'. Possible values are:
1436
+
1437
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1438
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
1439
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
1440
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
1441
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
1442
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1443
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1444
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
1445
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
1446
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
1447
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
1448
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1449
+
1450
+ graph : string, optional
1451
+ Graph used to build the adjacency matrix. The default is 'MST'.
1452
+ Possible values are:
1453
+
1454
+ - 'MST': Minimum Spanning Tree.
1455
+ - 'TMFG': Triangulated Maximally Filtered Graph.
1456
+
1457
+ walk_size : int, optional
1458
+ Size of the walk represented by the adjacency matrix. The default is 1.
1459
+ bins_info : int or str
1460
+ Number of bins used to calculate variation of information. The default
1461
+ value is 'KN'. Possible values are:
1462
+
1463
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
1464
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
1465
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
1466
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
1467
+ - int: integer value chosen by the user.
1468
+
1469
+ alpha_tail : float, optional
1470
+ Significance level for lower tail dependence index. The default is 0.05.
1471
+ gs_threshold : float, optional
1472
+ Gerber statistic threshold. The default is 0.5.
1473
+
1474
+ Returns
1475
+ -------
1476
+ A_p : nd-array
1477
+ Adjacency matrix of walks of size lower than or equal to 'walk_size'.
1478
+
1479
+ Raises
1480
+ ------
1481
+ ValueError when the value cannot be calculated.
1482
+
1483
+ Examples
1484
+ --------
1485
+
1486
+ ::
1487
+
1488
+ A_p = rp.connection_matrix(returns,
1489
+ codependence="pearson",
1490
+ graph="MST",
1491
+ walk_size=1)
1492
+
1493
+ The connection matrix dataframe looks like the following image:
1494
+
1495
+ .. image:: images/Connection_df.png
1496
+
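+ A_p can also be inspected directly (a minimal sketch outside the library's
+ documented examples): entry (i, j) equals 1 when asset j is reachable from
+ asset i in at most walk_size steps, so row sums give the size of each
+ asset's neighbourhood:
+
+ ::
+
+ import numpy as np
+ import pandas as pd
+
+ neighbors = pd.Series(np.asarray(A_p).sum(axis=1), index=returns.columns)
+ print(neighbors.sort_values(ascending=False))
+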
1497
+ """
1498
+
1499
+ if not isinstance(returns, pd.DataFrame):
1500
+ raise ValueError("returns must be a DataFrame")
1501
+
1502
+ assets = returns.columns.tolist()
1503
+
1504
+ # Calculating codependence matrix and distance metric
1505
+ codep, dist = af.codep_dist(
1506
+ returns=returns,
1507
+ custom_cov=custom_cov,
1508
+ codependence=codependence,
1509
+ bins_info=bins_info,
1510
+ alpha_tail=alpha_tail,
1511
+ gs_threshold=gs_threshold,
1512
+ )
1513
+
1514
+ # Adjacency Matrix Construction
1515
+ dist = dist.to_numpy()
1516
+ dist = pd.DataFrame(dist, columns=codep.columns, index=codep.index)
1517
+ if graph == "TMFG":
1518
+ # different choices for D, S give different outputs!
1519
+ D = dist.to_numpy() # dissimilarity matrix
1520
+ if codependence in {"pearson", "spearman"}:
1521
+ S = (1 - dist**2).to_numpy()
1522
+ else:
1523
+ S = codep.copy().to_numpy()
1524
+ (_, Rpm, _, _, _, clustering) = db.DBHTs(D, S) # DBHT clustering
1525
+ MAdj = pd.DataFrame(Rpm, index=assets, columns=assets)
1526
+ G = nx.from_pandas_adjacency(MAdj)
1527
+ elif graph == "MST":
1528
+ MAdj = nx.from_pandas_adjacency(dist)
1529
+ G = nx.minimum_spanning_tree(MAdj)
1530
+ else:
1531
+ raise ValueError("Only TMFG or MST graphs are available")
1532
+
1533
+ A = nx.adjacency_matrix(G).toarray()
1534
+ A = np.where(A != 0, 1, 0)
1535
+
1536
+ A_p = np.zeros_like(A)
1537
+ for i in range(int(walk_size) + 1):
1538
+ A_p += np.linalg.matrix_power(A, i)
1539
+
1540
+ n = A.shape[0]
1541
+ # Clip the walk counts to 0/1 and drop self-connections on the diagonal
+ A_p = np.clip(A_p, 0, 1) - np.identity(n)
1542
+ A_p = np.ceil(A_p)
1543
+
1544
+ return A_p
1545
+
1546
+
1547
+ def centrality_vector(
1548
+ returns,
1549
+ measure="Degree",
1550
+ custom_cov=None,
1551
+ codependence="pearson",
1552
+ graph="MST",
1553
+ bins_info="KN",
1554
+ alpha_tail=0.05,
1555
+ gs_threshold=0.5,
1556
+ ):
1557
+ r"""
1558
+ Create a centrality vector from the adjacency matrix of an asset network based on :cite:`e-Cajas10` formula.
1559
+
1560
+ Parameters
1561
+ ----------
1562
+ returns : DataFrame of shape (n_samples, n_assets)
1563
+ Assets returns DataFrame, where n_samples is the number of
1564
+ observations and n_assets is the number of assets.
1565
+ measure : str, optional
1566
+ Centrality measure. The default is 'Degree'. Possible values are:
1567
+
1568
+ - 'Degree': Node's degree centrality. Number of edges connected to a node.
1569
+ - 'Eigenvector': Eigenvector centrality. See more in `eigenvector_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality_numpy.html#eigenvector-centrality-numpy>`_.
1570
+ - 'Katz': Katz centrality. See more in `katz_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.katz_centrality_numpy.html#katz-centrality-numpy>`_.
1571
+ - 'Closeness': Closeness centrality. See more in `closeness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.closeness_centrality.html#closeness-centrality>`_.
1572
+ - 'Betweeness': Betweenness centrality. See more in `betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html#betweenness-centrality>`_.
1573
+ - 'Communicability': Communicability betweenness centrality. See more in `communicability_betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.communicability_betweenness_centrality.html#communicability-betweenness-centrality>`_.
1574
+ - 'Subgraph': Subgraph centrality. See more in `subgraph_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.subgraph_centrality.html#subgraph-centrality>`_.
1575
+ - 'Laplacian': Laplacian centrality. See more in `laplacian_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.laplacian_centrality.html#laplacian-centrality>`_.
1576
+
1577
+ custom_cov : DataFrame or None, optional
1578
+ Custom covariance matrix, used when codependence parameter has value
1579
+ 'custom_cov'. The default is None.
1580
+ codependence : str, optional
1581
+ The codependence or similarity matrix used to build the distance
1582
+ metric and clusters. The default is 'pearson'. Possible values are:
1583
+
1584
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1585
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
1586
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
1587
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
1588
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
1589
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1590
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1591
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
1592
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
1593
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
1594
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
1595
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1596
+
1597
+ graph : string, optional
1598
+ Graph used to build the adjacency matrix. The default is 'MST'.
1599
+ Possible values are:
1600
+
1601
+ - 'MST': Minimum Spanning Tree.
1602
+ - 'TMFG': Triangulated Maximally Filtered Graph.
1603
+
1604
+ bins_info : int or str
1605
+ Number of bins used to calculate variation of information. The default
1606
+ value is 'KN'. Possible values are:
1607
+
1608
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
1609
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
1610
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
1611
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
1612
+ - int: integer value chosen by the user.
1613
+
1614
+ alpha_tail : float, optional
1615
+ Significance level for lower tail dependence index. The default is 0.05.
1616
+ gs_threshold : float, optional
1617
+ Gerber statistic threshold. The default is 0.5.
1618
+
1619
+ Returns
1620
+ -------
1621
+ CM : ndarray of shape (1, n_assets)
1622
+ Centrality vector of assets.
1623
+
1624
+ Raises
1625
+ ------
1626
+ ValueError when the value cannot be calculated.
1627
+
1628
+ Examples
1629
+ --------
1630
+
1631
+ ::
1632
+
1633
+ C_v = rp.centrality_vector(returns,
1634
+ measure='Degree',
1635
+ codependence="pearson",
1636
+ graph="MST")
1637
+
1638
+ The centrality vector looks like the following image:
1639
+
1640
+ .. image:: images/Centrality_df.png
1641
+
1642
+ """
1643
+
1644
+ Adj = connection_matrix(
1645
+ returns=returns,
1646
+ custom_cov=custom_cov,
1647
+ codependence=codependence,
1648
+ graph=graph,
1649
+ walk_size=1,
1650
+ bins_info=bins_info,
+ alpha_tail=alpha_tail,
1651
+ gs_threshold=gs_threshold,
1652
+ )
1653
+
1654
+ n = Adj.shape[0]
1655
+ G = nx.from_numpy_array(Adj)
1656
+ if measure == "Degree":
1657
+ CM = np.ones((1, n)) @ Adj
1658
+ elif measure == "Eigenvector":
1659
+ CM = nx.eigenvector_centrality_numpy(G)
1660
+ elif measure == "Katz":
1661
+ CM = nx.katz_centrality_numpy(G)
1662
+ elif measure == "Closeness":
1663
+ CM = nx.closeness_centrality(G)
1664
+ elif measure == "Betweeness":
1665
+ CM = nx.betweenness_centrality(G)
1666
+ elif measure == "Communicability":
1667
+ CM = nx.communicability_betweenness_centrality(G)
1668
+ elif measure == "Subgraph":
1669
+ CM = nx.subgraph_centrality(G)
1670
+ elif measure == "Laplacian":
1671
+ CM = nx.laplacian_centrality(G)
+ else:
+ raise ValueError("measure must be one of 'Degree', 'Eigenvector', 'Katz', 'Closeness', 'Betweeness', 'Communicability', 'Subgraph' or 'Laplacian'")
1672
+
1673
+ if measure != "Degree":
1674
+ CM = pd.Series(CM).to_numpy().reshape(1, -1)
1675
+
1676
+ return CM
1677
+
1678
+
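+ # A small sketch (hypothetical helper, not part of the library API)
+ # checking the degree branch above: the row vector 1' @ Adj equals the
+ # per-node degrees reported by networkx on the same graph.
+ def _demo_degree_centrality():
+     Adj = np.array([[0, 1, 1],
+                     [1, 0, 0],
+                     [1, 0, 0]])
+     n = Adj.shape[0]
+     CM = np.ones((1, n)) @ Adj  # 1 x n row of node degrees
+     G = nx.from_numpy_array(Adj)
+     degrees = np.array([d for _, d in G.degree()], ndmin=2)
+     assert np.array_equal(CM, degrees)
+     return CM
+
+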
1679
+ def clusters_matrix(
1680
+ returns,
1681
+ custom_cov=None,
1682
+ codependence="pearson",
1683
+ linkage="ward",
1684
+ opt_k_method="twodiff",
1685
+ k=None,
1686
+ max_k=10,
1687
+ bins_info="KN",
1688
+ alpha_tail=0.05,
1689
+ gs_threshold=0.5,
1690
+ leaf_order=True,
1691
+ ):
1692
+ r"""
1693
+ Creates an adjacency matrix that represents the clusters from the hierarchical
1694
+ clustering process based on :cite:`e-Cajas11` formula.
1695
+
1696
+ Parameters
1697
+ ----------
1698
+ returns : DataFrame of shape (n_samples, n_assets)
1699
+ Assets returns DataFrame, where n_samples is the number of
1700
+ observations and n_assets is the number of assets.
1701
+ custom_cov : DataFrame or None, optional
1702
+ Custom covariance matrix, used when codependence parameter has value
1703
+ 'custom_cov'. The default is None.
1704
+ codependence : str, optional
1705
+ The codependence or similarity matrix used to build the distance
1706
+ metric and clusters. The default is 'pearson'. Possible values are:
1707
+
1708
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1709
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
1710
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
1711
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
1712
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
1713
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1714
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1715
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
1716
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
1717
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
1718
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
1719
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1720
+
1721
+ linkage : string, optional
1722
+ Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
1723
+ The default is 'ward'. Possible values are:
1724
+
1725
+ - 'single'.
1726
+ - 'complete'.
1727
+ - 'average'.
1728
+ - 'weighted'.
1729
+ - 'centroid'.
1730
+ - 'median'.
1731
+ - 'ward'.
1732
+ - 'DBHT'. Direct Bubble Hierarchical Tree.
1733
+
1734
+ opt_k_method : str
1735
+ Method used to calculate the optimum number of clusters.
1736
+ The default is 'twodiff'. Possible values are:
1737
+
1738
+ - 'twodiff': two difference gap statistic.
1739
+ - 'stdsil': standardized silhouette score.
1740
+
1741
+ k : int, optional
1742
+ Number of clusters. This value is taken instead of the optimal number
1743
+ of clusters calculated with the two difference gap statistic.
1744
+ The default is None.
1745
+ max_k : int, optional
1746
+ Max number of clusters used by the two difference gap statistic
1747
+ to find the optimal number of clusters. The default is 10.
1748
+ bins_info : int or str
1749
+ Number of bins used to calculate variation of information. The default
1750
+ value is 'KN'. Possible values are:
1751
+
1752
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
1753
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
1754
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
1755
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
1756
+ - int: integer value chosen by the user.
1757
+
1758
+ alpha_tail : float, optional
1759
+ Significance level for lower tail dependence index. The default is 0.05.
1760
+ gs_threshold : float, optional
1761
+ Gerber statistic threshold. The default is 0.5.
1762
+ leaf_order : bool, optional
1763
+ Indicates if the clusters are ordered so that the distance between
1764
+ successive leaves is minimal. The default is True.
1765
+
1766
+ Returns
1767
+ -------
1768
+ A_c : ndarray
1769
+ Adjacency matrix of clusters.
1770
+
1771
+ Raises
1772
+ ------
1773
+ ValueError when the value cannot be calculated.
1774
+
1775
+ Examples
1776
+ --------
1777
+
1778
+ ::
1779
+
1780
+ C_M = rp.clusters_matrix(returns,
1781
+ codependence='pearson',
1782
+ linkage='ward',
1783
+ k=None,
1784
+ max_k=10)
1785
+
1786
+
1787
+ The clusters matrix looks like the following image:
1788
+
1789
+ .. image:: images/Clusters_matrix_df.png
1790
+
1791
+ """
1792
+
1793
+ assets = returns.columns.tolist()
1794
+ n = len(assets)
1795
+ clusters = assets_clusters(
1796
+ returns=returns,
1797
+ custom_cov=custom_cov,
1798
+ codependence=codependence,
1799
+ linkage=linkage,
1800
+ opt_k_method=opt_k_method,
1801
+ k=k,
1802
+ max_k=max_k,
1803
+ bins_info=bins_info,
1804
+ alpha_tail=alpha_tail,
1805
+ gs_threshold=gs_threshold,
1806
+ leaf_order=leaf_order,
1807
+ )
1808
+
1809
+ df = pd.DataFrame([], index=assets)
1810
+
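+ # Build a one-hot membership matrix, one column per cluster.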
1811
+ for i in clusters["Clusters"].unique():
1812
+ labels = clusters[clusters["Clusters"] == i]["Assets"].tolist()
1813
+ df1 = pd.Series(np.zeros((n,)), index=assets)
1814
+ df1[labels] = 1
1815
+ df = pd.concat([df, df1], axis=1)
1816
+
1817
+ A_c = df.to_numpy()
1818
+ A_c = A_c @ A_c.T - np.identity(n)
1819
+
1820
+ return A_c
1821
+
1822
+
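+ # A minimal sketch (hypothetical helper, for illustration only) of the
+ # A_c = M @ M.T - I step above: with a one-hot membership matrix M
+ # (assets x clusters), entry (i, j) of M @ M.T is 1 exactly when assets
+ # i and j share a cluster; subtracting the identity removes self-links.
+ def _demo_clusters_adjacency():
+     # Three assets, two clusters: assets 0 and 1 together, asset 2 alone.
+     M = np.array([[1, 0],
+                   [1, 0],
+                   [0, 1]])
+     A_c = M @ M.T - np.identity(M.shape[0])
+     # A_c[0, 1] == 1 (same cluster); A_c[0, 2] == 0 (different clusters).
+     return A_c
+
+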
1823
+ def average_centrality(
1824
+ returns,
1825
+ w,
1826
+ measure="Degree",
1827
+ custom_cov=None,
1828
+ codependence="pearson",
1829
+ graph="MST",
1830
+ bins_info="KN",
1831
+ alpha_tail=0.05,
1832
+ gs_threshold=0.5,
1833
+ ):
1834
+ r"""
1835
+ Calculates the average centrality of assets of the portfolio based on :cite:`e-Cajas10` formula.
1836
+
1837
+ Parameters
1838
+ ----------
1839
+ returns : DataFrame of shape (n_samples, n_assets)
1840
+ Assets returns DataFrame, where n_samples is the number of
1841
+ observations and n_assets is the number of assets.
1842
+ w : DataFrame or Series of shape (n_assets, 1)
1843
+ Portfolio weights, where n_assets is the number of assets.
1844
+ measure : str, optional
1845
+ Centrality measure. The default is 'Degree'. Possible values are:
1846
+
1847
+ - 'Degree': Node's degree centrality. Number of edges connected to a node.
1848
+ - 'Eigenvector': Eigenvector centrality. See more in `eigenvector_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality_numpy.html#eigenvector-centrality-numpy>`_.
1849
+ - 'Katz': Katz centrality. See more in `katz_centrality_numpy <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.katz_centrality_numpy.html#katz-centrality-numpy>`_.
1850
+ - 'Closeness': Closeness centrality. See more in `closeness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.closeness_centrality.html#closeness-centrality>`_.
1851
+ - 'Betweeness': Betweenness centrality. See more in `betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html#betweenness-centrality>`_.
1852
+ - 'Communicability': Communicability betweenness centrality. See more in `communicability_betweenness_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.communicability_betweenness_centrality.html#communicability-betweenness-centrality>`_.
1853
+ - 'Subgraph': Subgraph centrality. See more in `subgraph_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.subgraph_centrality.html#subgraph-centrality>`_.
1854
+ - 'Laplacian': Laplacian centrality. See more in `laplacian_centrality <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.laplacian_centrality.html#laplacian-centrality>`_.
1855
+
1856
+ custom_cov : DataFrame or None, optional
1857
+ Custom covariance matrix, used when codependence parameter has value
1858
+ 'custom_cov'. The default is None.
1859
+ codependence : str, optional
1860
+ The codependence or similarity matrix used to build the distance
1861
+ metric and clusters. The default is 'pearson'. Possible values are:
1862
+
1863
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1864
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
1865
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
1866
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
1867
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
1868
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1869
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1870
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
1871
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
1872
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
1873
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
1874
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1875
+
1876
+ graph : string, optional
1877
+ Graph used to build the adjacency matrix. The default is 'MST'.
1878
+ Possible values are:
1879
+
1880
+ - 'MST': Minimum Spanning Tree.
1881
+ - 'TMFG': Triangulated Maximally Filtered Graph.
1882
+
1883
+ bins_info : int or str
1884
+ Number of bins used to calculate variation of information. The default
1885
+ value is 'KN'. Possible values are:
1886
+
1887
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
1888
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
1889
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
1890
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
1891
+ - int: integer value chosen by the user.
1892
+
1893
+ alpha_tail : float, optional
1894
+ Significance level for lower tail dependence index. The default is 0.05.
1895
+ gs_threshold : float, optional
1896
+ Gerber statistic threshold. The default is 0.5.
1897
+
1898
+ Returns
1899
+ -------
1900
+ AC : float
1901
+ Average centrality of assets.
1902
+
1903
+ Raises
1904
+ ------
1905
+ ValueError when the value cannot be calculated.
1906
+
1907
+ Examples
1908
+ --------
1909
+
1910
+ ::
1911
+
1912
+ ac = rp.average_centrality(returns,
1913
+ w,
1914
+ measure="Degree"
1915
+ codependence="pearson",
1916
+ graph="MST")
1917
+
1918
+ """
1919
+
1920
+ w_ = np.array(w, ndmin=2)
1921
+ if w_.shape[0] == 1 and w_.shape[1] > 1:
1922
+ w_ = w_.T
1923
+ if w_.shape[0] > 1 and w_.shape[1] > 1:
1924
+ raise ValueError("w must have n_assets x 1 size")
1925
+ if w.index.tolist() != returns.columns.tolist():
1926
+ raise ValueError("w and returns must have the same columns.")
1927
+ if codependence == "custom_cov" and custom_cov is None:
1928
+ raise ValueError(
1929
+ "custom_cov value of codependence parameter requires a custom_cov parameter."
1930
+ )
1931
+
1932
+ CM = centrality_vector(
1933
+ returns=returns,
1934
+ measure=measure,
1935
+ custom_cov=custom_cov,
1936
+ codependence=codependence,
1937
+ graph=graph,
1938
+ bins_info=bins_info,
1939
+ alpha_tail=alpha_tail,
1940
+ gs_threshold=gs_threshold,
1941
+ )
1942
+
1943
+ ac = CM @ w_
1945
+
1946
+ return ac.item()
1947
+
1948
+
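+ # A tiny sketch (hypothetical helper, for illustration only) of the
+ # average-centrality computation above: the portfolio-weighted sum of
+ # per-asset centralities, CM @ w.
+ def _demo_average_centrality():
+     CM = np.array([[2.0, 1.0, 1.0]])        # per-asset centralities
+     w_ = np.array([[0.5], [0.25], [0.25]])  # portfolio weights
+     return (CM @ w_).item()                 # 2*0.5 + 1*0.25 + 1*0.25 = 1.5
+
+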
1949
+ def connected_assets(
1950
+ returns,
1951
+ w,
1952
+ custom_cov=None,
1953
+ codependence="pearson",
1954
+ graph="MST",
1955
+ walk_size=1,
1956
+ bins_info="KN",
1957
+ alpha_tail=0.05,
1958
+ gs_threshold=0.5,
1959
+ ):
1960
+ r"""
1961
+ Calculates the percentage invested in connected assets of the portfolio based on :cite:`e-Cajas10` formula.
1962
+
1963
+ Parameters
1964
+ ----------
1965
+ returns : DataFrame of shape (n_samples, n_assets)
1966
+ Assets returns DataFrame, where n_samples is the number of
1967
+ observations and n_assets is the number of assets.
1968
+ w : DataFrame or Series of shape (n_assets, 1)
1969
+ Portfolio weights, where n_assets is the number of assets.
1970
+ custom_cov : DataFrame or None, optional
1971
+ Custom covariance matrix, used when codependence parameter has value
1972
+ 'custom_cov'. The default is None.
1973
+ codependence : str, optional
1974
+ The codependence or similarity matrix used to build the distance
1975
+ metric and clusters. The default is 'pearson'. Possible values are:
1976
+
1977
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1978
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
1979
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
1980
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
1981
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
1982
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1983
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
1984
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
1985
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
1986
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
1987
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
1988
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
1989
+
1990
+ graph : string, optional
1991
+ Graph used to build the adjacency matrix. The default is 'MST'.
1992
+ Possible values are:
1993
+
1994
+ - 'MST': Minimum Spanning Tree.
1995
+ - 'TMFG': Triangulated Maximally Filtered Graph.
1996
+
1997
+ walk_size : int, optional
1998
+ Size of the walk represented by the adjacency matrix. The default is 1.
1999
+ bins_info : int or str
2000
+ Number of bins used to calculate variation of information. The default
2001
+ value is 'KN'. Possible values are:
2002
+
2003
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
2004
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
2005
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
2006
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
2007
+ - int: integer value chosen by the user.
2008
+
2009
+ alpha_tail : float, optional
2010
+ Significance level for lower tail dependence index. The default is 0.05.
2011
+ gs_threshold : float, optional
2012
+ Gerber statistic threshold. The default is 0.5.
2013
+
2014
+ Returns
2015
+ -------
2016
+ CA : float
2017
+ Percentage invested in connected assets.
2018
+
2019
+ Raises
2020
+ ------
2021
+ ValueError when the value cannot be calculated.
2022
+
2023
+ Examples
2024
+ --------
2025
+
2026
+ ::
2027
+
2028
+ ca = rp.connected_assets(returns,
2029
+ w,
2030
+ codependence="pearson",
2031
+ graph="MST",
2032
+ walk_size=1)
2033
+
2034
+ """
2035
+
2036
+ w_ = np.array(w, ndmin=2)
2037
+ if w_.shape[0] == 1 and w_.shape[1] > 1:
2038
+ w_ = w_.T
2039
+ if w_.shape[0] > 1 and w_.shape[1] > 1:
2040
+ raise ValueError("w must have n_assets x 1 size")
2041
+ if w.index.tolist() != returns.columns.tolist():
2042
+ raise ValueError("w and returns must have the same columns.")
2043
+ if codependence == "custom_cov" and custom_cov is None:
2044
+ raise ValueError(
2045
+ "custom_cov value of codependence parameter requires a custom_cov parameter."
2046
+ )
2047
+
2048
+ A_p = connection_matrix(
2049
+ returns=returns,
2050
+ custom_cov=custom_cov,
2051
+ codependence=codependence,
2052
+ graph=graph,
2053
+ walk_size=walk_size,
2054
+ bins_info=bins_info,
+ alpha_tail=alpha_tail,
2055
+ gs_threshold=gs_threshold,
2056
+ )
2057
+
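+ # Ratio of the pairwise weight mass |w_i w_j| that falls on connected
+ # pairs: 1' (A_p * |w w'|) 1 / (1' |w w'| 1).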
2058
+ n = A_p.shape[0]
2059
+ ones = np.ones((n, 1))
2060
+ wwt = np.abs(w_ @ w_.T)
2061
+ ca = ones.T @ (A_p * wwt) @ ones
2062
+ ca /= ones.T @ wwt @ ones
2063
+
2064
+ return ca.item()
2065
+
2066
+
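+ # A toy sketch (hypothetical helper, for illustration only) of the ratio
+ # above, with three assets of which only 0 and 1 are connected.
+ def _demo_connected_ratio():
+     w_ = np.array([[0.5], [0.3], [0.2]])
+     A_p = np.array([[0, 1, 0],
+                     [1, 0, 0],
+                     [0, 0, 0]])
+     ones = np.ones((A_p.shape[0], 1))
+     wwt = np.abs(w_ @ w_.T)
+     ca = (ones.T @ (A_p * wwt) @ ones) / (ones.T @ wwt @ ones)
+     return ca.item()  # (0.15 + 0.15) / 1.0 = 0.3
+
+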
2067
+ def related_assets(
2068
+ returns,
2069
+ w,
2070
+ custom_cov=None,
2071
+ codependence="pearson",
2072
+ linkage="ward",
2073
+ opt_k_method="twodiff",
2074
+ k=None,
2075
+ max_k=10,
2076
+ bins_info="KN",
2077
+ alpha_tail=0.05,
2078
+ gs_threshold=0.5,
2079
+ leaf_order=True,
2080
+ ):
2081
+ r"""
2082
+ Calculates the percentage invested in related assets of the portfolio based on :cite:`e-Cajas11` formula.
2083
+
2084
+ Parameters
2085
+ ----------
2086
+ returns : DataFrame of shape (n_samples, n_assets)
2087
+ Assets returns DataFrame, where n_samples is the number of
2088
+ observations and n_assets is the number of assets.
2089
+ w : DataFrame or Series of shape (n_assets, 1)
2090
+ Portfolio weights, where n_assets is the number of assets.
2091
+ custom_cov : DataFrame or None, optional
2092
+ Custom covariance matrix, used when codependence parameter has value
2093
+ 'custom_cov'. The default is None.
2094
+ codependence : str, optional
2095
+ The codependence or similarity matrix used to build the distance
2096
+ metric and clusters. The default is 'pearson'. Possible values are:
2097
+
2098
+ - 'pearson': pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
2099
+ - 'spearman': spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{spearman}_{i,j})}`.
2100
+ - 'kendall': kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{kendall}_{i,j})}`.
2101
+ - 'gerber1': Gerber statistic 1 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber1}_{i,j})}`.
2102
+ - 'gerber2': Gerber statistic 2 correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{gerber2}_{i,j})}`.
2103
+ - 'abs_pearson': absolute value pearson correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
2104
+ - 'abs_spearman': absolute value spearman correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho_{i,j}|)}`.
2105
+ - 'abs_kendall': absolute value kendall correlation matrix. Distance formula: :math:`D_{i,j} = \sqrt{(1-|\rho^{kendall}_{i,j}|)}`.
2106
+ - 'distance': distance correlation matrix. Distance formula :math:`D_{i,j} = \sqrt{(1-\rho^{distance}_{i,j})}`.
2107
+ - 'mutual_info': mutual information matrix. Distance used is variation information matrix.
2108
+ - 'tail': lower tail dependence index matrix. Dissimilarity formula :math:`D_{i,j} = -\log{\lambda_{i,j}}`.
2109
+ - 'custom_cov': use custom correlation matrix based on the custom_cov parameter. Distance formula: :math:`D_{i,j} = \sqrt{0.5(1-\rho^{pearson}_{i,j})}`.
2110
+
2111
+ linkage : string, optional
2112
+ Linkage method of hierarchical clustering, see `linkage <https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html?highlight=linkage#scipy.cluster.hierarchy.linkage>`_ for more details.
2113
+ The default is 'ward'. Possible values are:
2114
+
2115
+ - 'single'.
2116
+ - 'complete'.
2117
+ - 'average'.
2118
+ - 'weighted'.
2119
+ - 'centroid'.
2120
+ - 'median'.
2121
+ - 'ward'.
2122
+ - 'DBHT'. Direct Bubble Hierarchical Tree.
2123
+
2124
+ opt_k_method : str
2125
+ Method used to calculate the optimum number of clusters.
2126
+ The default is 'twodiff'. Possible values are:
2127
+
2128
+ - 'twodiff': two difference gap statistic.
2129
+ - 'stdsil': standardized silhouette score.
2130
+
2131
+ k : int, optional
2132
+ Number of clusters. This value is taken instead of the optimal number
2133
+ of clusters calculated with the two difference gap statistic.
2134
+ The default is None.
2135
+ max_k : int, optional
2136
+ Max number of clusters used by the two difference gap statistic
2137
+ to find the optimal number of clusters. The default is 10.
2138
+ bins_info : int or str
2139
+ Number of bins used to calculate variation of information. The default
2140
+ value is 'KN'. Possible values are:
2141
+
2142
+ - 'KN': Knuth's choice method. See more in `knuth_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.knuth_bin_width.html>`_.
2143
+ - 'FD': Freedman–Diaconis' choice method. See more in `freedman_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.freedman_bin_width.html>`_.
2144
+ - 'SC': Scott's choice method. See more in `scott_bin_width <https://docs.astropy.org/en/stable/api/astropy.stats.scott_bin_width.html>`_.
2145
+ - 'HGR': Hacine-Gharbi and Ravier's choice method.
2146
+ - int: integer value chosen by the user.
2147
+
2148
+ alpha_tail : float, optional
2149
+ Significance level for lower tail dependence index. The default is 0.05.
2150
+ gs_threshold : float, optional
2151
+ Gerber statistic threshold. The default is 0.5.
2152
+ leaf_order : bool, optional
2153
+ Indicates if the clusters are ordered so that the distance between
2154
+ successive leaves is minimal. The default is True.
2155
+
2156
+ Returns
2157
+ -------
2158
+ RA : float
2159
+ Percentage invested in related assets.
2160
+
2161
+ Raises
2162
+ ------
2163
+ ValueError when the value cannot be calculated.
2164
+
2165
+ Examples
2166
+ --------
2167
+
2168
+ ::
2169
+
2170
+ ra = rp.related_assets(returns,
2171
+ w,
2172
+ codependence="pearson",
2173
+ linkage="ward",
2174
+ k=None,
2175
+ max_k=10)
2176
+
2177
+ """
2178
+
2179
+ w_ = np.array(w, ndmin=2)
2180
+ if w_.shape[0] == 1 and w_.shape[1] > 1:
2181
+ w_ = w_.T
2182
+ if w_.shape[0] > 1 and w_.shape[1] > 1:
2183
+ raise ValueError("w must have n_assets x 1 size")
2184
+ if w.index.tolist() != returns.columns.tolist():
2185
+ raise ValueError("w and returns must have the same columns.")
2186
+ if codependence == "custom_cov" and custom_cov is None:
2187
+ raise ValueError(
2188
+ "custom_cov value of codependence parameter requires a custom_cov parameter."
2189
+ )
2190
+
2191
+ L_a = clusters_matrix(
2192
+ returns,
2193
+ custom_cov=custom_cov,
2194
+ codependence=codependence,
2195
+ linkage=linkage,
+ opt_k_method=opt_k_method,
2196
+ k=k,
2197
+ max_k=max_k,
2198
+ bins_info=bins_info,
2199
+ alpha_tail=alpha_tail,
2200
+ gs_threshold=gs_threshold,
2201
+ leaf_order=leaf_order,
2202
+ )
2203
+
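+ # Same ratio as in connected_assets, but over the clusters adjacency:
+ # the share of pairwise weight mass held within shared clusters.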
2204
+ n = L_a.shape[0]
2205
+ ones = np.ones((n, 1))
2206
+ wwt = np.abs(w_ @ w_.T)
2207
+ ra = ones.T @ (L_a * wwt) @ ones
2208
+ ra /= ones.T @ wwt @ ones
2209
+
2210
+ return ra.item()