riskfolio-lib 7.1.0__cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ """""" #
2
+
3
+ """
4
+ Copyright (c) 2020-2025, Dany Cajas
5
+ This code is mainly based on Yinsen Miao's work available in:
6
+ https://github.com/yinsenm/gerber/blob/af04c2ee5adf342393b028b85ab5546f31c0c8d3/src/gerber.py
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ import riskfolio.src.AuxFunctions as af
12
+
13
+
14
+ __all__ = [
15
+ "gerber_cov_stat0",
16
+ "gerber_cov_stat1",
17
+ "gerber_cov_stat2",
18
+ ]
19
+
20
+
21
def gerber_cov_stat0(X, threshold=0.5):
    r"""
    Compute Gerber covariance Statistics 0 or original Gerber statistics
    :cite:`d-Gerber2021`.

    The raw statistic is not always positive semidefinite. When
    ``af.is_pos_def`` rejects the resulting matrix, it is replaced by the
    output of ``af.cov_fix(cov, method="clipped")``.

    Parameters
    ----------
    X : DataFrame or ndarray of shape (n_samples, n_assets)
        Assets returns, where n_samples is the number of observations and
        n_assets is the number of assets.

    threshold : float
        Fraction of each asset's standard deviation that an observation must
        reach (in absolute value) to count as a move. Must be strictly
        between 0 and 1.

    Returns
    -------
    cov : DataFrame or ndarray of shape (n_assets, n_assets)
        Gerber covariance matrix. A DataFrame labelled with the columns of X
        is returned when X is a DataFrame, otherwise an ndarray.

    Raises
    ------
    ValueError
        When threshold is not strictly between 0 and 1.

    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``
    if not 0 < threshold < 1:
        raise ValueError("threshold must be strictly between 0 and 1")

    is_frame = isinstance(X, pd.DataFrame)
    if is_frame:
        cols = X.columns.tolist()
        X1 = X.to_numpy()
    else:
        X1 = np.asarray(X)

    n, p = X1.shape
    sd_vec = X1.std(axis=0).reshape((p, 1))

    # Per-asset thresholds broadcast over the observations (rows)
    limit = threshold * sd_vec.T
    up = X1 >= limit
    down = X1 <= -limit

    corr = np.zeros((p, p))  # Store Gerber correlation matrix
    for i in range(p):
        for j in range(i + 1):
            # Concordant: both assets move up together or down together
            conc = (up[:, i] & up[:, j]) | (down[:, i] & down[:, j])
            # Discordant: opposite moves, counted only when not concordant
            disc = ~conc & ((up[:, i] & down[:, j]) | (down[:, i] & up[:, j]))
            pos = int(conc.sum())
            neg = int(disc.sum())

            # A pair that never jointly exceeds the thresholds carries no
            # co-movement information: leave its correlation at zero
            # instead of dividing by zero.
            total = pos + neg
            if total:
                corr[i, j] = (pos - neg) / total
                corr[j, i] = corr[i, j]

    cov = corr * np.outer(sd_vec, sd_vec)
    if not af.is_pos_def(cov):
        cov = af.cov_fix(cov, method="clipped")

    if is_frame:
        cov = pd.DataFrame(cov, index=cols, columns=cols)

    return cov
98
+
99
+
100
def gerber_cov_stat1(X, threshold=0.5):
    r"""
    Compute Gerber covariance Statistics 1 :cite:`d-Gerber2021`.

    For each pair of assets the correlation is
    ``(concordant - discordant) / (n_samples - neutral)``, where an
    observation is neutral when both assets stay strictly inside their
    thresholds.

    Parameters
    ----------
    X : DataFrame or ndarray of shape (n_samples, n_assets)
        Assets returns, where n_samples is the number of observations and
        n_assets is the number of assets.

    threshold : float
        Fraction of each asset's standard deviation that an observation must
        reach (in absolute value) to count as a move. Must be strictly
        between 0 and 1.

    Returns
    -------
    cov : DataFrame or ndarray of shape (n_assets, n_assets)
        Gerber covariance matrix. A DataFrame labelled with the columns of X
        is returned when X is a DataFrame, otherwise an ndarray.

    Raises
    ------
    ValueError
        When threshold is not strictly between 0 and 1.

    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``
    if not 0 < threshold < 1:
        raise ValueError("threshold must be strictly between 0 and 1")

    is_frame = isinstance(X, pd.DataFrame)
    if is_frame:
        cols = X.columns.tolist()
        X1 = X.to_numpy()
    else:
        X1 = np.asarray(X)

    n, p = X1.shape
    sd_vec = X1.std(axis=0).reshape((p, 1))

    # Per-asset thresholds broadcast over the observations (rows)
    limit = threshold * sd_vec.T
    up = X1 >= limit
    down = X1 <= -limit
    # Strictly inside the band: can never overlap with ``up`` or ``down``
    quiet = np.abs(X1) < limit

    corr = np.zeros((p, p))  # Store Gerber correlation matrix
    for i in range(p):
        for j in range(i + 1):
            # Concordant: both assets move up together or down together
            conc = (up[:, i] & up[:, j]) | (down[:, i] & down[:, j])
            # Discordant: opposite moves, counted only when not concordant
            disc = ~conc & ((up[:, i] & down[:, j]) | (down[:, i] & up[:, j]))
            pos = int(conc.sum())
            neg = int(disc.sum())
            nn = int((quiet[:, i] & quiet[:, j]).sum())

            # Guard the degenerate case (no usable observations) rather
            # than dividing by zero.
            denom = n - nn
            if denom:
                corr[i, j] = (pos - neg) / denom
                corr[j, i] = corr[i, j]

    cov = corr * np.outer(sd_vec, sd_vec)

    if is_frame:
        cov = pd.DataFrame(cov, index=cols, columns=cols)

    return cov
177
+
178
+
179
def gerber_cov_stat2(X, threshold=0.5):
    r"""
    Compute Gerber covariance Statistics 2 :cite:`d-Gerber2021`.

    The correlation is built from the matrix ``H`` of concordant minus
    discordant counts, normalised by the square roots of its diagonal.

    Parameters
    ----------
    X : DataFrame or ndarray of shape (n_samples, n_assets)
        Assets returns, where n_samples is the number of observations and
        n_assets is the number of assets.

    threshold : float
        Fraction of each asset's standard deviation that an observation must
        reach (in absolute value) to count as a move. Must be strictly
        between 0 and 1.

    Returns
    -------
    cov : DataFrame or ndarray of shape (n_assets, n_assets)
        Gerber covariance matrix. A DataFrame labelled with the columns of X
        is returned when X is a DataFrame, otherwise an ndarray.

    Raises
    ------
    ValueError
        When threshold is not strictly between 0 and 1.

    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``
    if not 0 < threshold < 1:
        raise ValueError("threshold must be strictly between 0 and 1")

    is_frame = isinstance(X, pd.DataFrame)
    if is_frame:
        cols = X.columns.tolist()
        X1 = X.to_numpy()
    else:
        X1 = np.asarray(X)

    n, p = X1.shape
    sd_vec = X1.std(axis=0).reshape((p, 1))

    # Indicator matrices: U marks up-moves, D marks down-moves, using
    # per-asset thresholds broadcast over the observations (rows)
    limit = sd_vec.T * threshold
    U = (X1 >= limit).astype(float)
    D = (X1 <= -limit).astype(float)

    # Concordant and discordant pair counts
    N_CONC = U.T @ U + D.T @ D
    N_DISC = U.T @ D + D.T @ U
    H = N_CONC - N_DISC
    h = np.sqrt(H.diagonal()).reshape((p, 1))

    # An asset whose H diagonal is zero (e.g. an all-zero column) would
    # produce 0/0 = NaN across its row and column; give those entries zero
    # correlation instead.
    denom = h @ h.T
    corr = np.divide(H, denom, out=np.zeros_like(H, dtype=float), where=denom > 0)

    cov = corr * np.outer(sd_vec, sd_vec)

    if is_frame:
        cov = pd.DataFrame(cov, index=cols, columns=cols)

    return cov