skfolio 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +29 -0
- skfolio/cluster/__init__.py +8 -0
- skfolio/cluster/_hierarchical.py +387 -0
- skfolio/datasets/__init__.py +20 -0
- skfolio/datasets/_base.py +389 -0
- skfolio/datasets/data/__init__.py +0 -0
- skfolio/datasets/data/factors_dataset.csv.gz +0 -0
- skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
- skfolio/datasets/data/sp500_index.csv.gz +0 -0
- skfolio/distance/__init__.py +26 -0
- skfolio/distance/_base.py +55 -0
- skfolio/distance/_distance.py +574 -0
- skfolio/exceptions.py +30 -0
- skfolio/measures/__init__.py +76 -0
- skfolio/measures/_enums.py +355 -0
- skfolio/measures/_measures.py +607 -0
- skfolio/metrics/__init__.py +3 -0
- skfolio/metrics/_scorer.py +121 -0
- skfolio/model_selection/__init__.py +18 -0
- skfolio/model_selection/_combinatorial.py +407 -0
- skfolio/model_selection/_validation.py +194 -0
- skfolio/model_selection/_walk_forward.py +221 -0
- skfolio/moments/__init__.py +41 -0
- skfolio/moments/covariance/__init__.py +29 -0
- skfolio/moments/covariance/_base.py +101 -0
- skfolio/moments/covariance/_covariance.py +1108 -0
- skfolio/moments/expected_returns/__init__.py +21 -0
- skfolio/moments/expected_returns/_base.py +31 -0
- skfolio/moments/expected_returns/_expected_returns.py +415 -0
- skfolio/optimization/__init__.py +36 -0
- skfolio/optimization/_base.py +147 -0
- skfolio/optimization/cluster/__init__.py +13 -0
- skfolio/optimization/cluster/_nco.py +348 -0
- skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
- skfolio/optimization/cluster/hierarchical/_base.py +440 -0
- skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
- skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
- skfolio/optimization/convex/__init__.py +16 -0
- skfolio/optimization/convex/_base.py +1944 -0
- skfolio/optimization/convex/_distributionally_robust.py +392 -0
- skfolio/optimization/convex/_maximum_diversification.py +417 -0
- skfolio/optimization/convex/_mean_risk.py +974 -0
- skfolio/optimization/convex/_risk_budgeting.py +560 -0
- skfolio/optimization/ensemble/__init__.py +6 -0
- skfolio/optimization/ensemble/_base.py +87 -0
- skfolio/optimization/ensemble/_stacking.py +326 -0
- skfolio/optimization/naive/__init__.py +3 -0
- skfolio/optimization/naive/_naive.py +173 -0
- skfolio/population/__init__.py +3 -0
- skfolio/population/_population.py +883 -0
- skfolio/portfolio/__init__.py +13 -0
- skfolio/portfolio/_base.py +1096 -0
- skfolio/portfolio/_multi_period_portfolio.py +610 -0
- skfolio/portfolio/_portfolio.py +842 -0
- skfolio/pre_selection/__init__.py +7 -0
- skfolio/pre_selection/_pre_selection.py +342 -0
- skfolio/preprocessing/__init__.py +3 -0
- skfolio/preprocessing/_returns.py +114 -0
- skfolio/prior/__init__.py +18 -0
- skfolio/prior/_base.py +63 -0
- skfolio/prior/_black_litterman.py +238 -0
- skfolio/prior/_empirical.py +163 -0
- skfolio/prior/_factor_model.py +268 -0
- skfolio/typing.py +50 -0
- skfolio/uncertainty_set/__init__.py +23 -0
- skfolio/uncertainty_set/_base.py +108 -0
- skfolio/uncertainty_set/_bootstrap.py +281 -0
- skfolio/uncertainty_set/_empirical.py +237 -0
- skfolio/utils/__init__.py +0 -0
- skfolio/utils/bootstrap.py +115 -0
- skfolio/utils/equations.py +350 -0
- skfolio/utils/sorting.py +117 -0
- skfolio/utils/stats.py +466 -0
- skfolio/utils/tools.py +567 -0
- skfolio-0.0.1.dist-info/LICENSE +29 -0
- skfolio-0.0.1.dist-info/METADATA +568 -0
- skfolio-0.0.1.dist-info/RECORD +79 -0
- skfolio-0.0.1.dist-info/WHEEL +5 -0
- skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,574 @@
|
|
1
|
+
"""Distance Estimators"""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import numpy.typing as npt
|
9
|
+
import pandas as pd
|
10
|
+
import scipy.spatial.distance as scd
|
11
|
+
import scipy.stats as sct
|
12
|
+
import sklearn.metrics as skm
|
13
|
+
|
14
|
+
from skfolio.distance._base import BaseDistance
|
15
|
+
from skfolio.moments import BaseCovariance, GerberCovariance
|
16
|
+
from skfolio.utils.stats import (
|
17
|
+
NBinsMethod,
|
18
|
+
cov_to_corr,
|
19
|
+
n_bins_freedman,
|
20
|
+
n_bins_knuth,
|
21
|
+
)
|
22
|
+
from skfolio.utils.tools import check_estimator
|
23
|
+
|
24
|
+
|
25
|
+
class PearsonDistance(BaseDistance):
    r"""Distance estimator based on the Pearson correlation.

    The Pearson correlation matrix of the assets is turned into a codependence
    matrix by optionally applying an absolute-value and/or a power
    transformation. The distance matrix is then derived from that codependence.

    Some widely used distances obtained this way are:

    * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
      (`absolute=False`, `power=1`)
    * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
      (`absolute=True`, `power=1`)
    * Squared angular distance = :math:`\sqrt{1 - corr^2}`
      (`power=2`)

    Parameters
    ----------
    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is taken before
        the power transformation.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix (the transformed correlation matrix).

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    def __init__(self, absolute: bool = False, power: float = 1):
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "PearsonDistance":
        """Fit the Pearson Distance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : PearsonDistance
            Fitted estimator.
        """
        returns = self._validate_data(X)
        # np.corrcoef expects one variable per row, hence the transpose.
        pearson_corr = np.corrcoef(returns.T)
        codependence, distance = _corr_to_distance(
            pearson_corr, absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
|
93
|
+
|
94
|
+
|
95
|
+
class KendallDistance(BaseDistance):
    r"""Distance estimator based on the Kendall rank correlation.

    The Kendall correlation matrix of the assets is turned into a codependence
    matrix by optionally applying an absolute-value and/or a power
    transformation. The distance matrix is then derived from that codependence.

    Some widely used distances obtained this way are:

    * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
      (`absolute=False`, `power=1`)
    * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
      (`absolute=True`, `power=1`)
    * Squared angular distance = :math:`\sqrt{1 - corr^2}`
      (`power=2`)

    Parameters
    ----------
    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is taken before
        the power transformation.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix (the transformed correlation matrix).

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    def __init__(self, absolute: bool = False, power: float = 1):
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "KendallDistance":
        """Fit the Kendall Distance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : KendallDistance
            Fitted estimator.
        """
        returns = self._validate_data(X)
        # pandas computes the pairwise Kendall correlation between columns.
        kendall_frame = pd.DataFrame(returns).corr(method="kendall")
        codependence, distance = _corr_to_distance(
            kendall_frame.to_numpy(), absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
|
165
|
+
|
166
|
+
|
167
|
+
class SpearmanDistance(BaseDistance):
    r"""Distance estimator based on the Spearman rank correlation.

    The Spearman correlation matrix of the assets is turned into a codependence
    matrix by optionally applying an absolute-value and/or a power
    transformation. The distance matrix is then derived from that codependence.

    Some widely used distances obtained this way are:

    * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
      (`absolute=False`, `power=1`)
    * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
      (`absolute=True`, `power=1`)
    * Squared angular distance = :math:`\sqrt{1 - corr^2}`
      (`power=2`)

    Parameters
    ----------
    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is taken before
        the power transformation.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix (the transformed correlation matrix).

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    def __init__(self, absolute: bool = False, power: float = 1):
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "SpearmanDistance":
        """Fit the Spearman Distance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SpearmanDistance
            Fitted estimator.
        """
        returns = self._validate_data(X)
        # pandas computes the pairwise Spearman correlation between columns.
        spearman_frame = pd.DataFrame(returns).corr(method="spearman")
        codependence, distance = _corr_to_distance(
            spearman_frame.to_numpy(), absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
|
237
|
+
|
238
|
+
|
239
|
+
class CovarianceDistance(BaseDistance):
    r"""Distance estimator based on the correlation of a covariance estimator.

    A chosen :ref:`covariance estimator <covariance_estimator>` is fitted and
    its covariance matrix is converted to a correlation matrix. That
    correlation is turned into a codependence matrix by optionally applying an
    absolute-value and/or a power transformation, and the distance matrix is
    derived from the codependence.

    Some widely used distances obtained this way are:

    * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
      (`absolute=False`, `power=1`)
    * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
      (`absolute=True`, `power=1`)
    * Squared angular distance = :math:`\sqrt{1 - corr^2}`
      (`power=2`)

    Parameters
    ----------
    covariance_estimator : BaseCovariance, optional
        :ref:`Covariance estimator <covariance_estimator>`.
        The default (`None`) is to use :class:`~skfolio.moments.GerberCovariance`.

    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is taken before
        the power transformation.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix (the transformed correlation matrix).

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    covariance_estimator_ : BaseCovariance
        Fitted `covariance_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    covariance_estimator_: BaseCovariance

    def __init__(
        self,
        covariance_estimator: BaseCovariance | None = None,
        absolute: bool = False,
        power: float = 1,
    ):
        self.covariance_estimator = covariance_estimator
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "CovarianceDistance":
        """Fit the Covariance Distance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : CovarianceDistance
            Fitted estimator.
        """
        # Resolve the covariance estimator (GerberCovariance when none was
        # given) and fit it on the raw, not yet validated, input.
        self.covariance_estimator_ = check_estimator(
            self.covariance_estimator,
            default=GerberCovariance(),
            check_type=BaseCovariance,
        )
        self.covariance_estimator_.fit(X)

        # Validation happens only after the inner model has been fitted so the
        # feature names carried by X are still available to it. The validated
        # array itself is not needed here.
        self._validate_data(X)

        correlation, _ = cov_to_corr(self.covariance_estimator_.covariance_)
        codependence, distance = _corr_to_distance(
            correlation, absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
|
336
|
+
|
337
|
+
|
338
|
+
class DistanceCorrelation(BaseDistance):
    """Distance Correlation estimator.

    Distance Correlation was introduced by Szekely [1]_ to capture non-linear
    dependencies.

    The codependence between two assets is their sample distance correlation
    and the distance is :math:`\\sqrt{1 - codependence}` (clipped to [0, 1]
    before taking the square root).

    Parameters
    ----------
    threshold : float, default=0.5
        Distance correlation threshold.
        NOTE(review): the value is stored on the estimator but is not read by
        `fit` or `_dcorr` in this class.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Measuring and testing independence by correlation of distances"
        Gábor J. Szekely , 2005
    """

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    @staticmethod
    def _dcorr(x: np.ndarray, y: np.ndarray):
        """Calculate the distance correlation between two variables.

        Parameters
        ----------
        x : ndarray of shape (n_observations,)
            Observations of the first variable.

        y : ndarray of shape (n_observations,)
            Observations of the second variable.

        Returns
        -------
        value : float
            Sample distance correlation. It is 0 when either variable has a
            zero distance variance (e.g. a constant series), following the
            definition of the distance correlation, instead of the NaN that
            the 0/0 division would otherwise produce.
        """
        # Pairwise absolute differences within each variable.
        x = scd.squareform(scd.pdist(x.reshape(-1, 1)))
        y = scd.squareform(scd.pdist(y.reshape(-1, 1)))
        # Double centering: remove column means and row means, add grand mean.
        x = x - x.mean(axis=0)[np.newaxis, :] - x.mean(axis=1)[:, np.newaxis] + x.mean()
        y = y - y.mean(axis=0)[np.newaxis, :] - y.mean(axis=1)[:, np.newaxis] + y.mean()

        # Both terms share the same 1/n**2 factor, which cancels in the ratio.
        squared_dcov = (x * y).sum()
        dvar_product = np.sqrt((x**2).sum()) * np.sqrt((y**2).sum())
        if dvar_product == 0:
            return 0.0
        # The squared distance covariance is non-negative in exact arithmetic;
        # floor it at 0 so rounding noise cannot turn the square root into NaN.
        return np.sqrt(max(squared_dcov, 0.0)) / np.sqrt(dvar_product)

    def fit(self, X: npt.ArrayLike, y=None) -> "DistanceCorrelation":
        """Fit the Distance Correlation estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DistanceCorrelation
            Fitted estimator.
        """
        X = self._validate_data(X)
        n_assets = X.shape[1]
        # The diagonal stays at 1; only the strict upper triangle is computed
        # and then mirrored.
        corr = np.ones((n_assets, n_assets))
        # TODO: parallelize
        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
            corr[i, j] = self._dcorr(x=X[:, i], y=X[:, j])
            corr[j, i] = corr[i, j]
        self.codependence_ = corr
        self.distance_ = np.sqrt(np.clip(1 - self.codependence_, a_min=0.0, a_max=1.0))
        return self
|
411
|
+
|
412
|
+
|
413
|
+
class MutualInformation(BaseDistance):
    r"""Mutual Information estimator.

    In information theory, the mutual information measures the mutual
    dependence between variables. The distance metric associated with it is
    called the variation of information.

    For two random variables X and Y, the mutual information I(X,Y) is:

    .. math:: I(X,Y) = H(X) + H(Y) - H(X,Y)

    with H(X) and H(Y) the marginal entropies and H(X,Y) the joint entropy.

    The variation of information is:

    .. math:: d(X,Y) = H(X,Y) - I(X,Y) = H(X) + H(Y) - 2 \times I(X,Y)

    and its normalized version is:

    .. math:: D(X,Y) = \frac{d(X,Y)}{H(X,Y)} = \frac{H(X) + H(Y) - 2 \times I(X,Y)}{H(X) + H(Y) - I(X,Y)}

    Parameters
    ----------
    n_bins_method : NBinsMethod, default=NBinsMethod.FREEDMAN
        Method to compute the number of bins for the contingency matrix
        estimation used for the computation of the mutual information.
        Possible values are:

        * FREEDMAN (`default`)
        * KNUTH

    n_bins : int, optional
        Instead of using `n_bins_method`, you can directly specify the number
        of bins with `n_bins`.

    normalize : bool, default=True
        If True, the variation of information is normalized and the
        codependence is the mutual information divided by the smaller of the
        two marginal entropies.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.
    """

    def __init__(
        self,
        n_bins_method: NBinsMethod = NBinsMethod.FREEDMAN,
        n_bins: int | None = None,
        normalize: bool = True,
    ):
        self.n_bins_method = n_bins_method
        self.n_bins = n_bins
        self.normalize = normalize

    def fit(self, X: npt.ArrayLike, y=None) -> "MutualInformation":
        """Fit the Mutual Information estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : MutualInformation
            Fitted estimator.

        Raises
        ------
        ValueError
            If `n_bins` is None and `n_bins_method` is neither FREEDMAN nor
            KNUTH.
        """
        X = self._validate_data(X)
        n_assets = X.shape[1]

        # One bin count per asset: either the user-supplied constant or the
        # value given by the selected binning rule.
        if self.n_bins is not None:
            n_bins_list = [self.n_bins] * n_assets
        else:
            if self.n_bins_method == NBinsMethod.FREEDMAN:
                n_bins_func = n_bins_freedman
            elif self.n_bins_method == NBinsMethod.KNUTH:
                n_bins_func = n_bins_knuth
            else:
                raise ValueError(f"n_bins_method {self.n_bins_method} is not valid")
            n_bins_list = [n_bins_func(x=X[:, i]) for i in range(n_assets)]

        corr = np.full((n_assets, n_assets), np.nan)
        dist = np.full((n_assets, n_assets), np.nan)

        # Upper triangle including the diagonal; each result is mirrored.
        rows, cols = np.triu_indices(n_assets)
        for i, j in zip(rows, cols, strict=True):
            # Both assets are discretized with the larger of their bin counts.
            n_bins = max(n_bins_list[i], n_bins_list[j])
            series_i = X[:, i]
            series_j = X[:, j]

            contingency = np.histogram2d(series_i, series_j, bins=n_bins)[0]
            mutual_information = skm.mutual_info_score(
                None, None, contingency=contingency
            )
            entropy_x = sct.entropy(np.histogram(series_i, n_bins)[0])
            entropy_y = sct.entropy(np.histogram(series_j, n_bins)[0])

            if self.normalize:
                codependence = mutual_information / min(entropy_x, entropy_y)
                distance = max(
                    0.0,
                    (entropy_x + entropy_y - 2 * mutual_information)
                    / (entropy_x + entropy_y - mutual_information),
                )
            else:
                codependence = mutual_information
                distance = max(0.0, entropy_x + entropy_y - 2 * mutual_information)

            corr[i, j] = codependence
            corr[j, i] = corr[i, j]
            dist[i, j] = distance
            dist[j, i] = dist[i, j]

        self.codependence_ = corr
        self.distance_ = dist
        return self
|
535
|
+
|
536
|
+
|
537
|
+
def _corr_to_distance(
    corr: np.ndarray, absolute: bool, power: float
) -> tuple[np.ndarray, np.ndarray]:
    r"""Convert a correlation matrix into codependence and distance matrices.

    The codependence is the correlation after the optional absolute-value
    transformation followed by the power transformation. The distance is
    :math:`\sqrt{scaler \times (1 - codependence)}`, clipped to [0, 1] before
    the square root, where `scaler` rescales by the smallest codependence the
    transformations can produce from a correlation of -1, 0 or 1.

    Some widely used distances are:

    * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
    * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
    * Squared angular distance = :math:`\sqrt{1 - corr^2}`

    Parameters
    ----------
    corr : ndarray of shape (n_assets, n_assets)
        Correlation matrix.

    absolute : bool
        If True, the absolute value of the correlation matrix is taken before
        the power transformation.

    power : float
        Exponent of the power transformation applied to the correlation matrix.

    Returns
    -------
    codependence, distance : tuple[np.ndarray, np.ndarray]
        Codependence and distance matrices.
    """
    # Reference correlations (-1, 0, 1) go through the same transformations as
    # the matrix so the lowest reachable codependence is known.
    extremes = np.array([-1, 0, 1])
    if absolute:
        corr, extremes = np.abs(corr), np.abs(extremes)
    codependence = np.power(corr, power)
    lowest = min(np.power(extremes, power))

    # e.g. lowest == -1 gives 0.5 (standard angular), lowest == 0 gives 1.
    scaler = 1 / (1 - lowest)
    scaled = scaler * (1 - codependence)
    distance = np.sqrt(np.clip(scaled, a_min=0.0, a_max=1.0))
    return codependence, distance
|
skfolio/exceptions.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
"""
|
2
|
+
The :mod:`skfolio.exceptions` module includes all custom warnings and error
|
3
|
+
classes used across skfolio.
|
4
|
+
"""
|
5
|
+
|
6
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
7
|
+
# License: BSD 3 clause
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"OptimizationError",
|
11
|
+
"EquationToMatrixError",
|
12
|
+
"GroupNotFoundError",
|
13
|
+
"NonPositiveVarianceError",
|
14
|
+
]
|
15
|
+
|
16
|
+
|
17
|
+
class OptimizationError(Exception):
    """Exception signalling that an optimization did not converge."""
|
19
|
+
|
20
|
+
|
21
|
+
class EquationToMatrixError(Exception):
    """Exception signalling an error while processing equations."""
|
23
|
+
|
24
|
+
|
25
|
+
class GroupNotFoundError(Exception):
    """Exception signalling that a group name was not found in the groups."""
|
27
|
+
|
28
|
+
|
29
|
+
class NonPositiveVarianceError(Exception):
    """Exception signalling a variance that is negative or zero."""
|
@@ -0,0 +1,76 @@
|
|
1
|
+
"""Module that includes all Measures functions used across `skfolio`."""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
from skfolio.measures._enums import (
|
7
|
+
BaseMeasure,
|
8
|
+
ExtraRiskMeasure,
|
9
|
+
PerfMeasure,
|
10
|
+
RatioMeasure,
|
11
|
+
RiskMeasure,
|
12
|
+
)
|
13
|
+
from skfolio.measures._measures import (
|
14
|
+
average_drawdown,
|
15
|
+
cdar,
|
16
|
+
cvar,
|
17
|
+
drawdown_at_risk,
|
18
|
+
edar,
|
19
|
+
entropic_risk_measure,
|
20
|
+
evar,
|
21
|
+
first_lower_partial_moment,
|
22
|
+
fourth_central_moment,
|
23
|
+
fourth_lower_partial_moment,
|
24
|
+
get_cumulative_returns,
|
25
|
+
get_drawdowns,
|
26
|
+
gini_mean_difference,
|
27
|
+
kurtosis,
|
28
|
+
mean_absolute_deviation,
|
29
|
+
max_drawdown,
|
30
|
+
mean,
|
31
|
+
owa_gmd_weights,
|
32
|
+
semi_deviation,
|
33
|
+
semi_variance,
|
34
|
+
skew,
|
35
|
+
standard_deviation,
|
36
|
+
third_central_moment,
|
37
|
+
ulcer_index,
|
38
|
+
value_at_risk,
|
39
|
+
variance,
|
40
|
+
worst_realization,
|
41
|
+
)
|
42
|
+
|
43
|
+
__all__ = [
|
44
|
+
"BaseMeasure",
|
45
|
+
"PerfMeasure",
|
46
|
+
"RiskMeasure",
|
47
|
+
"ExtraRiskMeasure",
|
48
|
+
"RatioMeasure",
|
49
|
+
"mean",
|
50
|
+
"get_cumulative_returns",
|
51
|
+
"get_drawdowns",
|
52
|
+
"variance",
|
53
|
+
"semi_variance",
|
54
|
+
"standard_deviation",
|
55
|
+
"semi_deviation",
|
56
|
+
"third_central_moment",
|
57
|
+
"fourth_central_moment",
|
58
|
+
"fourth_lower_partial_moment",
|
59
|
+
"cvar",
|
60
|
+
"mean_absolute_deviation",
|
61
|
+
"value_at_risk",
|
62
|
+
"worst_realization",
|
63
|
+
"first_lower_partial_moment",
|
64
|
+
"entropic_risk_measure",
|
65
|
+
"evar",
|
66
|
+
"drawdown_at_risk",
|
67
|
+
"cdar",
|
68
|
+
"max_drawdown",
|
69
|
+
"average_drawdown",
|
70
|
+
"edar",
|
71
|
+
"ulcer_index",
|
72
|
+
"gini_mean_difference",
|
73
|
+
"owa_gmd_weights",
|
74
|
+
"skew",
|
75
|
+
"kurtosis",
|
76
|
+
]
|