skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skfolio/__init__.py +29 -0
  2. skfolio/cluster/__init__.py +8 -0
  3. skfolio/cluster/_hierarchical.py +387 -0
  4. skfolio/datasets/__init__.py +20 -0
  5. skfolio/datasets/_base.py +389 -0
  6. skfolio/datasets/data/__init__.py +0 -0
  7. skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  8. skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  9. skfolio/datasets/data/sp500_index.csv.gz +0 -0
  10. skfolio/distance/__init__.py +26 -0
  11. skfolio/distance/_base.py +55 -0
  12. skfolio/distance/_distance.py +574 -0
  13. skfolio/exceptions.py +30 -0
  14. skfolio/measures/__init__.py +76 -0
  15. skfolio/measures/_enums.py +355 -0
  16. skfolio/measures/_measures.py +607 -0
  17. skfolio/metrics/__init__.py +3 -0
  18. skfolio/metrics/_scorer.py +121 -0
  19. skfolio/model_selection/__init__.py +18 -0
  20. skfolio/model_selection/_combinatorial.py +407 -0
  21. skfolio/model_selection/_validation.py +194 -0
  22. skfolio/model_selection/_walk_forward.py +221 -0
  23. skfolio/moments/__init__.py +41 -0
  24. skfolio/moments/covariance/__init__.py +29 -0
  25. skfolio/moments/covariance/_base.py +101 -0
  26. skfolio/moments/covariance/_covariance.py +1108 -0
  27. skfolio/moments/expected_returns/__init__.py +21 -0
  28. skfolio/moments/expected_returns/_base.py +31 -0
  29. skfolio/moments/expected_returns/_expected_returns.py +415 -0
  30. skfolio/optimization/__init__.py +36 -0
  31. skfolio/optimization/_base.py +147 -0
  32. skfolio/optimization/cluster/__init__.py +13 -0
  33. skfolio/optimization/cluster/_nco.py +348 -0
  34. skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
  35. skfolio/optimization/cluster/hierarchical/_base.py +440 -0
  36. skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
  37. skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
  38. skfolio/optimization/convex/__init__.py +16 -0
  39. skfolio/optimization/convex/_base.py +1944 -0
  40. skfolio/optimization/convex/_distributionally_robust.py +392 -0
  41. skfolio/optimization/convex/_maximum_diversification.py +417 -0
  42. skfolio/optimization/convex/_mean_risk.py +974 -0
  43. skfolio/optimization/convex/_risk_budgeting.py +560 -0
  44. skfolio/optimization/ensemble/__init__.py +6 -0
  45. skfolio/optimization/ensemble/_base.py +87 -0
  46. skfolio/optimization/ensemble/_stacking.py +326 -0
  47. skfolio/optimization/naive/__init__.py +3 -0
  48. skfolio/optimization/naive/_naive.py +173 -0
  49. skfolio/population/__init__.py +3 -0
  50. skfolio/population/_population.py +883 -0
  51. skfolio/portfolio/__init__.py +13 -0
  52. skfolio/portfolio/_base.py +1096 -0
  53. skfolio/portfolio/_multi_period_portfolio.py +610 -0
  54. skfolio/portfolio/_portfolio.py +842 -0
  55. skfolio/pre_selection/__init__.py +7 -0
  56. skfolio/pre_selection/_pre_selection.py +342 -0
  57. skfolio/preprocessing/__init__.py +3 -0
  58. skfolio/preprocessing/_returns.py +114 -0
  59. skfolio/prior/__init__.py +18 -0
  60. skfolio/prior/_base.py +63 -0
  61. skfolio/prior/_black_litterman.py +238 -0
  62. skfolio/prior/_empirical.py +163 -0
  63. skfolio/prior/_factor_model.py +268 -0
  64. skfolio/typing.py +50 -0
  65. skfolio/uncertainty_set/__init__.py +23 -0
  66. skfolio/uncertainty_set/_base.py +108 -0
  67. skfolio/uncertainty_set/_bootstrap.py +281 -0
  68. skfolio/uncertainty_set/_empirical.py +237 -0
  69. skfolio/utils/__init__.py +0 -0
  70. skfolio/utils/bootstrap.py +115 -0
  71. skfolio/utils/equations.py +350 -0
  72. skfolio/utils/sorting.py +117 -0
  73. skfolio/utils/stats.py +466 -0
  74. skfolio/utils/tools.py +567 -0
  75. skfolio-0.0.1.dist-info/LICENSE +29 -0
  76. skfolio-0.0.1.dist-info/METADATA +568 -0
  77. skfolio-0.0.1.dist-info/RECORD +79 -0
  78. skfolio-0.0.1.dist-info/WHEEL +5 -0
  79. skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,574 @@
1
+ """Distance Estimators"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+
7
+ import numpy as np
8
+ import numpy.typing as npt
9
+ import pandas as pd
10
+ import scipy.spatial.distance as scd
11
+ import scipy.stats as sct
12
+ import sklearn.metrics as skm
13
+
14
+ from skfolio.distance._base import BaseDistance
15
+ from skfolio.moments import BaseCovariance, GerberCovariance
16
+ from skfolio.utils.stats import (
17
+ NBinsMethod,
18
+ cov_to_corr,
19
+ n_bins_freedman,
20
+ n_bins_knuth,
21
+ )
22
+ from skfolio.utils.tools import check_estimator
23
+
24
+
25
class PearsonDistance(BaseDistance):
    r"""Distance estimator derived from the Pearson correlation.

    The Pearson correlation matrix of the asset returns is optionally passed
    through an absolute-value and/or a power transformation to obtain the
    codependence matrix, which is then converted into a distance matrix.
    Commonly used configurations are:

        * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
        * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
        * Squared angular distance = :math:`\sqrt{1 - corr^2}`

    Parameters
    ----------
    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is used.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    def __init__(self, absolute: bool = False, power: float = 1):
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "PearsonDistance":
        """Estimate the Pearson codependence and distance matrices.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : PearsonDistance
            Fitted estimator.
        """
        returns = self._validate_data(X)
        # np.corrcoef treats each row as one variable, hence the transpose.
        pearson_corr = np.corrcoef(returns.T)
        codependence, distance = _corr_to_distance(
            pearson_corr, absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
93
+
94
+
95
class KendallDistance(BaseDistance):
    r"""Distance estimator derived from the Kendall rank correlation.

    The Kendall correlation matrix of the asset returns is optionally passed
    through an absolute-value and/or a power transformation to obtain the
    codependence matrix, which is then converted into a distance matrix.
    Commonly used configurations are:

        * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
        * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
        * Squared angular distance = :math:`\sqrt{1 - corr^2}`

    Parameters
    ----------
    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is used.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    def __init__(self, absolute: bool = False, power: float = 1):
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "KendallDistance":
        """Estimate the Kendall codependence and distance matrices.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : KendallDistance
            Fitted estimator.
        """
        returns = pd.DataFrame(self._validate_data(X))
        # pandas computes the pairwise (column by column) Kendall correlation.
        kendall_corr = returns.corr(method="kendall").to_numpy()
        codependence, distance = _corr_to_distance(
            kendall_corr, absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
165
+
166
+
167
class SpearmanDistance(BaseDistance):
    r"""Distance estimator derived from the Spearman rank correlation.

    The Spearman correlation matrix of the asset returns is optionally passed
    through an absolute-value and/or a power transformation to obtain the
    codependence matrix, which is then converted into a distance matrix.
    Commonly used configurations are:

        * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
        * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
        * Squared angular distance = :math:`\sqrt{1 - corr^2}`

    Parameters
    ----------
    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is used.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    def __init__(self, absolute: bool = False, power: float = 1):
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "SpearmanDistance":
        """Estimate the Spearman codependence and distance matrices.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SpearmanDistance
            Fitted estimator.
        """
        returns = pd.DataFrame(self._validate_data(X))
        # pandas computes the pairwise (column by column) Spearman correlation.
        spearman_corr = returns.corr(method="spearman").to_numpy()
        codependence, distance = _corr_to_distance(
            spearman_corr, absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
237
+
238
+
239
class CovarianceDistance(BaseDistance):
    r"""Distance estimator derived from a covariance estimator.

    The correlation matrix implied by a chosen
    :ref:`covariance estimator <covariance_estimator>` is optionally passed
    through an absolute-value and/or a power transformation to obtain the
    codependence matrix, which is then converted into a distance matrix.
    Commonly used configurations are:

        * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
        * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
        * Squared angular distance = :math:`\sqrt{1 - corr^2}`

    Parameters
    ----------
    covariance_estimator : BaseCovariance, optional
        :ref:`Covariance estimator <covariance_estimator>`.
        The default (`None`) is to use :class:`~skfolio.moments.GerberCovariance`.

    absolute : bool, default=False
        If True, the absolute value of the correlation matrix is used.

    power : float, default=1
        Exponent of the power transformation applied to the correlation matrix.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    covariance_estimator_ : BaseCovariance
        Fitted `covariance_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Building Diversified Portfolios that Outperform Out-of-Sample",
        López de Prado, Journal of Portfolio Management (2016)
    """

    covariance_estimator_: BaseCovariance

    def __init__(
        self,
        covariance_estimator: BaseCovariance | None = None,
        absolute: bool = False,
        power: float = 1,
    ):
        self.covariance_estimator = covariance_estimator
        self.absolute = absolute
        self.power = power

    def fit(self, X: npt.ArrayLike, y=None) -> "CovarianceDistance":
        """Fit the covariance estimator and derive the distance matrix from it.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : CovarianceDistance
            Fitted estimator.
        """
        # Resolve the covariance estimator (falling back to GerberCovariance)
        # and fit it on the raw input.
        self.covariance_estimator_ = check_estimator(
            self.covariance_estimator,
            default=GerberCovariance(),
            check_type=BaseCovariance,
        )
        self.covariance_estimator_.fit(X)

        # Validation (and conversion to numpy) happens only after the inner
        # estimator has been fitted so that it still sees the feature names.
        # The validated array itself is not needed here.
        self._validate_data(X)

        fitted_covariance = self.covariance_estimator_.covariance_
        correlation, _ = cov_to_corr(fitted_covariance)
        codependence, distance = _corr_to_distance(
            correlation, absolute=self.absolute, power=self.power
        )
        self.codependence_ = codependence
        self.distance_ = distance
        return self
336
+
337
+
338
class DistanceCorrelation(BaseDistance):
    """Distance Correlation estimator.

    Distance Correlation was introduced by Szekely [1]_ to capture non-linear
    dependencies.

    The codependence between two assets is their sample distance correlation
    and the distance is `sqrt(1 - codependence)` clipped to `[0, 1]`.

    Parameters
    ----------
    threshold : float, default=0.5
        Distance correlation threshold.
        NOTE(review): this value is stored on the estimator but is not read by
        `fit` or `_dcorr`; confirm its intended use.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.

    References
    ----------
    .. [1] "Measuring and testing independence by correlation of distances"
        Gábor J. Szekely, 2005
    """

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    @staticmethod
    def _dcorr(x: np.ndarray, y: np.ndarray):
        """Calculate the distance correlation between two variables.

        Parameters
        ----------
        x : ndarray of shape (n_observations,)
            Observations of the first variable.

        y : ndarray of shape (n_observations,)
            Observations of the second variable.

        Returns
        -------
        value : float
            Sample distance correlation. It is `0.0` when one of the two
            variables has a zero distance variance (e.g. a constant series)
            instead of the `NaN` a plain `0 / 0` division would produce.
        """

        def _double_centered_distances(values: np.ndarray) -> np.ndarray:
            # Pairwise absolute distances, then removal of the row means and
            # column means and addition of the grand mean.
            dist = scd.squareform(scd.pdist(values.reshape(-1, 1)))
            return (
                dist
                - dist.mean(axis=0)[np.newaxis, :]
                - dist.mean(axis=1)[:, np.newaxis]
                + dist.mean()
            )

        a = _double_centered_distances(x)
        b = _double_centered_distances(y)

        # The summed product is non-negative in exact arithmetic; floor it at
        # zero so that rounding noise cannot make the square root return NaN.
        cross = max((a * b).sum(), 0.0)
        scale = np.sqrt((a**2).sum()) * np.sqrt((b**2).sum())
        if scale <= 0.0:
            # Degenerate case: at least one variable has no dispersion.
            return 0.0
        return np.sqrt(cross) / np.sqrt(scale)

    def fit(self, X: npt.ArrayLike, y=None) -> "DistanceCorrelation":
        """Fit the Distance Correlation estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DistanceCorrelation
            Fitted estimator.
        """
        X = self._validate_data(X)
        n_assets = X.shape[1]
        # The diagonal keeps its initial value of one; only the strict upper
        # triangle is computed and mirrored to the lower triangle.
        corr = np.ones((n_assets, n_assets))
        # TODO: parallelize
        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
            corr[i, j] = self._dcorr(x=X[:, i], y=X[:, j])
            corr[j, i] = corr[i, j]
        self.codependence_ = corr
        self.distance_ = np.sqrt(np.clip(1 - self.codependence_, a_min=0.0, a_max=1.0))
        return self
411
+
412
+
413
class MutualInformation(BaseDistance):
    r"""Mutual Information estimator.

    In information theory, the mutual information measures the mutual
    dependence between variables. The associated distance metric is called the
    variation of information.

    For two random variables X and Y, the mutual information I(X,Y) is:

    .. math:: I(X,Y) = H(X) + H(Y) - H(X,Y)

    with H(X) and H(Y) the marginal entropies and H(X,Y) the joint entropy.

    The variation of information is defined as:

    .. math:: d(X,Y) = H(X,Y) - I(X,Y) = H(X) + H(Y) - 2 \times I(X,Y)

    and its normalization as:

    .. math:: D(X,Y) = \frac{d(X,Y)}{H(X,Y)} = \frac{H(X) + H(Y) - 2 \times I(X,Y)}{H(X) + H(Y) - I(X,Y)}

    Parameters
    ----------
    n_bins_method : NBinsMethod, default=NBinsMethod.FREEDMAN
        Method used to compute the number of bins of the contingency matrix
        from which the mutual information is estimated.
        Possible values are:

            * FREEDMAN (`default`)
            * KNUTH

    n_bins : int, optional
        Number of bins. When provided, it is used directly and `n_bins_method`
        is ignored.

    normalize : bool, default=True
        If True, the variation of information is normalized and the
        codependence is the mutual information divided by the smaller of the
        two marginal entropies. Otherwise the raw mutual information and the
        raw variation of information are used.

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.
    """

    def __init__(
        self,
        n_bins_method: NBinsMethod = NBinsMethod.FREEDMAN,
        n_bins: int | None = None,
        normalize: bool = True,
    ):
        self.n_bins_method = n_bins_method
        self.n_bins = n_bins
        self.normalize = normalize

    def fit(self, X: npt.ArrayLike, y=None) -> "MutualInformation":
        """Estimate the mutual-information codependence and distance matrices.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : MutualInformation
            Fitted estimator.

        Raises
        ------
        ValueError
            If `n_bins` is None and `n_bins_method` is not a supported method.
        """
        X = self._validate_data(X)
        n_assets = X.shape[1]

        # One bin count per asset: either the user-supplied constant or the
        # value returned by the selected estimation method.
        if self.n_bins is not None:
            bins_per_asset = [self.n_bins] * n_assets
        else:
            if self.n_bins_method == NBinsMethod.FREEDMAN:
                bins_estimator = n_bins_freedman
            elif self.n_bins_method == NBinsMethod.KNUTH:
                bins_estimator = n_bins_knuth
            else:
                raise ValueError(f"n_bins_method {self.n_bins_method} is not valid")
            bins_per_asset = [bins_estimator(x=X[:, k]) for k in range(n_assets)]

        codependence = np.full((n_assets, n_assets), np.nan)
        distance = codependence.copy()
        # Upper triangle including the diagonal; values are mirrored below.
        for i, j in zip(*np.triu_indices(n_assets), strict=True):
            # Both assets of a pair are binned with the larger bin count.
            n_bins = max(bins_per_asset[i], bins_per_asset[j])
            series_i = X[:, i]
            series_j = X[:, j]
            joint_counts = np.histogram2d(series_i, series_j, bins=n_bins)[0]
            mutual_information = skm.mutual_info_score(
                None, None, contingency=joint_counts
            )
            entropy_i = sct.entropy(np.histogram(series_i, n_bins)[0])
            entropy_j = sct.entropy(np.histogram(series_j, n_bins)[0])
            if self.normalize:
                pair_codependence = mutual_information / min(entropy_i, entropy_j)
                pair_distance = max(
                    0.0,
                    (entropy_i + entropy_j - 2 * mutual_information)
                    / (entropy_i + entropy_j - mutual_information),
                )
            else:
                pair_codependence = mutual_information
                pair_distance = max(
                    0.0, entropy_i + entropy_j - 2 * mutual_information
                )
            codependence[i, j] = codependence[j, i] = pair_codependence
            distance[i, j] = distance[j, i] = pair_distance

        self.codependence_ = codependence
        self.distance_ = distance
        return self
535
+
536
+
537
def _corr_to_distance(
    corr: np.ndarray, absolute: bool, power: float
) -> tuple[np.ndarray, np.ndarray]:
    r"""Convert a correlation matrix into codependence and distance matrices.

    The codependence is the correlation after the optional absolute-value
    transformation followed by the power transformation. The distance is
    `sqrt(scaler * (1 - codependence))` clipped to `[0, 1]`, where `scaler`
    rescales by the smallest value the transformed correlation can take.

    Some widely used distances are:

        * Standard angular distance = :math:`\sqrt{0.5 \times (1 - corr)}`
        * Absolute angular distance = :math:`\sqrt{1 - |corr|}`
        * Squared angular distance = :math:`\sqrt{1 - corr^2}`

    Parameters
    ----------
    corr : ndarray of shape (n_assets, n_assets)
        Correlation matrix.

    absolute : bool
        If True, the absolute value of the correlation matrix is used.

    power : float
        Exponent of the power transformation applied to the correlation matrix.

    Returns
    -------
    codependence, distance : tuple[np.ndarray, np.ndarray]
        Codependence and distance matrices.
    """
    # The extreme correlation values go through the same transformations as
    # the matrix so that the lowest attainable codependence is known.
    limits = np.array([-1, 0, 1])
    if absolute:
        corr, limits = np.abs(corr), np.abs(limits)
    codependence = np.power(corr, power)
    limits = np.power(limits, power)
    lowest = min(limits)
    scaler = 1 / (1 - lowest)
    scaled_gap = scaler * (1 - codependence)
    distance = np.sqrt(np.clip(scaled_gap, a_min=0.0, a_max=1.0))
    return codependence, distance
skfolio/exceptions.py ADDED
@@ -0,0 +1,30 @@
1
+ """
2
+ The :mod:`skfolio.exceptions` module includes all custom warnings and error
3
+ classes used across skfolio.
4
+ """
5
+
6
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
7
+ # License: BSD 3 clause
8
+
9
# Public API of this module: the custom exception classes defined below.
__all__ = [
    "OptimizationError",
    "EquationToMatrixError",
    "GroupNotFoundError",
    "NonPositiveVarianceError",
]
15
+
16
+
17
class OptimizationError(Exception):
    """Raised when an optimization does not converge."""
19
+
20
+
21
class EquationToMatrixError(Exception):
    """Raised when an error occurs while processing equations."""
23
+
24
+
25
class GroupNotFoundError(Exception):
    """Raised when a group name is not found in the groups."""
27
+
28
+
29
class NonPositiveVarianceError(Exception):
    """Raised when a variance is negative or null."""
@@ -0,0 +1,76 @@
1
+ """Module that includes all Measures functions used across `skfolio`."""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ from skfolio.measures._enums import (
7
+ BaseMeasure,
8
+ ExtraRiskMeasure,
9
+ PerfMeasure,
10
+ RatioMeasure,
11
+ RiskMeasure,
12
+ )
13
+ from skfolio.measures._measures import (
14
+ average_drawdown,
15
+ cdar,
16
+ cvar,
17
+ drawdown_at_risk,
18
+ edar,
19
+ entropic_risk_measure,
20
+ evar,
21
+ first_lower_partial_moment,
22
+ fourth_central_moment,
23
+ fourth_lower_partial_moment,
24
+ get_cumulative_returns,
25
+ get_drawdowns,
26
+ gini_mean_difference,
27
+ kurtosis,
28
+ mean_absolute_deviation,
29
+ max_drawdown,
30
+ mean,
31
+ owa_gmd_weights,
32
+ semi_deviation,
33
+ semi_variance,
34
+ skew,
35
+ standard_deviation,
36
+ third_central_moment,
37
+ ulcer_index,
38
+ value_at_risk,
39
+ variance,
40
+ worst_realization,
41
+ )
42
+
43
# Public API of the package: every name imported above from
# `skfolio.measures._enums` and `skfolio.measures._measures` is re-exported.
__all__ = [
    "BaseMeasure",
    "PerfMeasure",
    "RiskMeasure",
    "ExtraRiskMeasure",
    "RatioMeasure",
    "mean",
    "get_cumulative_returns",
    "get_drawdowns",
    "variance",
    "semi_variance",
    "standard_deviation",
    "semi_deviation",
    "third_central_moment",
    "fourth_central_moment",
    "fourth_lower_partial_moment",
    "cvar",
    "mean_absolute_deviation",
    "value_at_risk",
    "worst_realization",
    "first_lower_partial_moment",
    "entropic_risk_measure",
    "evar",
    "drawdown_at_risk",
    "cdar",
    "max_drawdown",
    "average_drawdown",
    "edar",
    "ulcer_index",
    "gini_mean_difference",
    "owa_gmd_weights",
    "skew",
    "kurtosis",
]