skfolio-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skfolio/__init__.py +29 -0
  2. skfolio/cluster/__init__.py +8 -0
  3. skfolio/cluster/_hierarchical.py +387 -0
  4. skfolio/datasets/__init__.py +20 -0
  5. skfolio/datasets/_base.py +389 -0
  6. skfolio/datasets/data/__init__.py +0 -0
  7. skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  8. skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  9. skfolio/datasets/data/sp500_index.csv.gz +0 -0
  10. skfolio/distance/__init__.py +26 -0
  11. skfolio/distance/_base.py +55 -0
  12. skfolio/distance/_distance.py +574 -0
  13. skfolio/exceptions.py +30 -0
  14. skfolio/measures/__init__.py +76 -0
  15. skfolio/measures/_enums.py +355 -0
  16. skfolio/measures/_measures.py +607 -0
  17. skfolio/metrics/__init__.py +3 -0
  18. skfolio/metrics/_scorer.py +121 -0
  19. skfolio/model_selection/__init__.py +18 -0
  20. skfolio/model_selection/_combinatorial.py +407 -0
  21. skfolio/model_selection/_validation.py +194 -0
  22. skfolio/model_selection/_walk_forward.py +221 -0
  23. skfolio/moments/__init__.py +41 -0
  24. skfolio/moments/covariance/__init__.py +29 -0
  25. skfolio/moments/covariance/_base.py +101 -0
  26. skfolio/moments/covariance/_covariance.py +1108 -0
  27. skfolio/moments/expected_returns/__init__.py +21 -0
  28. skfolio/moments/expected_returns/_base.py +31 -0
  29. skfolio/moments/expected_returns/_expected_returns.py +415 -0
  30. skfolio/optimization/__init__.py +36 -0
  31. skfolio/optimization/_base.py +147 -0
  32. skfolio/optimization/cluster/__init__.py +13 -0
  33. skfolio/optimization/cluster/_nco.py +348 -0
  34. skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
  35. skfolio/optimization/cluster/hierarchical/_base.py +440 -0
  36. skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
  37. skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
  38. skfolio/optimization/convex/__init__.py +16 -0
  39. skfolio/optimization/convex/_base.py +1944 -0
  40. skfolio/optimization/convex/_distributionally_robust.py +392 -0
  41. skfolio/optimization/convex/_maximum_diversification.py +417 -0
  42. skfolio/optimization/convex/_mean_risk.py +974 -0
  43. skfolio/optimization/convex/_risk_budgeting.py +560 -0
  44. skfolio/optimization/ensemble/__init__.py +6 -0
  45. skfolio/optimization/ensemble/_base.py +87 -0
  46. skfolio/optimization/ensemble/_stacking.py +326 -0
  47. skfolio/optimization/naive/__init__.py +3 -0
  48. skfolio/optimization/naive/_naive.py +173 -0
  49. skfolio/population/__init__.py +3 -0
  50. skfolio/population/_population.py +883 -0
  51. skfolio/portfolio/__init__.py +13 -0
  52. skfolio/portfolio/_base.py +1096 -0
  53. skfolio/portfolio/_multi_period_portfolio.py +610 -0
  54. skfolio/portfolio/_portfolio.py +842 -0
  55. skfolio/pre_selection/__init__.py +7 -0
  56. skfolio/pre_selection/_pre_selection.py +342 -0
  57. skfolio/preprocessing/__init__.py +3 -0
  58. skfolio/preprocessing/_returns.py +114 -0
  59. skfolio/prior/__init__.py +18 -0
  60. skfolio/prior/_base.py +63 -0
  61. skfolio/prior/_black_litterman.py +238 -0
  62. skfolio/prior/_empirical.py +163 -0
  63. skfolio/prior/_factor_model.py +268 -0
  64. skfolio/typing.py +50 -0
  65. skfolio/uncertainty_set/__init__.py +23 -0
  66. skfolio/uncertainty_set/_base.py +108 -0
  67. skfolio/uncertainty_set/_bootstrap.py +281 -0
  68. skfolio/uncertainty_set/_empirical.py +237 -0
  69. skfolio/utils/__init__.py +0 -0
  70. skfolio/utils/bootstrap.py +115 -0
  71. skfolio/utils/equations.py +350 -0
  72. skfolio/utils/sorting.py +117 -0
  73. skfolio/utils/stats.py +466 -0
  74. skfolio/utils/tools.py +567 -0
  75. skfolio-0.0.1.dist-info/LICENSE +29 -0
  76. skfolio-0.0.1.dist-info/METADATA +568 -0
  77. skfolio-0.0.1.dist-info/RECORD +79 -0
  78. skfolio-0.0.1.dist-info/WHEEL +5 -0
  79. skfolio-0.0.1.dist-info/top_level.txt +1 -0
skfolio/model_selection/_validation.py
@@ -0,0 +1,194 @@
+ """Model validation module."""
+
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
+ # License: BSD 3 clause
+
+ import numpy as np
+ import numpy.typing as npt
+ import sklearn as sk
+ import sklearn.base as skb
+ import sklearn.model_selection as skm
+ import sklearn.utils as sku
+ import sklearn.utils.parallel as skp
+
+ from skfolio.model_selection._combinatorial import BaseCombinatorialCV
+ from skfolio.population import Population
+ from skfolio.portfolio import MultiPeriodPortfolio
+ from skfolio.utils.tools import fit_and_predict, safe_split
+
+
+ def cross_val_predict(
+     estimator: skb.BaseEstimator,
+     X: npt.ArrayLike,
+     y: npt.ArrayLike | None = None,
+     groups: np.ndarray | None = None,
+     cv: skm.BaseCrossValidator | BaseCombinatorialCV | int | None = None,
+     n_jobs: int | None = None,
+     method: str = "predict",
+     verbose: int = 0,
+     fit_params: dict | None = None,
+     pre_dispatch: str = "2*n_jobs",
+     column_indices: np.ndarray | None = None,
+     portfolio_params: dict | None = None,
+ ) -> MultiPeriodPortfolio | Population:
+     """Generate cross-validated `Portfolio` estimates.
+
+     The data is split according to the `cv` parameter.
+     The optimization estimator is fitted on the training set and portfolios are
+     predicted on the corresponding test set.
+
+     For non-combinatorial cross-validation like `KFold`, the output is the predicted
+     :class:`~skfolio.portfolio.MultiPeriodPortfolio` where
+     each :class:`~skfolio.portfolio.Portfolio` corresponds to the prediction on one
+     train/test pair (`k` portfolios for `KFold`).
+
+     For combinatorial cross-validation
+     like :class:`~skfolio.model_selection.CombinatorialPurgedCV`, the output is the
+     predicted :class:`~skfolio.population.Population` of multiple
+     :class:`~skfolio.portfolio.MultiPeriodPortfolio` (each test output is a
+     collection of multiple paths instead of a single path).
+
+     Parameters
+     ----------
+     estimator : BaseOptimization
+         :ref:`Optimization estimator <optimization>` used to fit the data.
+
+     X : array-like of shape (n_observations, n_assets)
+         Price returns of the assets.
+
+     y : array-like of shape (n_observations, n_targets), optional
+         Target data (optional).
+         For example, the price returns of the factors.
+
+     groups : array-like of shape (n_observations,), optional
+         Group labels for the samples used while splitting the dataset into
+         train/test set. Only used in conjunction with a "Group" `cv`
+         instance (e.g., `GroupKFold`).
+
+     cv : int | cross-validation generator, optional
+         Determines the cross-validation splitting strategy.
+         Possible inputs for cv are:
+
+         * None, to use the default 5-fold cross-validation,
+         * int, to specify the number of folds in a `(Stratified)KFold`,
+         * `CV splitter`,
+         * An iterable that generates (train, test) splits as arrays of indices.
+
+     n_jobs : int, optional
+         The number of jobs to run in parallel for `fit` of all `estimators`.
+         `None` means 1 unless in a `joblib.parallel_backend` context. -1 means
+         using all processors.
+
+     method : str
+         Invokes the passed method name of the passed estimator.
+
+     verbose : int, default=0
+         The verbosity level.
+
+     fit_params : dict, optional
+         Parameters to pass to the `fit` method of the estimator.
+
+     pre_dispatch : int or str, default='2*n_jobs'
+         Controls the number of jobs that get dispatched during parallel
+         execution. Reducing this number can be useful to avoid an
+         explosion of memory consumption when more jobs get dispatched
+         than CPUs can process. This parameter can be:
+
+         * None, in which case all the jobs are immediately
+           created and spawned. Use this for lightweight and
+           fast-running jobs, to avoid delays due to on-demand
+           spawning of the jobs
+
+         * An int, giving the exact number of total jobs that are
+           spawned
+
+         * A str, giving an expression as a function of n_jobs,
+           as in '2*n_jobs'
+
+     column_indices : ndarray, optional
+         Indices of the `X` columns to cross-validate on.
+
+     portfolio_params : dict, optional
+         Additional portfolio parameters passed to `MultiPeriodPortfolio`.
+
+     Returns
+     -------
+     predictions : MultiPeriodPortfolio | Population
+         The result of calling `predict` on each train/test split.
+     """
+     X, y = safe_split(X, y, indices=column_indices, axis=1)
+     X, y, groups = sku.indexable(X, y, groups)
+     cv = skm.check_cv(cv, y)
+     splits = list(cv.split(X, y, groups))
+     portfolio_params = {} if portfolio_params is None else portfolio_params.copy()
+
+     # We ensure that the folds are not shuffled
+     if not isinstance(cv, BaseCombinatorialCV):
+         try:
+             if cv.shuffle:
+                 raise ValueError(
+                     "`cross_val_predict` only works with cross-validation setting"
+                     " `shuffle=False`"
+                 )
+         except AttributeError:
+             # If we cannot find the attribute `shuffle`, we check whether the
+             # first folds are shuffled
+             for fold in splits[0]:
+                 if not np.all(np.diff(fold) > 0):
+                     raise ValueError(
+                         "`cross_val_predict` only works with un-shuffled folds"
+                     ) from None
+
+     # We clone the estimator to make sure that all the folds are independent
+     # and that it is pickle-able.
+     parallel = skp.Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
+     # TODO remove when https://github.com/joblib/joblib/issues/1071 is fixed
+     predictions = parallel(
+         skp.delayed(fit_and_predict)(
+             sk.clone(estimator),
+             X,
+             y,
+             train=train,
+             test=test,
+             fit_params=fit_params,
+             method=method,
+         )
+         for train, test in splits
+     )
+
+     if isinstance(cv, BaseCombinatorialCV):
+         path_ids = cv.get_path_ids()
+         path_nb = np.max(path_ids) + 1
+         portfolios = [[] for _ in range(path_nb)]
+         for i, prediction in enumerate(predictions):
+             for j, p in enumerate(prediction):
+                 path_id = path_ids[i, j]
+                 portfolios[path_id].append(p)
+         name = portfolio_params.pop("name", "path")
+         pred = Population(
+             [
+                 MultiPeriodPortfolio(
+                     name=f"{name}_{i}", portfolios=portfolios[i], **portfolio_params
+                 )
+                 for i in range(path_nb)
+             ]
+         )
+     else:
+         # We need to re-order the test folds in case they were un-ordered by the
+         # CV generator.
+         # Because the test folds are not shuffled, we use the first index of each
+         # fold to order them.
+         test_indices = np.concatenate([test for _, test in splits])
+         if np.unique(test_indices, axis=0).shape[0] != test_indices.shape[0]:
+             raise ValueError(
+                 "`cross_val_predict` only works with non-duplicated test indices"
+             )
+         sorted_fold_id = np.argsort([test[0] for _, test in splits])
+         pred = MultiPeriodPortfolio(
+             portfolios=[predictions[fold_id] for fold_id in sorted_fold_id],
+             check_observations_order=False,
+             **portfolio_params,
+         )
+
+     return pred
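As a quick illustration of the API above, here is a minimal usage sketch combining `cross_val_predict` with the `WalkForward` splitter from the next file and the dataset/preprocessing helpers shipped in this wheel. It assumes the `MeanRisk` default constructor is a reasonable estimator and that summary measures such as `sharpe_ratio` are exposed as properties on the returned `MultiPeriodPortfolio`; treat it as a sketch, not canonical usage.

```python
# Minimal sketch (assumptions: MeanRisk defaults, sharpe_ratio property).
from skfolio.datasets import load_sp500_dataset
from skfolio.model_selection import WalkForward, cross_val_predict
from skfolio.optimization import MeanRisk
from skfolio.preprocessing import prices_to_returns

prices = load_sp500_dataset()   # daily prices bundled with the wheel
X = prices_to_returns(prices)   # convert prices to returns

# One year of training, re-fitted every 60 observations.
cv = WalkForward(train_size=252, test_size=60)

# Non-combinatorial CV: returns one MultiPeriodPortfolio,
# with one Portfolio per walk-forward fold.
pred = cross_val_predict(MeanRisk(), X, cv=cv)
print(pred.sharpe_ratio)        # assumed summary property
```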
skfolio/model_selection/_walk_forward.py
@@ -0,0 +1,221 @@
+ """Walk Forward cross-validator."""
+
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
+ # License: BSD 3 clause
+
+ from collections.abc import Iterator
+
+ import numpy as np
+ import numpy.typing as npt
+ import sklearn.model_selection as skm
+ import sklearn.utils as sku
+
+
+ class WalkForward(skm.BaseCrossValidator):
+     """Walk Forward cross-validator.
+
+     Provides train/test indices to split time series data samples using a
+     walk-forward logic.
+
+     In each split, the test indices must be higher than in the previous split;
+     shuffling is therefore inappropriate.
+
+     Compared to `sklearn.model_selection.TimeSeriesSplit`, you control the train/test
+     folds by providing a number of training and test samples instead of a number of
+     splits, making it more suitable for portfolio cross-validation.
+
+     Parameters
+     ----------
+     test_size : int
+         Number of observations in each test set.
+
+     train_size : int
+         Number of observations in each training set.
+
+     expend_train : bool, default=False
+         If this is set to True, each subsequent training set after the first one
+         uses all past observations.
+         The default is `False`.
+
+     reduce_test : bool, default=False
+         If this is set to True, the last train/test split is returned even if the
+         test set is partial (i.e., it contains fewer observations than `test_size`);
+         otherwise it is ignored.
+         The default is `False`.
+
+     purged_size : int, default=0
+         Number of observations to exclude from the end of each train set before the
+         test set.
+         The default value is `0`.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> from skfolio.model_selection import WalkForward
+     >>> X = np.random.randn(6, 2)
+     >>> cv = WalkForward(test_size=1, train_size=2)
+     >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+     ...     print(f"Fold {i}:")
+     ...     print(f"  Train: index={train_index}")
+     ...     print(f"  Test:  index={test_index}")
+     Fold 0:
+       Train: index=[0 1]
+       Test:  index=[2]
+     Fold 1:
+       Train: index=[1 2]
+       Test:  index=[3]
+     Fold 2:
+       Train: index=[2 3]
+       Test:  index=[4]
+     Fold 3:
+       Train: index=[3 4]
+       Test:  index=[5]
+     >>> cv = WalkForward(test_size=1, train_size=2, purged_size=1)
+     >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+     ...     print(f"Fold {i}:")
+     ...     print(f"  Train: index={train_index}")
+     ...     print(f"  Test:  index={test_index}")
+     Fold 0:
+       Train: index=[0 1]
+       Test:  index=[3]
+     Fold 1:
+       Train: index=[1 2]
+       Test:  index=[4]
+     Fold 2:
+       Train: index=[2 3]
+       Test:  index=[5]
+     >>> cv = WalkForward(test_size=2, train_size=3)
+     >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+     ...     print(f"Fold {i}:")
+     ...     print(f"  Train: index={train_index}")
+     ...     print(f"  Test:  index={test_index}")
+     Fold 0:
+       Train: index=[0 1 2]
+       Test:  index=[3 4]
+     >>> cv = WalkForward(test_size=2, train_size=3, reduce_test=True)
+     >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+     ...     print(f"Fold {i}:")
+     ...     print(f"  Train: index={train_index}")
+     ...     print(f"  Test:  index={test_index}")
+     Fold 0:
+       Train: index=[0 1 2]
+       Test:  index=[3 4]
+     Fold 1:
+       Train: index=[2 3 4]
+       Test:  index=[5]
+     >>> cv = WalkForward(test_size=2, train_size=3, expend_train=True, reduce_test=True)
+     >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+     ...     print(f"Fold {i}:")
+     ...     print(f"  Train: index={train_index}")
+     ...     print(f"  Test:  index={test_index}")
+     Fold 0:
+       Train: index=[0 1 2]
+       Test:  index=[3 4]
+     Fold 1:
+       Train: index=[0 1 2 3 4]
+       Test:  index=[5]
+     """
+
+     def __init__(
+         self,
+         test_size: int,
+         train_size: int,
+         expend_train: bool = False,
+         reduce_test: bool = False,
+         purged_size: int = 0,
+     ):
+         self.test_size = test_size
+         self.train_size = train_size
+         self.expend_train = expend_train
+         self.reduce_test = reduce_test
+         self.purged_size = purged_size
+
+     def split(
+         self, X: npt.ArrayLike, y=None, groups=None
+     ) -> Iterator[tuple[np.ndarray, np.ndarray]]:
+         """Generate indices to split data into training and test set.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_assets)
+             Price returns of the assets.
+
+         y : array-like of shape (n_observations, n_targets)
+             Always ignored, exists for compatibility.
+
+         groups : array-like of shape (n_observations,)
+             Always ignored, exists for compatibility.
+
+         Yields
+         ------
+         train : ndarray
+             The training set indices for that split.
+
+         test : ndarray
+             The testing set indices for that split.
+         """
+         X, y = sku.indexable(X, y)
+         n_samples = X.shape[0]
+         # Make sure we have enough samples for the given split parameters
+         if self.train_size + self.purged_size >= n_samples:
+             raise ValueError(
+                 "The sum of `train_size` and `purged_size` "
+                 f"({self.train_size + self.purged_size}) must be smaller than the"
+                 f" number of samples ({n_samples})."
+             )
+
+         indices = np.arange(n_samples)
+
+         test_start = self.train_size + self.purged_size
+         while True:
+             if test_start >= n_samples:
+                 return
+             test_end = test_start + self.test_size
+             train_end = test_start - self.purged_size
+             if self.expend_train:
+                 train_start = 0
+             else:
+                 train_start = train_end - self.train_size
+
+             if test_end > n_samples:
+                 if not self.reduce_test:
+                     return
+                 yield (
+                     indices[train_start:train_end],
+                     indices[test_start:],
+                 )
+             else:
+                 yield (
+                     indices[train_start:train_end],
+                     indices[test_start:test_end],
+                 )
+             test_start = test_end
+
+     def get_n_splits(self, X: npt.ArrayLike, y=None, groups=None) -> int:
+         """Return the number of splitting iterations in the cross-validator.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_assets)
+             Price returns of the assets.
+
+         y : array-like of shape (n_observations, n_targets)
+             Always ignored, exists for compatibility.
+
+         groups : array-like of shape (n_observations,)
+             Always ignored, exists for compatibility.
+
+         Returns
+         -------
+         n_folds : int
+             The number of splitting iterations in the cross-validator.
+         """
+         if X is None:
+             raise ValueError("The 'X' parameter should not be None.")
+         X, y = sku.indexable(X, y)
+         n_samples = X.shape[0]
+         n = n_samples - self.train_size - self.purged_size
+
+         if self.reduce_test and n % self.test_size != 0:
+             return n // self.test_size + 1
+         return n // self.test_size
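To make the `get_n_splits` arithmetic concrete, here is a small standalone sketch of the fold-counting rule, checked against the scenarios from the class docstring. The function name is hypothetical; only the formula mirrors the code above.

```python
# Standalone sketch of the fold-counting rule used by WalkForward.get_n_splits.
def n_walk_forward_splits(
    n_samples: int,
    train_size: int,
    test_size: int,
    purged_size: int = 0,
    reduce_test: bool = False,
) -> int:
    # Observations left for testing after the first train (+ purge) window.
    n = n_samples - train_size - purged_size
    if reduce_test and n % test_size != 0:
        return n // test_size + 1  # the trailing partial test fold is kept
    return n // test_size

# The docstring examples above, all on 6 samples:
assert n_walk_forward_splits(6, train_size=2, test_size=1) == 4
assert n_walk_forward_splits(6, train_size=2, test_size=1, purged_size=1) == 3
assert n_walk_forward_splits(6, train_size=3, test_size=2) == 1
assert n_walk_forward_splits(6, train_size=3, test_size=2, reduce_test=True) == 2
```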
skfolio/moments/__init__.py
@@ -0,0 +1,41 @@
+ """Moments module."""
+
+ from skfolio.moments.covariance import (
+     OAS,
+     BaseCovariance,
+     DenoiseCovariance,
+     DenoteCovariance,
+     EWCovariance,
+     EmpiricalCovariance,
+     GerberCovariance,
+     GraphicalLassoCV,
+     LedoitWolf,
+     ShrunkCovariance,
+ )
+ from skfolio.moments.expected_returns import (
+     BaseMu,
+     EWMu,
+     EmpiricalMu,
+     EquilibriumMu,
+     ShrunkMu,
+     ShrunkMuMethods,
+ )
+
+ __all__ = [
+     "BaseMu",
+     "EmpiricalMu",
+     "EWMu",
+     "ShrunkMu",
+     "EquilibriumMu",
+     "ShrunkMuMethods",
+     "BaseCovariance",
+     "EmpiricalCovariance",
+     "EWCovariance",
+     "GerberCovariance",
+     "DenoiseCovariance",
+     "DenoteCovariance",
+     "LedoitWolf",
+     "OAS",
+     "ShrunkCovariance",
+     "GraphicalLassoCV",
+ ]
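The estimators re-exported here follow the scikit-learn `fit` convention. As a hedged sketch of direct usage: covariance estimators expose a fitted `covariance_` attribute (documented in `BaseCovariance` below), and expected-return estimators are assumed here to expose an analogous `mu_` vector.

```python
# Sketch: fitting moment estimators directly (the mu_ attribute is assumed).
from skfolio.datasets import load_sp500_dataset
from skfolio.moments import DenoiseCovariance, ShrunkMu
from skfolio.preprocessing import prices_to_returns

X = prices_to_returns(load_sp500_dataset())

mu = ShrunkMu().fit(X).mu_                    # shrunk expected returns (assumed attribute)
cov = DenoiseCovariance().fit(X).covariance_  # denoised covariance matrix
print(mu.shape, cov.shape)
```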
skfolio/moments/covariance/__init__.py
@@ -0,0 +1,29 @@
+ """Covariance module."""
+
+ from skfolio.moments.covariance._base import (
+     BaseCovariance,
+ )
+ from skfolio.moments.covariance._covariance import (
+     OAS,
+     DenoiseCovariance,
+     DenoteCovariance,
+     EWCovariance,
+     EmpiricalCovariance,
+     GerberCovariance,
+     GraphicalLassoCV,
+     LedoitWolf,
+     ShrunkCovariance,
+ )
+
+ __all__ = [
+     "BaseCovariance",
+     "EmpiricalCovariance",
+     "EWCovariance",
+     "GerberCovariance",
+     "DenoiseCovariance",
+     "DenoteCovariance",
+     "LedoitWolf",
+     "OAS",
+     "ShrunkCovariance",
+     "GraphicalLassoCV",
+ ]
skfolio/moments/covariance/_base.py
@@ -0,0 +1,101 @@
+ """Base Covariance Estimators."""
+
+ from abc import ABC, abstractmethod
+
+ import numpy as np
+ import numpy.typing as npt
+ import sklearn.base as skb
+
+ from skfolio.exceptions import NonPositiveVarianceError
+ from skfolio.utils.stats import cov_nearest
+
+
+ class BaseCovariance(skb.BaseEstimator, ABC):
+     """Base class for all covariance estimators in `skfolio`.
+
+     Parameters
+     ----------
+     nearest : bool, default=False
+         If this is set to True, the covariance is replaced by the nearest covariance
+         matrix that is positive definite and for which a Cholesky decomposition can
+         be computed. The variance is left unchanged. A covariance matrix is in
+         theory PSD; however, due to floating-point inaccuracies, we can end up with
+         a covariance matrix that is slightly non-PSD or whose Cholesky decomposition
+         fails. This often occurs in high-dimensional problems.
+         For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
+         The default is `False`.
+
+     higham : bool, default=False
+         If this is set to True, the Higham (2002) algorithm is used to find the
+         nearest PSD covariance; otherwise, the eigenvalues are clipped to a
+         threshold above zero (1e-13). The default is `False` (clipping), as the
+         Higham algorithm can be slow for large datasets.
+
+     higham_max_iteration : int, default=100
+         Maximum number of iterations of the Higham (2002) algorithm.
+         The default value is `100`.
+
+     Attributes
+     ----------
+     covariance_ : ndarray of shape (n_assets, n_assets)
+         Estimated covariance matrix.
+
+     Notes
+     -----
+     All estimators should specify all the parameters that can be set
+     at the class level in their ``__init__`` as explicit keyword
+     arguments (no ``*args`` or ``**kwargs``).
+     """
+
+     covariance_: np.ndarray
+
+     @abstractmethod
+     def __init__(
+         self,
+         nearest: bool = False,
+         higham: bool = False,
+         higham_max_iteration: int = 100,
+     ):
+         self.nearest = nearest
+         self.higham = higham
+         self.higham_max_iteration = higham_max_iteration
+
+     @abstractmethod
+     def fit(self, X: npt.ArrayLike, y=None):
+         pass
+
+     def _sanity_check(self, covariance: np.ndarray) -> None:
+         """Perform a sanity check on the covariance matrix by verifying that all
+         diagonal elements are strictly positive.
+         The goal is to detect corrupted asset data (with zero variance) early, as
+         it would lead to optimization errors.
+         """
+         cond = np.diag(covariance) < 1e-15
+         if np.any(cond):
+             corrupted_assets = list(np.argwhere(cond).flatten())
+             detail = "asset indices"
+             if hasattr(self, "feature_names_in_"):
+                 corrupted_assets = list(self.feature_names_in_[corrupted_assets])
+                 detail = "assets"
+             raise NonPositiveVarianceError(
+                 f"The following {detail} have a non-positive variance:"
+                 f" {corrupted_assets}"
+             )
+
+     def _set_covariance(self, covariance: np.ndarray) -> None:
+         """Perform checks, convert to the nearest PSD matrix if specified, and save
+         the covariance.
+
+         Parameters
+         ----------
+         covariance : array-like of shape (n_assets, n_assets)
+             Estimated covariance matrix to be stored.
+         """
+         self._sanity_check(covariance)
+         if self.nearest:
+             covariance = cov_nearest(
+                 covariance,
+                 higham=self.higham,
+                 higham_max_iteration=self.higham_max_iteration,
+             )
+         # set covariance
+         self.covariance_ = covariance
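To show how this template is meant to be extended, here is a hypothetical minimal subclass: a plain sample-covariance estimator that delegates validation and the optional PSD projection to `_set_covariance`. The class name is illustrative only and is not part of the package.

```python
# Hypothetical subclass sketch: plain sample covariance via the base template.
import numpy as np
import numpy.typing as npt

from skfolio.moments.covariance import BaseCovariance


class SampleCovariance(BaseCovariance):
    """Illustrative estimator: sample covariance of asset returns."""

    def __init__(
        self,
        nearest: bool = False,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )

    def fit(self, X: npt.ArrayLike, y=None) -> "SampleCovariance":
        X = np.asarray(X)
        # _set_covariance runs the strictly-positive-variance sanity check and,
        # if `nearest` is True, projects onto the nearest PSD matrix before storing.
        self._set_covariance(np.cov(X, rowvar=False))
        return self
```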