skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skfolio/__init__.py +29 -0
  2. skfolio/cluster/__init__.py +8 -0
  3. skfolio/cluster/_hierarchical.py +387 -0
  4. skfolio/datasets/__init__.py +20 -0
  5. skfolio/datasets/_base.py +389 -0
  6. skfolio/datasets/data/__init__.py +0 -0
  7. skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  8. skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  9. skfolio/datasets/data/sp500_index.csv.gz +0 -0
  10. skfolio/distance/__init__.py +26 -0
  11. skfolio/distance/_base.py +55 -0
  12. skfolio/distance/_distance.py +574 -0
  13. skfolio/exceptions.py +30 -0
  14. skfolio/measures/__init__.py +76 -0
  15. skfolio/measures/_enums.py +355 -0
  16. skfolio/measures/_measures.py +607 -0
  17. skfolio/metrics/__init__.py +3 -0
  18. skfolio/metrics/_scorer.py +121 -0
  19. skfolio/model_selection/__init__.py +18 -0
  20. skfolio/model_selection/_combinatorial.py +407 -0
  21. skfolio/model_selection/_validation.py +194 -0
  22. skfolio/model_selection/_walk_forward.py +221 -0
  23. skfolio/moments/__init__.py +41 -0
  24. skfolio/moments/covariance/__init__.py +29 -0
  25. skfolio/moments/covariance/_base.py +101 -0
  26. skfolio/moments/covariance/_covariance.py +1108 -0
  27. skfolio/moments/expected_returns/__init__.py +21 -0
  28. skfolio/moments/expected_returns/_base.py +31 -0
  29. skfolio/moments/expected_returns/_expected_returns.py +415 -0
  30. skfolio/optimization/__init__.py +36 -0
  31. skfolio/optimization/_base.py +147 -0
  32. skfolio/optimization/cluster/__init__.py +13 -0
  33. skfolio/optimization/cluster/_nco.py +348 -0
  34. skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
  35. skfolio/optimization/cluster/hierarchical/_base.py +440 -0
  36. skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
  37. skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
  38. skfolio/optimization/convex/__init__.py +16 -0
  39. skfolio/optimization/convex/_base.py +1944 -0
  40. skfolio/optimization/convex/_distributionally_robust.py +392 -0
  41. skfolio/optimization/convex/_maximum_diversification.py +417 -0
  42. skfolio/optimization/convex/_mean_risk.py +974 -0
  43. skfolio/optimization/convex/_risk_budgeting.py +560 -0
  44. skfolio/optimization/ensemble/__init__.py +6 -0
  45. skfolio/optimization/ensemble/_base.py +87 -0
  46. skfolio/optimization/ensemble/_stacking.py +326 -0
  47. skfolio/optimization/naive/__init__.py +3 -0
  48. skfolio/optimization/naive/_naive.py +173 -0
  49. skfolio/population/__init__.py +3 -0
  50. skfolio/population/_population.py +883 -0
  51. skfolio/portfolio/__init__.py +13 -0
  52. skfolio/portfolio/_base.py +1096 -0
  53. skfolio/portfolio/_multi_period_portfolio.py +610 -0
  54. skfolio/portfolio/_portfolio.py +842 -0
  55. skfolio/pre_selection/__init__.py +7 -0
  56. skfolio/pre_selection/_pre_selection.py +342 -0
  57. skfolio/preprocessing/__init__.py +3 -0
  58. skfolio/preprocessing/_returns.py +114 -0
  59. skfolio/prior/__init__.py +18 -0
  60. skfolio/prior/_base.py +63 -0
  61. skfolio/prior/_black_litterman.py +238 -0
  62. skfolio/prior/_empirical.py +163 -0
  63. skfolio/prior/_factor_model.py +268 -0
  64. skfolio/typing.py +50 -0
  65. skfolio/uncertainty_set/__init__.py +23 -0
  66. skfolio/uncertainty_set/_base.py +108 -0
  67. skfolio/uncertainty_set/_bootstrap.py +281 -0
  68. skfolio/uncertainty_set/_empirical.py +237 -0
  69. skfolio/utils/__init__.py +0 -0
  70. skfolio/utils/bootstrap.py +115 -0
  71. skfolio/utils/equations.py +350 -0
  72. skfolio/utils/sorting.py +117 -0
  73. skfolio/utils/stats.py +466 -0
  74. skfolio/utils/tools.py +567 -0
  75. skfolio-0.0.1.dist-info/LICENSE +29 -0
  76. skfolio-0.0.1.dist-info/METADATA +568 -0
  77. skfolio-0.0.1.dist-info/RECORD +79 -0
  78. skfolio-0.0.1.dist-info/WHEEL +5 -0
  79. skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,7 @@
1
+ from skfolio.pre_selection._pre_selection import (
2
+ DropCorrelated,
3
+ SelectKExtremes,
4
+ SelectNonDominated,
5
+ )
6
+
7
+ __all__ = ["DropCorrelated", "SelectKExtremes", "SelectNonDominated"]
@@ -0,0 +1,342 @@
1
+ """pre-selection estimators module"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+ import sklearn.base as skb
9
+ import sklearn.feature_selection as skf
10
+ import sklearn.utils.validation as skv
11
+
12
+ import skfolio.typing as skt
13
+ from skfolio.measures import RatioMeasure
14
+ from skfolio.population import Population
15
+ from skfolio.portfolio import Portfolio
16
+
17
+
18
class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for dropping highly correlated assets.

    Simply removing all correlation pairs above the threshold will remove more assets
    than necessary and a naive sequential removal is suboptimal and depends on the
    initial assets ordering.

    Let's suppose X,Y,Z are three random variables with corr(X,Y) and corr(X,Z) above
    the threshold and corr(Y,Z) below.
    The first approach would remove X,Y,Z and the second approach would remove either
    Y and Z or X depending on the initial ordering.

    To avoid these shortcomings, we implement the below algorithm:

        * Step 1: select all correlation pairs above the threshold.
        * Step 2: sort all the selected correlation pairs from highest to lowest.
        * Step 3: for each pair, if none of the two assets has been removed, keep the
          asset with the lowest average correlation against the other assets.

    Parameters
    ----------
    threshold : float, default=0.95
        Correlation threshold. The default value is `0.95`.

    absolute : bool, default=False
        If this is set to True, we take the absolute value of the correlation. This has
        for effect to also include negatively correlated assets.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(self, threshold: float = 0.95, absolute: bool = False):
        self.threshold = threshold
        self.absolute = absolute

    def fit(self, X: npt.ArrayLike, y=None):
        """Run the correlation transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DropCorrelated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")

        n_assets = X.shape[1]
        # `np.corrcoef` returns a 0-d scalar when there is a single asset;
        # `atleast_2d` keeps the matrix indexing below valid in that case.
        corr = np.atleast_2d(np.corrcoef(X.T))
        if self.absolute:
            # Honour the documented `absolute` option: both the pair selection
            # and the average-correlation tie-break use |corr|.
            corr = np.abs(corr)
        mean_corr = corr.mean(axis=0)

        triu_idx = np.triu_indices(n_assets, 1)
        # Correlation of each (i, j) pair with i < j, extracted once.
        pair_corr = corr[triu_idx]

        # select all correlation pairs above the threshold
        selected_idx = np.flatnonzero(pair_corr > self.threshold)

        # sort all the selected correlation pairs from highest to lowest
        selected_idx = selected_idx[np.argsort(-pair_corr[selected_idx])]

        # for each pair, if none of the two assets has been removed, keep the asset with
        # the lowest average correlation with other assets
        to_remove = set()
        for idx in selected_idx:
            i, j = triu_idx[0][idx], triu_idx[1][idx]
            if i not in to_remove and j not in to_remove:
                if mean_corr[i] > mean_corr[j]:
                    to_remove.add(i)
                else:
                    to_remove.add(j)
        self.to_keep_ = ~np.isin(np.arange(n_assets), list(to_remove))
        return self

    def _get_support_mask(self):
        """Return the boolean mask of kept assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
113
+
114
+
115
class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer keeping the `k` assets ranked best or worst by a measure.

    Every asset is evaluated as a single-asset portfolio; the assets are then ordered
    by the chosen measure and the first `k` of that ordering are kept.

    Parameters
    ----------
    k : int, default=10
        Number of assets to select. If `k` is higher than the number of assets, all
        assets are selected.

    measure : Measure, default=RatioMeasure.SHARPE_RATIO
        The :ref:`measure <measures_ref>` used to sort the assets.
        The default is `RatioMeasure.SHARPE_RATIO`.

    highest : bool, default=True
        If this is set to True, the `k` assets with the highest `measure` are selected,
        otherwise it is the `k` lowest.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        k: int = 10,
        measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
        highest: bool = True,
    ):
        self.k = k
        self.measure = measure
        self.highest = highest

    def fit(self, X: npt.ArrayLike, y=None) -> "SelectKExtremes":
        """Rank the assets by `measure` and flag the `k` extreme ones.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectKExtremes
            Fitted estimator.

        Raises
        ------
        ValueError
            If `k` is not strictly positive.
        """
        X = self._validate_data(X)
        n_selected = int(self.k)
        if n_selected <= 0:
            raise ValueError("`k` must be strictly positive")

        n_assets = X.shape[1]
        all_assets = np.arange(n_assets)

        # One portfolio per asset, fully invested in that asset
        single_asset_portfolios = Population([])
        for asset in all_assets:
            one_hot = np.zeros(n_assets)
            one_hot[asset] = 1
            single_asset_portfolios.append(Portfolio(X=X, weights=one_hot))

        ranked = single_asset_portfolios.sort_measure(
            measure=self.measure, reverse=self.highest
        )
        # Each portfolio holds exactly one asset, so its first non-zero index
        # identifies that asset.
        kept_idx = [ptf.nonzero_assets_index[0] for ptf in ranked[:n_selected]]
        self.to_keep_ = np.isin(all_assets, kept_idx)
        return self

    def _get_support_mask(self):
        """Return the boolean mask of kept assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
197
+
198
+
199
class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
    """Transformer for selecting non dominated assets.

    Pre-selection based on the Assets Preselection Process 2 [1]_.

    Good single asset (for example with high return and low risk) is likely to
    contribute to the final optimized portfolio. Each asset is considered as a portfolio
    and these assets are ranked using the non-domination sorting method. The selection
    is based on the ranks assigned to each asset based on their fitness until the number
    of selected assets reaches the user-defined number.

    Considering only the fitness of individual asset is insufficient because a pair of
    negatively correlated assets has the potential to reduce the risk. Therefore,
    negatively correlated pairs of assets are also considered.

    Parameters
    ----------
    min_n_assets : int, optional
        The minimum number of assets to select. If `min_n_assets` is reached before the
        end of the current non-dominated front, we return the remaining assets of this
        front. This is because all assets in the same front have same rank.
        The default (`None`) is to select the first front.

    threshold : float, default=-0.5
        Asset pair with a correlation below this threshold are included in the
        non-domination sorting. The default value is `-0.5`.

    fitness_measures : list[Measure], optional
        A list of :ref:`measure <measures_ref>` used to compute the portfolio fitness.
        The fitness is used to compare portfolios in terms of domination, compute the
        pareto fronts and run the portfolio selection using non-dominated sorting.
        The default (`None`) is to use the list [PerfMeasure.MEAN, RiskMeasure.VARIANCE]

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets, )
        Boolean array indicating which assets are remaining.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Large-Scale Portfolio Optimization Using Multi-objective Evolutionary
        Algorithms and Preselection Methods",
        B.Y. Qu and Q.Zhou (2017).
    """

    to_keep_: np.ndarray

    def __init__(
        self,
        min_n_assets: int | None = None,
        threshold: float = -0.5,
        fitness_measures: list[skt.Measure] | None = None,
    ):
        self.min_n_assets = min_n_assets
        self.threshold = threshold
        self.fitness_measures = fitness_measures

    def fit(self, X: npt.ArrayLike, y=None):
        """Run the Non Dominated transformer and get the appropriate assets.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SelectNonDominated
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is not between -1 and 1.
        """
        X = self._validate_data(X)
        if not -1 <= self.threshold <= 1:
            raise ValueError("`threshold` must be between -1 and 1")
        n_assets = X.shape[1]

        # Nothing to filter when at least as many assets are requested as available
        if self.min_n_assets is not None and self.min_n_assets >= n_assets:
            self.to_keep_ = np.full(n_assets, True)
            return self

        # Build a population of portfolio
        population = Population([])
        # Add single assets
        for i in range(n_assets):
            weights = np.zeros(n_assets)
            weights[i] = 1
            population.append(
                Portfolio(X=X, weights=weights, fitness_measures=self.fitness_measures)
            )

        # Add pairs with correlation below threshold with minimum variance
        # ptf_variance = var1 w1^2 + var2 w2^2 + 2 cov12 w1 w2   (1)
        # with w1 + w2 = 1
        # To find the minimum we substitute w2 = 1 - w1 in (1) and differentiate with
        # respect to w1 and set to zero.
        # By solving the obtained equation, we get:
        # w1 = (var2 - cov12) / (var1 + var2 - 2 cov12)
        # w2 = 1 - w1

        corr = np.corrcoef(X.T)
        covariance = np.cov(X.T)
        for i, j in zip(*np.triu_indices(n_assets, 1), strict=True):
            if corr[i, j] < self.threshold:
                cov = covariance[i, j]
                var1 = covariance[i, i]
                var2 = covariance[j, j]
                weights = np.zeros(n_assets)
                weights[i] = (var2 - cov) / (var1 + var2 - 2 * cov)
                weights[j] = 1 - weights[i]
                population.append(
                    Portfolio(
                        X=X, weights=weights, fitness_measures=self.fitness_measures
                    )
                )

        fronts = population.non_denominated_sort(
            first_front_only=self.min_n_assets is None
        )
        new_assets_idx = set()
        for front in fronts:
            # Stop as soon as the requested minimum is reached (`>=`, not `>`):
            # the check happens between fronts, so a front that has been started
            # is always added entirely.
            if (
                self.min_n_assets is not None
                and len(new_assets_idx) >= self.min_n_assets
            ):
                break
            for idx in front:
                new_assets_idx.update(population[idx].nonzero_assets_index)
        self.to_keep_ = np.isin(np.arange(n_assets), list(new_assets_idx))
        return self

    def _get_support_mask(self):
        """Return the boolean mask of kept assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_
@@ -0,0 +1,3 @@
1
+ from skfolio.preprocessing._returns import prices_to_returns
2
+
3
+ __all__ = ["prices_to_returns"]
@@ -0,0 +1,114 @@
1
+ """Preprocessing module to transform X to returns."""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+
10
+ def prices_to_returns(
11
+ X: pd.DataFrame,
12
+ y: pd.DataFrame | None = None,
13
+ log_returns: bool = False,
14
+ nan_threshold: float = 1,
15
+ join: str = "outer",
16
+ ) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
17
+ r"""Transforms a DataFrame of prices to linear or logarithmic returns.
18
+
19
+ Linear returns (also called simple returns) are defined as:
20
+ .. math:: \frac{S_{t}}{S_{t-1}} - 1
21
+
22
+ Logarithmic returns (also called continuously compounded return) are defined as:
23
+ .. math:: ln\Biggl(\frac{S_{t}}{S_{t-1}}\Biggr)
24
+
25
+ With :math:`S_{t}` the asset price at time :math:`t`.
26
+
27
+ .. warning::
28
+
29
+ The linear returns aggregate across securities, meaning that the linear return
30
+ of the portfolio is the weighted average of the linear returns of the
31
+ securities. For this reason, **portfolio optimization should be performed
32
+ using linear returns** [1]_.
33
+
34
+ On the other hand, the logarithmic returns aggregate across time, meaning that
35
+ the total logarithmic return over K time periods is the sum of all K
36
+ single-period logarithmic returns.
37
+
38
+ .. seealso::
39
+
40
+ :ref:`data preparation <data_preparation>`
41
+
42
+ Parameters
43
+ ----------
44
+ X : DataFrame
45
+ The DataFrame of assets prices.
46
+
47
+ y : DataFrame, optional
48
+ The DataFrame of target or factors prices.
49
+ If provided, it is joined with the DataFrame of prices to ensure identical
50
+ observations.
51
+
52
+ log_returns : bool, default=True
53
+ If this is set to True, logarithmic returns are used instead of simple returns.
54
+
55
+ join : str, default='outer
56
+ The join method between `X` and `y` when `y` is provided.
57
+
58
+ nan_threshold : float, default=1.0
59
+ Drop observations (rows) that have a percentage of missing assets prices above
60
+ this threshold. The default (`1.0`) is to keep all the observations.
61
+
62
+ Returns
63
+ -------
64
+ X : DataFrame
65
+ The DataFrame of price returns of the input `X`.
66
+
67
+ y : DataFrame, optional
68
+ The DataFrame of price returns of the input `y` when provided.
69
+
70
+ References
71
+ ----------
72
+ .. [1] "Linear vs. Compounded Returns – Common Pitfalls in Portfolio Management".
73
+ GARP Risk Professional.
74
+ Attilio Meucci (2010).
75
+ """
76
+ if not isinstance(X, pd.DataFrame):
77
+ raise TypeError("`X` must be a DataFrame")
78
+
79
+ if y is None:
80
+ df = X.copy()
81
+ else:
82
+ if not isinstance(y, pd.DataFrame):
83
+ raise TypeError("`y` must be a DataFrame")
84
+ df = pd.concat([X, y], join=join, axis=1)
85
+
86
+ n_observations, n_assets = X.shape
87
+
88
+ # Remove observations with missing X above threshold
89
+ if nan_threshold is not None:
90
+ nan_threshold = float(nan_threshold)
91
+ if not 0 < nan_threshold <= 1:
92
+ raise ValueError("`nan_threshold` must be between 0 and 1")
93
+ count_nan = df.isna().sum(axis=1)
94
+ to_drop = count_nan[count_nan > n_assets * nan_threshold].index
95
+ if len(to_drop) > 0:
96
+ df.drop(to_drop, axis=0, inplace=True)
97
+
98
+ # Forward fill missing values
99
+ df.ffill(inplace=True)
100
+ # Drop rows if any of its values is missing
101
+ df.dropna(axis=0, how="any", inplace=True)
102
+ # Drop column if all its values are missing
103
+ df.dropna(axis=1, how="all", inplace=True)
104
+
105
+ # returns
106
+ all_returns = df.pct_change().dropna()
107
+ if log_returns:
108
+ all_returns = np.log1p(all_returns)
109
+
110
+ if y is None:
111
+ return all_returns
112
+ returns = all_returns[[x for x in X.columns if x in df.columns]]
113
+ factor_returns = all_returns[[x for x in y.columns if x in df.columns]]
114
+ return returns, factor_returns
@@ -0,0 +1,18 @@
1
+ from skfolio.prior._base import BasePrior, PriorModel
2
+ from skfolio.prior._black_litterman import BlackLitterman
3
+ from skfolio.prior._empirical import EmpiricalPrior
4
+ from skfolio.prior._factor_model import (
5
+ FactorModel,
6
+ BaseLoadingMatrix,
7
+ LoadingMatrixRegression,
8
+ )
9
+
10
+ __all__ = [
11
+ "PriorModel",
12
+ "BasePrior",
13
+ "EmpiricalPrior",
14
+ "BlackLitterman",
15
+ "FactorModel",
16
+ "BaseLoadingMatrix",
17
+ "LoadingMatrixRegression",
18
+ ]
skfolio/prior/_base.py ADDED
@@ -0,0 +1,63 @@
1
+ """Base Prior estimator"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+
9
+ import numpy as np
10
+ import numpy.typing as npt
11
+ import sklearn.base as skb
12
+
13
+
14
+ # frozen=True with eq=False will lead to an id-based hashing which is needed for
15
+ # caching CVX models in Optimization without impacting performance
16
+ @dataclass(frozen=True, eq=False)
17
+ class PriorModel:
18
+ """Prior model dataclass.
19
+
20
+ Attributes
21
+ ----------
22
+ mu : ndarray of shape (n_assets,)
23
+ Estimation of the assets expected returns.
24
+
25
+ covariance : ndarray of shape (n_assets, n_assets)
26
+ Estimation of the assets covariance matrix.
27
+
28
+ returns : ndarray of shape (n_observations, n_assets)
29
+ Estimation of the assets returns.
30
+
31
+ cholesky : ndarray, optional
32
+ Lower-triangular Cholesky factor of the covariance. In some cases it is possible
33
+ to obtain a cholesky factor with less dimension compared to the one obtained
34
+ directly by applying the cholesky decomposition to the covariance estimation
35
+ (for example in Factor Models). When provided, this cholesky factor is use in
36
+ some optimizations (for example in mean-variance) to improve performance and
37
+ convergence. The default is `None`.
38
+ """
39
+ mu: np.ndarray
40
+ covariance: np.ndarray
41
+ returns: np.ndarray
42
+ cholesky: np.ndarray | None = None
43
+
44
+
45
class BasePrior(skb.BaseEstimator, ABC):
    """Abstract parent of every prior estimator in skfolio.

    Concrete subclasses must implement both `__init__` and `fit`.

    Notes
    -----
    All estimators should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).
    """

    # Declared here for typing only; no value is assigned in this base class.
    prior_model_: PriorModel

    @abstractmethod
    def __init__(self):
        """Abstract constructor, to be overridden by concrete priors."""

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None):
        """Abstract fit method, to be overridden by concrete priors."""