skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skfolio/__init__.py +29 -0
  2. skfolio/cluster/__init__.py +8 -0
  3. skfolio/cluster/_hierarchical.py +387 -0
  4. skfolio/datasets/__init__.py +20 -0
  5. skfolio/datasets/_base.py +389 -0
  6. skfolio/datasets/data/__init__.py +0 -0
  7. skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  8. skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  9. skfolio/datasets/data/sp500_index.csv.gz +0 -0
  10. skfolio/distance/__init__.py +26 -0
  11. skfolio/distance/_base.py +55 -0
  12. skfolio/distance/_distance.py +574 -0
  13. skfolio/exceptions.py +30 -0
  14. skfolio/measures/__init__.py +76 -0
  15. skfolio/measures/_enums.py +355 -0
  16. skfolio/measures/_measures.py +607 -0
  17. skfolio/metrics/__init__.py +3 -0
  18. skfolio/metrics/_scorer.py +121 -0
  19. skfolio/model_selection/__init__.py +18 -0
  20. skfolio/model_selection/_combinatorial.py +407 -0
  21. skfolio/model_selection/_validation.py +194 -0
  22. skfolio/model_selection/_walk_forward.py +221 -0
  23. skfolio/moments/__init__.py +41 -0
  24. skfolio/moments/covariance/__init__.py +29 -0
  25. skfolio/moments/covariance/_base.py +101 -0
  26. skfolio/moments/covariance/_covariance.py +1108 -0
  27. skfolio/moments/expected_returns/__init__.py +21 -0
  28. skfolio/moments/expected_returns/_base.py +31 -0
  29. skfolio/moments/expected_returns/_expected_returns.py +415 -0
  30. skfolio/optimization/__init__.py +36 -0
  31. skfolio/optimization/_base.py +147 -0
  32. skfolio/optimization/cluster/__init__.py +13 -0
  33. skfolio/optimization/cluster/_nco.py +348 -0
  34. skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
  35. skfolio/optimization/cluster/hierarchical/_base.py +440 -0
  36. skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
  37. skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
  38. skfolio/optimization/convex/__init__.py +16 -0
  39. skfolio/optimization/convex/_base.py +1944 -0
  40. skfolio/optimization/convex/_distributionally_robust.py +392 -0
  41. skfolio/optimization/convex/_maximum_diversification.py +417 -0
  42. skfolio/optimization/convex/_mean_risk.py +974 -0
  43. skfolio/optimization/convex/_risk_budgeting.py +560 -0
  44. skfolio/optimization/ensemble/__init__.py +6 -0
  45. skfolio/optimization/ensemble/_base.py +87 -0
  46. skfolio/optimization/ensemble/_stacking.py +326 -0
  47. skfolio/optimization/naive/__init__.py +3 -0
  48. skfolio/optimization/naive/_naive.py +173 -0
  49. skfolio/population/__init__.py +3 -0
  50. skfolio/population/_population.py +883 -0
  51. skfolio/portfolio/__init__.py +13 -0
  52. skfolio/portfolio/_base.py +1096 -0
  53. skfolio/portfolio/_multi_period_portfolio.py +610 -0
  54. skfolio/portfolio/_portfolio.py +842 -0
  55. skfolio/pre_selection/__init__.py +7 -0
  56. skfolio/pre_selection/_pre_selection.py +342 -0
  57. skfolio/preprocessing/__init__.py +3 -0
  58. skfolio/preprocessing/_returns.py +114 -0
  59. skfolio/prior/__init__.py +18 -0
  60. skfolio/prior/_base.py +63 -0
  61. skfolio/prior/_black_litterman.py +238 -0
  62. skfolio/prior/_empirical.py +163 -0
  63. skfolio/prior/_factor_model.py +268 -0
  64. skfolio/typing.py +50 -0
  65. skfolio/uncertainty_set/__init__.py +23 -0
  66. skfolio/uncertainty_set/_base.py +108 -0
  67. skfolio/uncertainty_set/_bootstrap.py +281 -0
  68. skfolio/uncertainty_set/_empirical.py +237 -0
  69. skfolio/utils/__init__.py +0 -0
  70. skfolio/utils/bootstrap.py +115 -0
  71. skfolio/utils/equations.py +350 -0
  72. skfolio/utils/sorting.py +117 -0
  73. skfolio/utils/stats.py +466 -0
  74. skfolio/utils/tools.py +567 -0
  75. skfolio-0.0.1.dist-info/LICENSE +29 -0
  76. skfolio-0.0.1.dist-info/METADATA +568 -0
  77. skfolio-0.0.1.dist-info/RECORD +79 -0
  78. skfolio-0.0.1.dist-info/WHEEL +5 -0
  79. skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,238 @@
1
+ """Black & Litterman Prior Model estimator."""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+
9
+ from skfolio.moments import EquilibriumMu
10
+ from skfolio.prior._base import BasePrior, PriorModel
11
+ from skfolio.prior._empirical import EmpiricalPrior
12
+ from skfolio.utils.equations import equations_to_matrix
13
+ from skfolio.utils.tools import check_estimator, input_to_array
14
+
15
+
16
class BlackLitterman(BasePrior):
    """Black & Litterman Prior Model estimator.

    The Black & Litterman model [1]_ takes a Bayesian approach by using a prior estimate
    of the assets expected returns and covariance matrix which are updated using the
    analyst views to get a posterior estimate.

    Parameters
    ----------
    views : array-like of strings of shape (n_views,)
        The analyst views about the assets expected returns.
        The views must match the following patterns:

            * Absolute view: "asset_i = a"
            * Relative view: "asset_i - asset_j = b"

        With "asset_i" and "asset_j" the assets names and "a" and "b" the analyst views
        about the assets expected returns expressed in the same frequency as the
        returns `X`.

        Examples:

            * "SPX = 0.00015" --> the SPX will have a daily expected return of 0.015%
            * "SX5E - TLT = 0.00039" --> the SX5E will outperform the TLT by a daily
              expected return of 0.039%
            * "SX5E - SPX = -0.0002" --> the SX5E will underperform the SPX by a daily
              expected return of 0.02%
            * "Equity = 0.00010" --> the sum of Equity assets will have a daily
              expected return of 0.01%
            * "Europe - US = 0.0004" --> the sum of European assets will outperform
              the sum of US assets by a daily expected return of 0.04%

    groups : dict[str, list[str]] or array-like of strings of shape (n_groups, n_assets), optional
        The assets groups to be referenced in `views`.
        If a dictionary is provided, its (key/value) pair must be the
        (asset name/asset groups) and the input `X` of the `fit` methods must be a
        DataFrame with the assets names in columns.

        Examples:

            * groups = {"SX5E": ["Equity", "Europe"], "SPX": ["Equity", "US"], "TLT": ["Bond", "US"]}
            * groups = [["Equity", "Equity", "Bond"], ["Europe", "US", "US"]]

    prior_estimator : BasePrior, optional
        The assets' :ref:`prior model estimator <prior>`. It is used to estimate
        the :class:`~skfolio.prior.PriorModel` containing the estimation of the assets
        expected returns, covariance matrix, returns and Cholesky decomposition.
        The default (`None`) is to use `EmpiricalPrior(mu_estimator=EquilibriumMu())`.

    tau : float, default=0.05
        Tau controls the degree of uncertainty given to the analyst views. A low value
        means high uncertainty and will put less weight on the analyst views compared to
        the prior returns. The default value is `0.05`.
        Other common values used in the literature are `1.0` or the inverse of the
        number of observations.

    view_confidences : array-like of floats of shape (n_views,), optional
        Instead of using a diagonal uncertainty matrix (Omega) proportional to the prior
        covariance matrix, you can provide the vector of view confidences (between 0
        and 1) as described by the Idzorek's method [2]_.

    risk_free_rate : float, default=0.0
        The risk-free rate.

    Attributes
    ----------
    prior_model_ : PriorModel
        The :class:`~skfolio.prior.PriorModel`.

    groups_ : ndarray of shape(n_groups, n_assets)
        Assets names and groups converted to a 2D array.

    views_ : ndarray of shape (n_views,)
        The analyst views converted to a ndarray of floats.

    picking_matrix_ : ndarray of shape (n_views, n_assets)
        Picking matrix computed from the views and assets names/groups.

    prior_estimator_ : BasePrior
        Fitted `prior_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Combining investor views with market equilibrium",
        The Journal of Fixed Income,
        Fischer Black and Robert Litterman, 1991.

    .. [2] "A step-by-step guide to the Black-Litterman model : Incorporating
        user-specified confidence",
        Forecasting Expected Returns in the Financial Markets,
        Idzorek T, 2007.
    """

    groups_: np.ndarray
    views_: np.ndarray
    picking_matrix_: np.ndarray
    prior_estimator_: BasePrior

    def __init__(
        self,
        views: npt.ArrayLike,
        groups: dict[str, list[str]] | npt.ArrayLike | None = None,
        prior_estimator: BasePrior | None = None,
        tau: float = 0.05,
        view_confidences: npt.ArrayLike | None = None,
        risk_free_rate: float = 0,
    ):
        self.views = views
        self.groups = groups
        self.prior_estimator = prior_estimator
        self.tau = tau
        self.view_confidences = view_confidences
        self.risk_free_rate = risk_free_rate

    def fit(self, X: npt.ArrayLike, y=None) -> "BlackLitterman":
        """Fit the Black & Litterman estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : BlackLitterman
            Fitted estimator.

        Raises
        ------
        ValueError
            If `views` is not 1D, if neither `groups` nor asset names (from a
            DataFrame `X`) are available, or if a value of `view_confidences` is
            outside [0, 1].
        """
        self.prior_estimator_ = check_estimator(
            self.prior_estimator,
            default=EmpiricalPrior(mu_estimator=EquilibriumMu()),
            check_type=BasePrior,
        )
        # fitting prior estimator
        self.prior_estimator_.fit(X)

        prior_mu = self.prior_estimator_.prior_model_.mu
        prior_covariance = self.prior_estimator_.prior_model_.covariance
        prior_returns = self.prior_estimator_.prior_model_.returns

        # we validate after all models have been fitted to keep features names
        # information.
        self._validate_data(X)

        n_assets = prior_returns.shape[1]
        views = np.asarray(self.views)
        if views.ndim != 1:
            raise ValueError(f"`views` must be a 1D array, got a {views.ndim}D array.")
        if self.groups is None:
            if not hasattr(self, "feature_names_in_"):
                raise ValueError(
                    "You must provide either `groups`"
                    " or `X` as a DataFrame with asset names in columns"
                )
            # Without explicit groups, the only "group" row is the asset names, so
            # views can only reference individual assets.
            self.groups_ = np.asarray([self.feature_names_in_])
        else:
            self.groups_ = input_to_array(
                items=self.groups,
                n_assets=n_assets,
                fill_value="",
                dim=2,
                assets_names=(
                    self.feature_names_in_
                    if hasattr(self, "feature_names_in_")
                    else None
                ),
                name="groups",
            )
        self.picking_matrix_, self.views_ = equations_to_matrix(
            groups=self.groups_,
            equations=views,
            sum_to_one=True,
            raise_if_group_missing=True,
            names=("groups", "views"),
        )

        if self.view_confidences is None:
            # Diagonal uncertainty matrix proportional to the prior variance of
            # each view portfolio.
            omega = np.diag(
                np.diag(
                    self.tau
                    * self.picking_matrix_
                    @ prior_covariance
                    @ self.picking_matrix_.T
                )
            )
        else:
            # Idzorek's method using Jay Walters closed form solution.
            # `np.array(..., dtype=float)` takes a private float copy: the zero
            # replacement below must neither write into the user-supplied
            # `view_confidences` parameter nor be truncated back to 0 when the
            # input has an integer dtype.
            view_confidences = np.array(self.view_confidences, dtype=float)
            if np.any(view_confidences < 0) or np.any(view_confidences > 1):
                raise ValueError(
                    "all values of view_confidences must be between 0 and 1"
                )
            # A zero confidence would divide by zero; use a tiny positive value
            # so the view gets a (near) infinite uncertainty instead.
            view_confidences[view_confidences == 0] = 1e-16
            alphas = 1 / view_confidences - 1
            omega = np.diag(
                np.diag(
                    self.tau
                    * alphas[:, np.newaxis]
                    * self.picking_matrix_
                    @ prior_covariance
                    @ self.picking_matrix_.T
                )
            )

        # solving linear system instead of matrix inversion
        _v = self.tau * prior_covariance @ self.picking_matrix_.T
        _a = self.picking_matrix_ @ _v + omega
        _b = self.views_ - self.picking_matrix_ @ prior_mu
        posterior_mu = prior_mu + _v @ np.linalg.solve(_a, _b) + self.risk_free_rate
        posterior_covariance = (
            prior_covariance
            + self.tau * prior_covariance
            - _v @ np.linalg.solve(_a, _v.T)
        )
        self.prior_model_ = PriorModel(
            mu=posterior_mu, covariance=posterior_covariance, returns=prior_returns
        )
        return self
@@ -0,0 +1,163 @@
1
+ """Empirical Prior Model estimator."""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+ import numpy as np
6
+ import numpy.typing as npt
7
+
8
+ from skfolio.moments import BaseCovariance, BaseMu, EmpiricalCovariance, EmpiricalMu
9
+ from skfolio.prior._base import BasePrior, PriorModel
10
+ from skfolio.utils.tools import check_estimator
11
+
12
+
13
class EmpiricalPrior(BasePrior):
    """Empirical Prior estimator.

    The Empirical Prior estimates the :class:`~skfolio.prior.PriorModel` by fitting a
    `mu_estimator` and a `covariance_estimator` separately.

    Parameters
    ----------
    mu_estimator : BaseMu, optional
        The assets :ref:`expected returns estimator <mu_estimator>`.
        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalMu`.

    covariance_estimator : BaseCovariance, optional
        The assets :ref:`covariance matrix estimator <covariance_estimator>`.
        The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.

    is_log_normal : bool, default=False
        If this is set to True, the moments are estimated on the logarithmic returns
        as opposed to the linear returns. Then the moments estimations of the
        logarithmic returns are projected to the investment horizon and transformed
        to obtain the moments estimation of the linear returns at the investment
        horizon. If True, `investment_horizon` must be provided. The input `X` must be
        **linear returns**. They will be converted into logarithmic returns only for the
        moments estimation.

        .. seealso::

            :ref:`data preparation <data_preparation>`

    investment_horizon : float, optional
        The investment horizon used for the moments estimation of the linear returns
        when `is_log_normal` is `True`.

    Attributes
    ----------
    prior_model_ : PriorModel
        The assets :class:`~skfolio.prior.PriorModel`.

    mu_estimator_ : BaseMu
        Fitted `mu_estimator`.

    covariance_estimator_ : BaseCovariance
        Fitted `covariance_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    References
    ----------
    .. [1] "Linear vs. Compounded Returns – Common Pitfalls in Portfolio Management".
        GARP Risk Professional.
        Attilio Meucci (2010).
    """

    mu_estimator_: BaseMu
    covariance_estimator_: BaseCovariance

    def __init__(
        self,
        mu_estimator: BaseMu | None = None,
        covariance_estimator: BaseCovariance | None = None,
        is_log_normal: bool = False,
        investment_horizon: float | None = None,
    ):
        self.mu_estimator = mu_estimator
        self.covariance_estimator = covariance_estimator
        self.is_log_normal = is_log_normal
        self.investment_horizon = investment_horizon

    def fit(self, X: npt.ArrayLike, y=None) -> "EmpiricalPrior":
        """Fit the Empirical Prior estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : EmpiricalPrior
            Fitted estimator.

        Raises
        ------
        ValueError
            If `investment_horizon` is provided while `is_log_normal` is `False`,
            or is missing while `is_log_normal` is `True`.
        """
        self.mu_estimator_ = check_estimator(
            self.mu_estimator,
            default=EmpiricalMu(),
            check_type=BaseMu,
        )
        self.covariance_estimator_ = check_estimator(
            self.covariance_estimator,
            default=EmpiricalCovariance(),
            check_type=BaseCovariance,
        )

        if self.is_log_normal:
            if self.investment_horizon is None:
                raise ValueError(
                    "`investment_horizon` must be provided when "
                    "`is_log_normal` is `True`"
                )
            # Convert linear returns to log returns; the moments are estimated
            # on the log returns.
            X_fit = np.log(1 + X)
        else:
            if self.investment_horizon is not None:
                raise ValueError(
                    "`investment_horizon` must be `None` when "
                    "`is_log_normal` is `False`"
                )
            X_fit = X

        # fitting estimators
        # Expected returns
        self.mu_estimator_.fit(X_fit)
        mu = self.mu_estimator_.mu_

        # Covariance
        self.covariance_estimator_.fit(X_fit)
        covariance = self.covariance_estimator_.covariance_

        if self.is_log_normal:
            # Using the property of aggregation across time we scale this
            # distribution to the investment horizon by the "square-root rule".
            # The products are deliberately NOT in-place (`*=`): `mu` and
            # `covariance` alias the fitted estimators' `mu_` / `covariance_`
            # arrays, which must keep the moments that were actually estimated.
            mu = mu * self.investment_horizon
            covariance = covariance * self.investment_horizon

            # We convert it into a distribution of linear returns over the
            # investment horizon.
            # NOTE(review): exp(mu + 0.5 * var) is the expected gross return
            # E[1 + R] of a log-normal variable; confirm whether downstream
            # consumers expect the net return (i.e. this value minus 1).
            mu = np.exp(mu + 0.5 * np.diag(covariance))
            covariance = np.outer(mu, mu) * (np.exp(covariance) - 1)

        # we validate and convert to numpy after all models have been fitted to keep
        # features names information.
        X = self._validate_data(X)
        self.prior_model_ = PriorModel(
            mu=mu,
            covariance=covariance,
            returns=X,
        )
        return self
@@ -0,0 +1,268 @@
1
+ """Factor Model estimator"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ from abc import ABC, abstractmethod
7
+
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+ import sklearn.base as skb
11
+ import sklearn.linear_model as skl
12
+ import sklearn.multioutput as skm
13
+
14
+ from skfolio.prior._base import BasePrior, PriorModel
15
+ from skfolio.prior._empirical import EmpiricalPrior
16
+ from skfolio.utils.stats import cov_nearest
17
+ from skfolio.utils.tools import check_estimator
18
+
19
+
20
class BaseLoadingMatrix(skb.BaseEstimator, ABC):
    """Abstract base shared by every Loading Matrix estimator.

    Concrete subclasses implement `fit` and are expected to expose the
    `loading_matrix_` and `intercepts_` attributes declared below.

    Notes
    -----
    All estimators should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).
    """

    # Declared here for type checkers only; no value is assigned at class level.
    loading_matrix_: np.ndarray
    intercepts_: np.ndarray

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike):
        """Fit the estimator on `X` and `y`; must be overridden by subclasses."""
36
+
37
+
38
class LoadingMatrixRegression(BaseLoadingMatrix):
    """Loading Matrix Regression estimator.

    Estimate the loading matrix by fitting one linear regressor per asset.

    Parameters
    ----------
    linear_regressor : BaseEstimator, optional
        Linear regressor used to fit the factors on each asset separately.
        The default (`None`) is to use `LassoCV(fit_intercept=False)`.

    n_jobs : int, optional
        The number of jobs to run in parallel.

        When individual estimators are fast to train or predict,
        using ``n_jobs > 1`` can result in slower performance due
        to the parallelism overhead.

        The value `-1` means using all processors.
        The default (`None`) means 1 unless in a `joblib.parallel_backend` context.

    Attributes
    ----------
    loading_matrix_ : ndarray of shape (n_assets, n_factors)
        The loading matrix.

    intercepts_ : ndarray of shape (n_assets,)
        The intercepts.

    multi_output_regressor_ : MultiOutputRegressor
        Fitted `sklearn.multioutput.MultiOutputRegressor`
    """

    multi_output_regressor_: skm.MultiOutputRegressor

    def __init__(
        self,
        linear_regressor: skb.BaseEstimator | None = None,
        n_jobs: int | None = None,
    ):
        self.linear_regressor = linear_regressor
        self.n_jobs = n_jobs

    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike) -> "LoadingMatrixRegression":
        """Fit the Loading Matrix Regression Estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : array-like of shape (n_observations, n_factors)
            Price returns of the factors.

        Returns
        -------
        self : LoadingMatrixRegression
            Fitted estimator.
        """
        _linear_regressor = check_estimator(
            self.linear_regressor,
            default=skl.LassoCV(fit_intercept=False),
            check_type=skb.BaseEstimator,
        )

        self.multi_output_regressor_ = skm.MultiOutputRegressor(
            _linear_regressor, n_jobs=self.n_jobs
        )
        # The roles are swapped on purpose: the factors (`y`) are the regressors
        # and each asset (column of `X`) is one regression target.
        self.multi_output_regressor_.fit(X=y, y=X)

        # One fitted regressor per target, i.e. per asset. Iterating over them
        # avoids relying on `X.shape`, which plain array-likes do not have.
        fitted_regressors = self.multi_output_regressor_.estimators_
        self.loading_matrix_ = np.array(
            [regressor.coef_ for regressor in fitted_regressors]
        )
        self.intercepts_ = np.array(
            [regressor.intercept_ for regressor in fitted_regressors]
        )
        return self
118
+
119
+
120
class FactorModel(BasePrior):
    """Factor Model estimator.

    The purpose of Factor Models is to impose a structure on financial variables and
    their covariance matrix by explaining them through a small number of common factors.
    This can help to overcome estimation error by reducing the number of parameters,
    i.e. the dimensionality of the estimation problem, making portfolio optimization
    more robust against noise in the data. Factor Models also provide a decomposition of
    financial risk to systematic and security specific components.

    Parameters
    ----------
    loading_matrix_estimator : BaseLoadingMatrix, optional
        Estimator of the loading matrix (betas) of the factors.
        The default (`None`) is to use :class:`LoadingMatrixRegression` which fit the
        factors using `LassoCV` on each asset separately.

    factor_prior_estimator : BasePrior, optional
        The factors :ref:`prior estimator <prior>`.
        It is used to estimate the :class:`~skfolio.prior.PriorModel` containing the
        factors expected returns and covariance matrix.
        The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.

    residual_variance : bool, default=True
        If this is set to True, the diagonal term of the residuals covariance
        (residuals variance) is added to the factor model covariance.

    higham : bool, default=False
        If this is set to True, we use the Higham (2002) algorithm to find the
        nearest covariance matrix that is positive semi-definite. It is more accurate
        but slower than the default clipping method. For more information
        see :func:`~skfolio.utils.stats.cov_nearest`.

    max_iteration : int, default=100
        Only used when `higham` is set to True. Maximum number of iterations of the
        Higham (2002) algorithm.

    Attributes
    ----------
    prior_model_ : PriorModel
        The :class:`~skfolio.prior.PriorModel`.

    factor_prior_estimator_ : BasePrior
        Fitted `factor_prior_estimator`.

    loading_matrix_estimator_ : BaseLoadingMatrix
        Fitted `loading_matrix_estimator`.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    factor_prior_estimator_: BasePrior
    loading_matrix_estimator_: BaseLoadingMatrix

    def __init__(
        self,
        loading_matrix_estimator: BaseLoadingMatrix | None = None,
        factor_prior_estimator: BasePrior | None = None,
        residual_variance: bool = True,
        higham: bool = False,
        max_iteration: int = 100,
    ):
        self.loading_matrix_estimator = loading_matrix_estimator
        self.factor_prior_estimator = factor_prior_estimator
        self.residual_variance = residual_variance
        self.higham = higham
        self.max_iteration = max_iteration

    # noinspection PyMethodOverriding, PyPep8Naming
    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike) -> "FactorModel":
        """Fit the Factor Model estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : array-like of shape (n_observations, n_factors)
            Factors' returns.

        Returns
        -------
        self : FactorModel
            Fitted estimator.

        Raises
        ------
        ValueError
            If the fitted loading matrix is not of shape (n_assets, n_factors) or
            the fitted intercepts are not of shape (n_assets,).
        """
        self.factor_prior_estimator_ = check_estimator(
            self.factor_prior_estimator,
            default=EmpiricalPrior(),
            check_type=BasePrior,
        )
        self.loading_matrix_estimator_ = check_estimator(
            self.loading_matrix_estimator,
            default=LoadingMatrixRegression(),
            check_type=BaseLoadingMatrix,
        )

        # Fitting prior estimator
        self.factor_prior_estimator_.fit(y)
        factor_mu = self.factor_prior_estimator_.prior_model_.mu
        factor_covariance = self.factor_prior_estimator_.prior_model_.covariance

        # Fitting loading matrix estimator
        self.loading_matrix_estimator_.fit(X, y)
        loading_matrix = self.loading_matrix_estimator_.loading_matrix_
        intercepts = self.loading_matrix_estimator_.intercepts_

        # we validate and convert to numpy after all models have been fitted to keep
        # features names information.
        X, y = self._validate_data(X, y, multi_output=True)
        n_assets = X.shape[1]
        n_factors = y.shape[1]

        if loading_matrix.shape != (n_assets, n_factors):
            raise ValueError(
                "`loading_matrix_estimator.loading_matrix_` must be a 2D array of"
                f" shape {(n_assets, n_factors)}, got"
                f" {loading_matrix.shape} instead."
            )

        if intercepts.shape != (n_assets,):
            raise ValueError(
                "`loading_matrix_estimator.intercepts_` must be a 1D array of "
                f"shape {(n_assets,)}, got {intercepts.shape} instead."
            )

        # Project the factor moments onto the assets through the loading matrix.
        mu = loading_matrix @ factor_mu + intercepts
        covariance = loading_matrix @ factor_covariance @ loading_matrix.T
        # Asset returns explained by the factors (residuals excluded).
        returns = y @ loading_matrix.T + intercepts
        cholesky = loading_matrix @ np.linalg.cholesky(factor_covariance)

        if self.residual_variance:
            err = X - returns
            err_cov = np.diag(np.var(err, ddof=1, axis=0))
            covariance += err_cov
            # Appending sqrt(err_cov) keeps `cholesky @ cholesky.T` equal to the
            # covariance including the diagonal residual term.
            cholesky = np.hstack((cholesky, np.sqrt(err_cov)))

        covariance = cov_nearest(
            covariance, higham=self.higham, higham_max_iteration=self.max_iteration
        )

        self.prior_model_ = PriorModel(
            mu=mu, covariance=covariance, returns=returns, cholesky=cholesky
        )
        return self