skfolio 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +7 -7
- skfolio/cluster/__init__.py +2 -2
- skfolio/cluster/_hierarchical.py +2 -2
- skfolio/datasets/__init__.py +3 -3
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +4 -4
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +11 -10
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +8 -8
- skfolio/measures/__init__.py +24 -24
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +4 -4
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +11 -11
- skfolio/moments/covariance/__init__.py +6 -6
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +3 -2
- skfolio/moments/covariance/_detone_covariance.py +3 -2
- skfolio/moments/covariance/_empirical_covariance.py +3 -2
- skfolio/moments/covariance/_ew_covariance.py +3 -2
- skfolio/moments/covariance/_gerber_covariance.py +3 -2
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +3 -8
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/__init__.py +2 -2
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +3 -2
- skfolio/moments/expected_returns/_equilibrium_mu.py +3 -2
- skfolio/moments/expected_returns/_ew_mu.py +3 -2
- skfolio/moments/expected_returns/_shrunk_mu.py +4 -3
- skfolio/optimization/__init__.py +12 -10
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +3 -1
- skfolio/optimization/cluster/_nco.py +10 -9
- skfolio/optimization/cluster/hierarchical/__init__.py +3 -1
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +4 -3
- skfolio/optimization/cluster/hierarchical/_hrp.py +4 -3
- skfolio/optimization/convex/__init__.py +5 -3
- skfolio/optimization/convex/_base.py +10 -9
- skfolio/optimization/convex/_distributionally_robust.py +8 -5
- skfolio/optimization/convex/_maximum_diversification.py +8 -6
- skfolio/optimization/convex/_mean_risk.py +10 -8
- skfolio/optimization/convex/_risk_budgeting.py +6 -4
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +3 -3
- skfolio/optimization/naive/__init__.py +3 -1
- skfolio/optimization/naive/_naive.py +4 -3
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +34 -7
- skfolio/portfolio/__init__.py +1 -1
- skfolio/portfolio/_base.py +43 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +5 -4
- skfolio/pre_selection/__init__.py +3 -1
- skfolio/pre_selection/_drop_correlated.py +3 -3
- skfolio/pre_selection/_select_complete.py +31 -30
- skfolio/pre_selection/_select_k_extremes.py +3 -3
- skfolio/pre_selection/_select_non_dominated.py +3 -3
- skfolio/pre_selection/_select_non_expiring.py +8 -6
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +7 -3
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +7 -4
- skfolio/prior/_empirical.py +5 -2
- skfolio/prior/_factor_model.py +10 -5
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +7 -7
- skfolio/uncertainty_set/__init__.py +7 -5
- skfolio/uncertainty_set/_base.py +5 -4
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +15 -15
- skfolio/utils/tools.py +86 -22
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +122 -46
- skfolio-0.8.0.dist-info/RECORD +120 -0
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
- skfolio-0.6.0.dist-info/RECORD +0 -95
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
skfolio/prior/_factor_model.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Factor Model estimator"""
|
1
|
+
"""Factor Model estimator."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
8
|
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
@@ -17,6 +17,7 @@ import sklearn.base as skb
|
|
17
17
|
import sklearn.linear_model as skl
|
18
18
|
import sklearn.multioutput as skmo
|
19
19
|
import sklearn.utils.metadata_routing as skm
|
20
|
+
import sklearn.utils.validation as skv
|
20
21
|
|
21
22
|
from skfolio.prior._base import BasePrior, PriorModel
|
22
23
|
from skfolio.prior._empirical import EmpiricalPrior
|
@@ -199,6 +200,8 @@ class FactorModel(BasePrior):
|
|
199
200
|
|
200
201
|
factor_prior_estimator_: BasePrior
|
201
202
|
loading_matrix_estimator_: BaseLoadingMatrix
|
203
|
+
n_features_in_: int
|
204
|
+
feature_names_in_: np.ndarray
|
202
205
|
|
203
206
|
def __init__(
|
204
207
|
self,
|
@@ -265,6 +268,7 @@ class FactorModel(BasePrior):
|
|
265
268
|
)
|
266
269
|
factor_mu = self.factor_prior_estimator_.prior_model_.mu
|
267
270
|
factor_covariance = self.factor_prior_estimator_.prior_model_.covariance
|
271
|
+
factor_returns = self.factor_prior_estimator_.prior_model_.returns
|
268
272
|
|
269
273
|
# Fitting loading matrix estimator
|
270
274
|
self.loading_matrix_estimator_.fit(X, y)
|
@@ -273,7 +277,7 @@ class FactorModel(BasePrior):
|
|
273
277
|
|
274
278
|
# we validate and convert to numpy after all models have been fitted to keep
|
275
279
|
# features names information.
|
276
|
-
X, y =
|
280
|
+
X, y = skv.validate_data(self, X, y, multi_output=True)
|
277
281
|
n_assets = X.shape[1]
|
278
282
|
n_factors = y.shape[1]
|
279
283
|
|
@@ -292,11 +296,12 @@ class FactorModel(BasePrior):
|
|
292
296
|
|
293
297
|
mu = loading_matrix @ factor_mu + intercepts
|
294
298
|
covariance = loading_matrix @ factor_covariance @ loading_matrix.T
|
295
|
-
returns =
|
299
|
+
returns = factor_returns @ loading_matrix.T + intercepts
|
296
300
|
cholesky = loading_matrix @ np.linalg.cholesky(factor_covariance)
|
297
301
|
|
298
302
|
if self.residual_variance:
|
299
|
-
|
303
|
+
y_pred = y @ loading_matrix.T + intercepts
|
304
|
+
err = X - y_pred
|
300
305
|
err_cov = np.diag(np.var(err, ddof=1, axis=0))
|
301
306
|
covariance += err_cov
|
302
307
|
cholesky = np.hstack((cholesky, np.sqrt(err_cov)))
|
@@ -0,0 +1,239 @@
|
|
1
|
+
"""Synthetic Data Prior Model estimator."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
|
+
|
7
|
+
import inspect
|
8
|
+
|
9
|
+
import numpy as np
|
10
|
+
import numpy.typing as npt
|
11
|
+
import sklearn.base as skb
|
12
|
+
import sklearn.utils.metadata_routing as skm
|
13
|
+
import sklearn.utils.validation as skv
|
14
|
+
|
15
|
+
from skfolio.distribution import VineCopula
|
16
|
+
from skfolio.prior._base import BasePrior
|
17
|
+
from skfolio.prior._empirical import EmpiricalPrior
|
18
|
+
from skfolio.utils.tools import check_estimator
|
19
|
+
|
20
|
+
|
21
|
+
class SyntheticData(BasePrior):
|
22
|
+
"""Synthetic Data Estimator.
|
23
|
+
|
24
|
+
The Synthetic Data model estimates a :class:`~skfolio.prior.PriorModel` by
|
25
|
+
fitting a `distribution_estimator` and sampling new returns data from it.
|
26
|
+
|
27
|
+
The default ``distribution_estimator`` is a Regular Vine Copula model. Other common
|
28
|
+
choices are Generative Adversarial Networks (GANs) or Variational Autoencoders
|
29
|
+
(VAEs).
|
30
|
+
|
31
|
+
This class is particularly useful when the historical distribution tail dependencies
|
32
|
+
are sparse and need extrapolation for tail optimizations or when optimizing under
|
33
|
+
conditional or stressed scenarios.
|
34
|
+
|
35
|
+
Parameters
|
36
|
+
----------
|
37
|
+
distribution_estimator : BaseEstimator, optional
|
38
|
+
Estimator to model the distribution of asset returns. It must inherit from
|
39
|
+
`BaseEstimator` and implement a `sample` method. If None, the default
|
40
|
+
`VineCopula()` model is used.
|
41
|
+
|
42
|
+
n_samples : int, default=1000
|
43
|
+
Number of samples to generate from the `distribution_estimator`, default is
|
44
|
+
1000.
|
45
|
+
|
46
|
+
sample_args : dict, optional
|
47
|
+
Additional keyword arguments to pass to the `sample` method of the
|
48
|
+
`distribution_estimator`.
|
49
|
+
|
50
|
+
Attributes
|
51
|
+
----------
|
52
|
+
prior_model_ : PriorModel
|
53
|
+
The assets :class:`~skfolio.prior.PriorModel`.
|
54
|
+
|
55
|
+
distribution_estimator_ : BaseEstimator
|
56
|
+
The fitted distribution estimator.
|
57
|
+
|
58
|
+
n_features_in_ : int
|
59
|
+
Number of assets seen during `fit`.
|
60
|
+
|
61
|
+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
62
|
+
Names of features seen during `fit`. Defined only when `X`
|
63
|
+
has feature names that are all strings.
|
64
|
+
|
65
|
+
Examples
|
66
|
+
--------
|
67
|
+
>>> import numpy as np
|
68
|
+
>>> from skfolio.datasets import load_sp500_dataset, load_factors_dataset
|
69
|
+
>>> from skfolio.preprocessing import prices_to_returns
|
70
|
+
>>> from skfolio.distribution import VineCopula
|
71
|
+
>>> from skfolio.optimization import MeanRisk
|
72
|
+
>>> from skfolio.prior import FactorModel, SyntheticData
|
73
|
+
>>> from skfolio import RiskMeasure
|
74
|
+
>>>
|
75
|
+
>>> # Load historical prices and convert them to returns
|
76
|
+
>>> prices = load_sp500_dataset()
|
77
|
+
>>> factors = load_factors_dataset()
|
78
|
+
>>> X, y = prices_to_returns(prices, factors)
|
79
|
+
>>>
|
80
|
+
>>> # Instantiate the SyntheticData model and fit it
|
81
|
+
>>> model = SyntheticData()
|
82
|
+
>>> model.fit(X)
|
83
|
+
>>> print(model.prior_model_)
|
84
|
+
>>>
|
85
|
+
>>> # Minimum CVaR optimization on synthetic returns
|
86
|
+
>>> model = MeanRisk(
|
87
|
+
... risk_measure=RiskMeasure.CVAR,
|
88
|
+
... prior_estimator=SyntheticData(
|
89
|
+
... distribution_estimator=VineCopula(log_transform=True, n_jobs=-1),
|
90
|
+
... n_samples=2000,
|
91
|
+
... )
|
92
|
+
... )
|
93
|
+
>>> model.fit(X)
|
94
|
+
>>> print(model.weights_)
|
95
|
+
>>>
|
96
|
+
>>> # Minimum CVaR optimization on Stressed Factors
|
97
|
+
>>> factor_model = FactorModel(
|
98
|
+
... factor_prior_estimator=SyntheticData(
|
99
|
+
... distribution_estimator=VineCopula(
|
100
|
+
... central_assets=["QUAL"],
|
101
|
+
... log_transform=True,
|
102
|
+
... n_jobs=-1,
|
103
|
+
... ),
|
104
|
+
... n_samples=5000,
|
105
|
+
... sample_args=dict(conditioning={"QUAL": -0.2}),
|
106
|
+
... )
|
107
|
+
... )
|
108
|
+
>>> model = MeanRisk(risk_measure=RiskMeasure.CVAR, prior_estimator=factor_model)
|
109
|
+
>>> model.fit(X, y)
|
110
|
+
>>> print(model.weights_)
|
111
|
+
>>>
|
112
|
+
>>> # Stress Test the Portfolio
|
113
|
+
>>> factor_model.set_params(factor_prior_estimator__sample_args=dict(
|
114
|
+
... conditioning={"QUAL": -0.5}
|
115
|
+
... ))
|
116
|
+
>>> factor_model.fit(X,y)
|
117
|
+
>>> stressed_X = factor_model.prior_model_.returns
|
118
|
+
>>> stressed_ptf = model.predict(stressed_X)
|
119
|
+
"""
|
120
|
+
|
121
|
+
distribution_estimator_: skb.BaseEstimator
|
122
|
+
prior_estimator_: BasePrior
|
123
|
+
n_features_in_: int
|
124
|
+
feature_names_in_: np.ndarray
|
125
|
+
|
126
|
+
def __init__(
|
127
|
+
self,
|
128
|
+
distribution_estimator: skb.BaseEstimator | None = None,
|
129
|
+
n_samples: int = 1000,
|
130
|
+
sample_args: dict | None = None,
|
131
|
+
):
|
132
|
+
self.distribution_estimator = distribution_estimator
|
133
|
+
self.n_samples = n_samples
|
134
|
+
self.sample_args = sample_args
|
135
|
+
|
136
|
+
def get_metadata_routing(self):
|
137
|
+
# noinspection PyTypeChecker
|
138
|
+
router = skm.MetadataRouter(owner=self.__class__.__name__).add(
|
139
|
+
distance_estimator=self.distribution_estimator,
|
140
|
+
method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
|
141
|
+
)
|
142
|
+
return router
|
143
|
+
|
144
|
+
def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "SyntheticData":
|
145
|
+
"""Fit the Synthetic Data estimator.
|
146
|
+
|
147
|
+
Parameters
|
148
|
+
----------
|
149
|
+
X : array-like of shape (n_observations, n_assets)
|
150
|
+
Price returns of the assets.
|
151
|
+
|
152
|
+
y : Ignored
|
153
|
+
Not used, present for API consistency by convention.
|
154
|
+
|
155
|
+
**fit_params : dict
|
156
|
+
Parameters to pass to the underlying estimators.
|
157
|
+
Only available if `enable_metadata_routing=True`, which can be
|
158
|
+
set by using ``sklearn.set_config(enable_metadata_routing=True)``.
|
159
|
+
See :ref:`Metadata Routing User Guide <metadata_routing>` for
|
160
|
+
more details.
|
161
|
+
|
162
|
+
Returns
|
163
|
+
-------
|
164
|
+
self : SyntheticData
|
165
|
+
Fitted estimator.
|
166
|
+
"""
|
167
|
+
routed_params = skm.process_routing(self, "fit", **fit_params)
|
168
|
+
|
169
|
+
self.distribution_estimator_ = check_estimator(
|
170
|
+
self.distribution_estimator,
|
171
|
+
default=VineCopula(),
|
172
|
+
check_type=skb.BaseEstimator,
|
173
|
+
)
|
174
|
+
_check_sample_method(self.distribution_estimator_)
|
175
|
+
|
176
|
+
# fitting distribution estimator on prior returns
|
177
|
+
# noinspection PyUnresolvedReferences
|
178
|
+
self.distribution_estimator_.fit(
|
179
|
+
X, y, **routed_params.distribution_estimator.fit
|
180
|
+
)
|
181
|
+
|
182
|
+
# We validate after all models have been fitted to keep feature names
|
183
|
+
# information.
|
184
|
+
skv.validate_data(self, X)
|
185
|
+
|
186
|
+
# sample from the distribution estimator
|
187
|
+
sample_args = self.sample_args if self.sample_args is not None else {}
|
188
|
+
# noinspection PyUnresolvedReferences
|
189
|
+
synthetic_data = self.distribution_estimator_.sample(
|
190
|
+
n_samples=self.n_samples, **sample_args
|
191
|
+
)
|
192
|
+
|
193
|
+
# When performing conditional sampling, the conditioning samples are often
|
194
|
+
# constant. To avoid null variance, we add a small white noise.
|
195
|
+
constant_returns = np.var(synthetic_data, axis=0) < 1e-14
|
196
|
+
if np.any(constant_returns):
|
197
|
+
noise = 1e-6 * np.random.randn(len(synthetic_data), 1)
|
198
|
+
synthetic_data[:, constant_returns] += noise
|
199
|
+
|
200
|
+
# Fit empirical posterior estimator
|
201
|
+
posterior_estimator = EmpiricalPrior()
|
202
|
+
posterior_estimator.fit(synthetic_data)
|
203
|
+
self.prior_model_ = posterior_estimator.prior_model_
|
204
|
+
|
205
|
+
return self
|
206
|
+
|
207
|
+
|
208
|
+
def _check_sample_method(distribution_estimator: skb.BaseEstimator) -> None:
|
209
|
+
"""Check that the distribution_estimator implements a valid 'sample' method.
|
210
|
+
|
211
|
+
This helper function verifies that the given estimator has a callable 'sample'
|
212
|
+
method and that this method accepts an 'n_samples' parameter.
|
213
|
+
|
214
|
+
Parameters
|
215
|
+
----------
|
216
|
+
distribution_estimator : BaseEstimator
|
217
|
+
The estimator whose 'sample' method is to be validated.
|
218
|
+
|
219
|
+
Raises
|
220
|
+
------
|
221
|
+
ValueError
|
222
|
+
If the 'sample' method is missing or does not have an 'n_samples' parameter.
|
223
|
+
"""
|
224
|
+
# Get the 'sample' attribute, falling back to None if it doesn't exist.
|
225
|
+
sample_method = getattr(distribution_estimator, "sample", None)
|
226
|
+
if sample_method is None or not callable(sample_method):
|
227
|
+
raise ValueError(
|
228
|
+
f"The distribution_estimator {distribution_estimator} must implement a "
|
229
|
+
"`sample` method"
|
230
|
+
)
|
231
|
+
|
232
|
+
sig = inspect.signature(sample_method)
|
233
|
+
|
234
|
+
# Check if the parameter 'n_samples' is in the method's parameters.
|
235
|
+
if "n_samples" not in sig.parameters:
|
236
|
+
raise ValueError(
|
237
|
+
"The `sample` method of the distribution_estimator "
|
238
|
+
f"{distribution_estimator} must have `n_samples` as parameter"
|
239
|
+
)
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Synthetic Data module."""
|
skfolio/typing.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
from collections.abc import Callable
|
8
8
|
|
@@ -14,20 +14,20 @@ import plotly.graph_objects as go
|
|
14
14
|
from skfolio.measures import ExtraRiskMeasure, PerfMeasure, RatioMeasure, RiskMeasure
|
15
15
|
|
16
16
|
__all__ = [
|
17
|
+
"CvxMeasure",
|
18
|
+
"ExpressionFunction",
|
19
|
+
"Factor",
|
17
20
|
"Groups",
|
18
21
|
"Inequality",
|
19
22
|
"LinearConstraints",
|
23
|
+
"Measure",
|
20
24
|
"MultiInput",
|
21
|
-
"
|
25
|
+
"Names",
|
22
26
|
"ParametersValues",
|
23
|
-
"Factor",
|
24
27
|
"Result",
|
25
28
|
"RiskResult",
|
26
|
-
"ExpressionFunction",
|
27
|
-
"Measure",
|
28
|
-
"CvxMeasure",
|
29
|
-
"Names",
|
30
29
|
"Tags",
|
30
|
+
"Target",
|
31
31
|
]
|
32
32
|
|
33
33
|
Measure = PerfMeasure | RiskMeasure | ExtraRiskMeasure | RatioMeasure
|
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Uncertainty Set module."""
|
2
|
+
|
1
3
|
from skfolio.uncertainty_set._base import (
|
2
4
|
BaseCovarianceUncertaintySet,
|
3
5
|
BaseMuUncertaintySet,
|
@@ -13,11 +15,11 @@ from skfolio.uncertainty_set._empirical import (
|
|
13
15
|
)
|
14
16
|
|
15
17
|
__all__ = [
|
16
|
-
"UncertaintySet",
|
17
|
-
"BaseMuUncertaintySet",
|
18
18
|
"BaseCovarianceUncertaintySet",
|
19
|
-
"
|
20
|
-
"EmpiricalCovarianceUncertaintySet",
|
21
|
-
"BootstrapMuUncertaintySet",
|
19
|
+
"BaseMuUncertaintySet",
|
22
20
|
"BootstrapCovarianceUncertaintySet",
|
21
|
+
"BootstrapMuUncertaintySet",
|
22
|
+
"EmpiricalCovarianceUncertaintySet",
|
23
|
+
"EmpiricalMuUncertaintySet",
|
24
|
+
"UncertaintySet",
|
23
25
|
]
|
skfolio/uncertainty_set/_base.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Base Uncertainty estimator"""
|
1
|
+
"""Base Uncertainty estimator."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
from abc import ABC, abstractmethod
|
8
8
|
from dataclasses import dataclass
|
@@ -11,6 +11,7 @@ import numpy as np
|
|
11
11
|
import numpy.typing as npt
|
12
12
|
import sklearn.base as skb
|
13
13
|
import sklearn.utils.metadata_routing as skm
|
14
|
+
import sklearn.utils.validation as skv
|
14
15
|
|
15
16
|
from skfolio.prior import BasePrior
|
16
17
|
|
@@ -113,9 +114,9 @@ class BaseCovarianceUncertaintySet(skb.BaseEstimator, ABC):
|
|
113
114
|
Validated price returns of factors or a target benchmark if provided.
|
114
115
|
"""
|
115
116
|
if y is None:
|
116
|
-
X =
|
117
|
+
X = skv.validate_data(self, X)
|
117
118
|
else:
|
118
|
-
X, y =
|
119
|
+
X, y = skv.validate_data(self, X, y, multi_output=True)
|
119
120
|
return X, y
|
120
121
|
|
121
122
|
def get_metadata_routing(self):
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
8
|
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
8
|
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
skfolio/utils/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
"""Utils module."""
|
skfolio/utils/bootstrap.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
8
|
|
@@ -71,7 +71,7 @@ def stationary_bootstrap(
|
|
71
71
|
block_size: float | None = None,
|
72
72
|
seed: int | None = None,
|
73
73
|
) -> np.ndarray:
|
74
|
-
"""
|
74
|
+
"""Create `n_bootstrap_samples` samples from a multivariate return series via
|
75
75
|
stationary bootstrapping.
|
76
76
|
|
77
77
|
Parameters
|
skfolio/utils/equations.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Equation module"""
|
1
|
+
"""Equation module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import re
|
8
8
|
import warnings
|
@@ -44,7 +44,8 @@ def equations_to_matrix(
|
|
44
44
|
groups : array-like of shape (n_groups, n_assets)
|
45
45
|
2D array of assets groups.
|
46
46
|
|
47
|
-
|
47
|
+
For example:
|
48
|
+
|
48
49
|
groups = np.array(
|
49
50
|
[
|
50
51
|
["SPX", "SX5E", "NKY", "TLT"],
|
@@ -66,7 +67,8 @@ def equations_to_matrix(
|
|
66
67
|
The second expression means that the sum of all assets in "group_1" should be
|
67
68
|
less or equal to "number" times the sum of all assets in "group_2".
|
68
69
|
|
69
|
-
|
70
|
+
For example:
|
71
|
+
|
70
72
|
equations = [
|
71
73
|
"Equity <= 3 * Bond",
|
72
74
|
"US >= 1.5",
|
@@ -143,9 +145,10 @@ def group_cardinalities_to_matrix(
|
|
143
145
|
Parameters
|
144
146
|
----------
|
145
147
|
groups : array-like of shape (n_groups, n_assets)
|
146
|
-
|
148
|
+
2D array of assets groups.
|
149
|
+
|
150
|
+
For example:
|
147
151
|
|
148
|
-
Examples:
|
149
152
|
groups = np.array(
|
150
153
|
[
|
151
154
|
["Equity", "Equity", "Equity", "Bond"],
|
@@ -154,8 +157,8 @@ def group_cardinalities_to_matrix(
|
|
154
157
|
)
|
155
158
|
|
156
159
|
group_cardinalities : dict[str, int]
|
157
|
-
|
158
|
-
|
160
|
+
Dictionary of cardinality constraint per group.
|
161
|
+
For example: {"Equity": 1, "US": 3}
|
159
162
|
|
160
163
|
raise_if_group_missing : bool, default=False
|
161
164
|
If this is set to True, an error is raised when a group is not found in the
|
@@ -302,7 +305,7 @@ def _comparison_operator_sign(operator: str) -> int:
|
|
302
305
|
|
303
306
|
|
304
307
|
def _sub_add_operator_sign(operator: str) -> int:
|
305
|
-
"""Convert the operators '+' and '-' into 1 or -1
|
308
|
+
"""Convert the operators '+' and '-' into 1 or -1.
|
306
309
|
|
307
310
|
Parameters
|
308
311
|
----------
|
@@ -342,7 +345,7 @@ def _string_to_float(string: str) -> float:
|
|
342
345
|
|
343
346
|
|
344
347
|
def _split_equation_string(string: str) -> list[str]:
|
345
|
-
"""Split an equation strings by operators"""
|
348
|
+
"""Split an equation string by operators."""
|
346
349
|
comp_pattern = "(?=" + "|".join([".+\\" + e for e in _COMPARISON_OPERATORS]) + ")"
|
347
350
|
if not bool(re.match(comp_pattern, string)):
|
348
351
|
raise EquationToMatrixError(
|
skfolio/utils/sorting.py
CHANGED
skfolio/utils/stats.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
"""Tools module"""
|
1
|
+
"""Tools module."""
|
2
2
|
|
3
3
|
import warnings
|
4
4
|
|
5
5
|
# Copyright (c) 2023
|
6
6
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
7
|
-
# License: BSD
|
7
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
8
8
|
# Implementation derived from:
|
9
9
|
# Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
10
10
|
# Statsmodels, Copyright (C) 2006, Jonathan E. Taylor, Licensed under BSD 3 clause.
|
@@ -23,25 +23,25 @@ from skfolio.utils.tools import AutoEnum
|
|
23
23
|
|
24
24
|
__all__ = [
|
25
25
|
"NBinsMethod",
|
26
|
-
"
|
27
|
-
"n_bins_knuth",
|
28
|
-
"is_cholesky_dec",
|
26
|
+
"assert_is_distance",
|
29
27
|
"assert_is_square",
|
30
28
|
"assert_is_symmetric",
|
31
|
-
"assert_is_distance",
|
32
|
-
"cov_nearest",
|
33
|
-
"cov_to_corr",
|
34
|
-
"corr_to_cov",
|
35
29
|
"commutation_matrix",
|
36
30
|
"compute_optimal_n_clusters",
|
31
|
+
"corr_to_cov",
|
32
|
+
"cov_nearest",
|
33
|
+
"cov_to_corr",
|
34
|
+
"is_cholesky_dec",
|
35
|
+
"minimize_relative_weight_deviation",
|
36
|
+
"n_bins_freedman",
|
37
|
+
"n_bins_knuth",
|
37
38
|
"rand_weights",
|
38
39
|
"rand_weights_dirichlet",
|
39
|
-
"minimize_relative_weight_deviation",
|
40
40
|
]
|
41
41
|
|
42
42
|
|
43
43
|
class NBinsMethod(AutoEnum):
|
44
|
-
"""Enumeration of the Number of Bins Methods
|
44
|
+
"""Enumeration of the Number of Bins Methods.
|
45
45
|
|
46
46
|
Parameters
|
47
47
|
----------
|
@@ -82,7 +82,7 @@ def n_bins_freedman(x: np.ndarray) -> int:
|
|
82
82
|
if d == 0:
|
83
83
|
return 5
|
84
84
|
n_bins = max(1, np.ceil((np.max(x) - np.min(x)) / d))
|
85
|
-
return
|
85
|
+
return round(n_bins)
|
86
86
|
|
87
87
|
|
88
88
|
def n_bins_knuth(x: np.ndarray) -> int:
|
@@ -122,12 +122,12 @@ def n_bins_knuth(x: np.ndarray) -> int:
|
|
122
122
|
|
123
123
|
n_bins_init = n_bins_freedman(x)
|
124
124
|
n_bins = sco.fmin(func, n_bins_init, disp=0)[0]
|
125
|
-
return
|
125
|
+
return round(n_bins)
|
126
126
|
|
127
127
|
|
128
128
|
def rand_weights_dirichlet(n: int) -> np.array:
|
129
129
|
"""Produces n random weights that sum to one from a dirichlet distribution
|
130
|
-
(uniform distribution over a simplex)
|
130
|
+
(uniform distribution over a simplex).
|
131
131
|
|
132
132
|
Parameters
|
133
133
|
----------
|
@@ -144,7 +144,7 @@ def rand_weights_dirichlet(n: int) -> np.array:
|
|
144
144
|
|
145
145
|
def rand_weights(n: int, zeros: int = 0) -> np.array:
|
146
146
|
"""Produces n random weights that sum to one from a uniform distribution
|
147
|
-
(non-uniform distribution over a simplex)
|
147
|
+
(non-uniform distribution over a simplex).
|
148
148
|
|
149
149
|
Parameters
|
150
150
|
----------
|