skfolio 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
- skfolio/distribution/multivariate/_vine_copula.py +35 -34
- skfolio/distribution/univariate/_base.py +20 -15
- skfolio/exceptions.py +5 -0
- skfolio/measures/__init__.py +2 -0
- skfolio/measures/_measures.py +390 -155
- skfolio/optimization/_base.py +21 -4
- skfolio/optimization/cluster/hierarchical/_base.py +16 -13
- skfolio/optimization/cluster/hierarchical/_herc.py +6 -6
- skfolio/optimization/cluster/hierarchical/_hrp.py +8 -6
- skfolio/optimization/convex/_base.py +238 -144
- skfolio/optimization/convex/_distributionally_robust.py +32 -20
- skfolio/optimization/convex/_maximum_diversification.py +15 -15
- skfolio/optimization/convex/_mean_risk.py +26 -24
- skfolio/optimization/convex/_risk_budgeting.py +23 -21
- skfolio/optimization/ensemble/__init__.py +2 -4
- skfolio/optimization/ensemble/_stacking.py +1 -1
- skfolio/optimization/naive/_naive.py +2 -2
- skfolio/population/_population.py +30 -9
- skfolio/portfolio/_base.py +68 -26
- skfolio/portfolio/_multi_period_portfolio.py +5 -0
- skfolio/portfolio/_portfolio.py +5 -0
- skfolio/prior/__init__.py +6 -2
- skfolio/prior/_base.py +7 -3
- skfolio/prior/_black_litterman.py +14 -12
- skfolio/prior/_empirical.py +8 -7
- skfolio/prior/_entropy_pooling.py +1493 -0
- skfolio/prior/_factor_model.py +39 -22
- skfolio/prior/_opinion_pooling.py +475 -0
- skfolio/prior/_synthetic_data.py +10 -8
- skfolio/uncertainty_set/_bootstrap.py +4 -4
- skfolio/uncertainty_set/_empirical.py +6 -6
- skfolio/utils/equations.py +10 -4
- skfolio/utils/figure.py +185 -0
- skfolio/utils/tools.py +4 -2
- {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/METADATA +94 -5
- {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/RECORD +40 -38
- {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/WHEEL +1 -1
- skfolio/synthetic_returns/__init__.py +0 -1
- /skfolio/{optimization/ensemble/_base.py → utils/composition.py} +0 -0
- {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/top_level.txt +0 -0
skfolio/prior/_factor_model.py
CHANGED
@@ -19,7 +19,8 @@ import sklearn.multioutput as skmo
 import sklearn.utils.metadata_routing as skm
 import sklearn.utils.validation as skv
 
-from skfolio.prior._base import BasePrior, PriorModel
+import skfolio.measures as sm
+from skfolio.prior._base import BasePrior, ReturnDistribution
 from skfolio.prior._empirical import EmpiricalPrior
 from skfolio.utils.stats import cov_nearest
 from skfolio.utils.tools import check_estimator
@@ -39,7 +40,7 @@ class BaseLoadingMatrix(skb.BaseEstimator, ABC):
     intercepts_: np.ndarray
 
     @abstractmethod
-    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike):
+    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike, **fit_params):
         pass
 
 
@@ -128,7 +129,9 @@ class LoadingMatrixRegression(BaseLoadingMatrix):
         self.multi_output_regressor_ = skmo.MultiOutputRegressor(
             _linear_regressor, n_jobs=self.n_jobs
         )
-        self.multi_output_regressor_.fit(X=y, y=X)
+        self.multi_output_regressor_.fit(
+            X=y, y=X, **routed_params.factor_prior_estimator.fit
+        )
         # noinspection PyUnresolvedReferences
         n_assets = X.shape[1]
         self.loading_matrix_ = np.array(
@@ -161,8 +164,8 @@ class FactorModel(BasePrior):
 
     factor_prior_estimator : BasePrior, optional
         The factors :ref:`prior estimator <prior>`.
-        It is used to estimate the :class:`~skfolio.prior.PriorModel` containing the
-        factors expected returns and covariance matrix.
+        It is used to estimate the :class:`~skfolio.prior.ReturnDistribution` containing
+        the estimation of factors expected returns and covariance matrix.
         The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.
 
     residual_variance : bool, default=True
@@ -181,8 +184,10 @@ class FactorModel(BasePrior):
 
     Attributes
     ----------
-    prior_model_ : PriorModel
-        Fitted :class:`~skfolio.prior.PriorModel`.
+    return_distribution_ : ReturnDistribution
+        Fitted :class:`~skfolio.prior.ReturnDistribution` to be used by the optimization
+        estimators, containing the assets distribution, moments estimation and cholesky
+        decomposition based on the factor model.
 
     factor_prior_estimator_ : BasePrior
         Fitted `factor_prior_estimator`.
@@ -218,10 +223,18 @@ class FactorModel(BasePrior):
         self.max_iteration = max_iteration
 
     def get_metadata_routing(self):
-        # route to factor_prior_estimator.fit
-        router = skm.MetadataRouter(owner=self.__class__.__name__).add(
-            factor_prior_estimator=self.factor_prior_estimator,
-            method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+        # route to factor_prior_estimator.fit
+        router = (
+            skm.MetadataRouter(owner=self.__class__.__name__)
+            .add(
+                factor_prior_estimator=self.factor_prior_estimator,
+                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+            )
+            # route to loading_matrix_estimator.fit
+            .add(
+                loading_matrix_estimator=self.loading_matrix_estimator,
+                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+            )
         )
         return router
 
@@ -266,12 +279,12 @@ class FactorModel(BasePrior):
         self.factor_prior_estimator_.fit(
             X=y, **routed_params.factor_prior_estimator.fit
         )
-        factor_mu = self.factor_prior_estimator_.prior_model_.mu
-        factor_covariance = self.factor_prior_estimator_.prior_model_.covariance
-        factor_returns = self.factor_prior_estimator_.prior_model_.returns
+        factor_return_dist = self.factor_prior_estimator_.return_distribution_
 
         # Fitting loading matrix estimator
-        self.loading_matrix_estimator_.fit(X, y)
+        self.loading_matrix_estimator_.fit(
+            X, y, **routed_params.loading_matrix_estimator.fit
+        )
         loading_matrix = self.loading_matrix_estimator_.loading_matrix_
         intercepts = self.loading_matrix_estimator_.intercepts_
 
@@ -294,15 +307,15 @@ class FactorModel(BasePrior):
                 f"shape {(n_assets,)}, got {intercepts.shape} instead."
             )
 
-        mu = loading_matrix @ factor_mu + intercepts
-        covariance = loading_matrix @ factor_covariance @ loading_matrix.T
-        returns = factor_returns @ loading_matrix.T + intercepts
-        cholesky = loading_matrix @ np.linalg.cholesky(factor_covariance)
+        mu = loading_matrix @ factor_return_dist.mu + intercepts
+        covariance = loading_matrix @ factor_return_dist.covariance @ loading_matrix.T
+        returns = factor_return_dist.returns @ loading_matrix.T + intercepts
+        cholesky = loading_matrix @ np.linalg.cholesky(factor_return_dist.covariance)
 
         if self.residual_variance:
             y_pred = y @ loading_matrix.T + intercepts
             err = X - y_pred
-            err_cov = np.diag(np.var(err, ddof=1, axis=0))
+            err_cov = np.diag(sm.variance(err))
             covariance += err_cov
             cholesky = np.hstack((cholesky, np.sqrt(err_cov)))
 
@@ -310,7 +323,11 @@ class FactorModel(BasePrior):
             covariance, higham=self.higham, higham_max_iteration=self.max_iteration
         )
 
-        self.prior_model_ = PriorModel(
-            mu=mu, covariance=covariance, returns=returns, cholesky=cholesky
+        self.return_distribution_ = ReturnDistribution(
+            mu=mu,
+            covariance=covariance,
+            returns=returns,
+            cholesky=cholesky,
+            sample_weight=factor_return_dist.sample_weight,
         )
         return self
skfolio/prior/_opinion_pooling.py
ADDED
@@ -0,0 +1,475 @@
+"""Opinion Pooling estimator."""
+
+# Copyright (c) 2025
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# Credits: Vincent Maladière, Matteo Manzi, Carlo Nicolini
+# SPDX-License-Identifier: BSD-3-Clause
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import scipy.special as scs
+import scipy.special as sp
+import sklearn as sk
+import sklearn.utils as sku
+import sklearn.utils.metadata_routing as skm
+import sklearn.utils.parallel as skp
+import sklearn.utils.validation as skv
+
+import skfolio.measures as sm
+from skfolio.prior._base import BasePrior, ReturnDistribution
+from skfolio.utils.composition import BaseComposition
+from skfolio.utils.tools import check_estimator, fit_single_estimator
+
+
+class OpinionPooling(BasePrior, BaseComposition):
+    r"""Opinion Pooling estimator.
+
+    Opinion Pooling (also called Belief Aggregation or Risk Aggregation) is a process
+    in which different probability distributions (opinions), produced by different
+    experts, are combined to yield a single probability distribution (consensus).
+
+    Expert opinions (also called individual prior distributions) can be
+    **elicited** from domain experts or **derived** from quantitative analyses.
+
+    The `OpinionPooling` estimator takes a list of prior estimators, each of which
+    produces scenario probabilities (which we use as `sample_weight`), and pools them
+    into a single consensus probability distribution.
+
+    You can choose between linear (arithmetic) pooling or logarithmic (geometric)
+    pooling, and optionally apply robust pooling using a Kullback-Leibler divergence
+    penalty to down-weight experts whose views deviate strongly from the group
+    consensus.
+
+    Parameters
+    ----------
+    estimators : list of (str, BasePrior)
+        A list of :ref:`prior estimators <prior>` representing opinions to be pooled
+        into a single consensus.
+        Each element of the list is defined as a tuple of string (i.e. name) and an
+        estimator instance. Each must expose `sample_weight` such as in
+        :class:`~skfolio.prior.EntropyPooling`.
+
+    opinion_probabilities : array-like of float, optional
+        Probability mass assigned to each opinion, in [0,1] summing to ≤1.
+        Any leftover mass is assigned to the uniform (uninformative) prior.
+        The default (None) is to assign the same probability to each opinion.
+
+    prior_estimator : BasePrior, optional
+        Common prior for all `estimators`. If provided, each estimator from `estimators`
+        will be fitted using this common prior before pooling. Setting `prior_estimator`
+        inside individual `estimators` is disabled to avoid mixing different prior
+        scenarios (each estimator must have the same underlying distribution).
+        For example, using `prior_estimator = SyntheticData(n_samples=10_000)` will
+        generate 10,000 synthetic data points from a Vine Copula before fitting the
+        estimators on this common distribution.
+
+    is_linear_pooling : bool, default=True
+        If True, combine each opinion via Linear Opinion Pooling
+        (arithmetic mean); if False, use Logarithmic Opinion Pooling (geometric
+        mean).
+
+        Linear Opinion Pooling:
+            * Retains all nonzero support (no "zero-forcing").
+            * Produces an averaging that is more evenly spread across all expert
+              opinions.
+
+        Logarithmic Opinion Pooling:
+            * Zero-Preservation. Any scenario assigned zero probability by any expert
+              remains zero in the aggregate.
+            * Information-Theoretic Optimality. Yields the distribution that minimizes
+              the weighted sum of KL-divergences from each expert's distribution.
+            * Robust to Extremes: down-weights extreme or contrarian views more
+              severely.
+
+    divergence_penalty : float, default=0.0
+        Non-negative factor (:math:`\alpha`) that penalizes each opinion's divergence
+        from the group consensus, yielding more robust pooling.
+        A higher value more strongly down-weights deviating opinions.
+
+        The robust opinion probabilities are given by:
+
+        .. math::
+            \tilde{p}_i = \frac{p_i \exp\bigl(-\alpha D_i\bigr)}
+            {\displaystyle \sum_{k=1}^N p_k \exp\bigl(-\alpha D_k\bigr)}
+            \quad\text{for }i = 1,\dots,N
+
+        where
+
+        * :math:`N` is the number of experts `len(estimators)`
+
+        * :math:`M` is the number of scenarios `len(observations)`
+
+        * :math:`D_i` is the KL-divergence of expert *i*'s distribution from consensus:
+
+          .. math::
+              D_i = \mathrm{KL}\bigl(w_i \,\|\, c\bigr)
+              = \sum_{j=1}^M w_{ij}\,\ln\!\frac{w_{ij}}{c_j}
+              \quad\text{for }i = 1,\dots,N.
+
+        * :math:`w_i` is the sample-weight vector (scenario probabilities) from expert
+          *i*, with :math:`\sum_{j=1}^M w_{ij} = 1`.
+
+        * :math:`p_i` is the initial opinion probability of expert *i*, with
+          :math:`\sum_{i=1}^N p_i \le 1` (any leftover mass goes to a uniform prior).
+
+        * :math:`c_j` is the consensus of scenario :math:`j`:
+
+          .. math::
+              c_j = \sum_{i=1}^N p_i \, w_{ij} \quad\text{for }j = 1,\dots,M.
+
+    n_jobs : int, optional
+        The number of jobs to run in parallel for `fit` of all `estimators`.
+        The value `-1` means using all processors.
+        The default (`None`) means 1 unless in a `joblib.parallel_backend` context.
+
+    Attributes
+    ----------
+    return_distribution_ : ReturnDistribution
+        Fitted :class:`~skfolio.prior.ReturnDistribution` to be used by the optimization
+        estimators, containing the assets distribution, moments estimation and the
+        opinion-pooling sample weights.
+
+    estimators_ : list[BasePrior]
+        The elements of the `estimators` parameter, having been fitted on the
+        training data.
+
+    named_estimators_ : dict[str, BasePrior]
+        Attribute to access any fitted sub-estimators by name.
+
+    prior_estimator_ : BasePrior
+        Fitted `prior_estimator` if provided.
+
+    opinion_probabilities_ : ndarray of shape (n_opinions,)
+        Final opinion probabilities after applying the KL-divergence penalty.
+        If the initial `opinion_probabilities` doesn't sum to one, the last element of
+        `opinion_probabilities_` is the probability assigned to the uniform prior.
+
+    n_features_in_ : int
+        Number of assets seen during `fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of assets seen during `fit`. Defined only when `X`
+        has assets names that are all strings.
+
+    References
+    ----------
+    .. [1] "Probabilistic opinion pooling generalized",
+        Social Choice and Welfare, Dietrich & List (2017)
+
+    .. [2] "Opinion Aggregation and Individual Expertise",
+        Oxford University Press, Martini & Sprenger (2017)
+
+    .. [3] "Rational Decisions",
+        Journal of the Royal Statistical Society, Good (1952)
+
+    Examples
+    --------
+    For a full tutorial on opinion pooling, see :ref:`sphx_glr_auto_examples_entropy_pooling_plot_2_opinion_pooling.py`.
+
+    >>> from skfolio import RiskMeasure
+    >>> from skfolio.datasets import load_sp500_dataset
+    >>> from skfolio.preprocessing import prices_to_returns
+    >>> from skfolio.prior import EntropyPooling, OpinionPooling
+    >>> from skfolio.optimization import RiskBudgeting
+    >>>
+    >>> prices = load_sp500_dataset()
+    >>> X = prices_to_returns(prices)
+    >>>
+    >>> # We consider two expert opinions, each generated via Entropy Pooling with
+    >>> # user-defined views.
+    >>> # We assign probabilities of 40% to Expert 1, 50% to Expert 2, and by default
+    >>> # the remaining 10% is allocated to the prior distribution:
+    >>> opinion_1 = EntropyPooling(cvar_views=["AMD == 0.10"])
+    >>> opinion_2 = EntropyPooling(
+    ...     mean_views=["AMD >= BAC", "JPM <= prior(JPM) * 0.8"],
+    ...     cvar_views=["GE == 0.12"],
+    ... )
+    >>>
+    >>> opinion_pooling = OpinionPooling(
+    ...     estimators=[("opinion_1", opinion_1), ("opinion_2", opinion_2)],
+    ...     opinion_probabilities=[0.4, 0.5],
+    ... )
+    >>>
+    >>> opinion_pooling.fit(X)
+    >>>
+    >>> print(opinion_pooling.return_distribution_.sample_weight)
+    >>>
+    >>> # CVaR Risk Parity optimization on Opinion Pooling
+    >>> model = RiskBudgeting(
+    ...     risk_measure=RiskMeasure.CVAR,
+    ...     prior_estimator=opinion_pooling
+    ... )
+    >>> model.fit(X)
+    >>> print(model.weights_)
+    >>>
+    >>> # Stress Test the Portfolio
+    >>> opinion_1 = EntropyPooling(cvar_views=["AMD == 0.05"])
+    >>> opinion_2 = EntropyPooling(cvar_views=["AMD == 0.10"])
+    >>> opinion_pooling = OpinionPooling(
+    ...     estimators=[("opinion_1", opinion_1), ("opinion_2", opinion_2)],
+    ...     opinion_probabilities=[0.6, 0.4],
+    ... )
+    >>> opinion_pooling.fit(X)
+    >>>
+    >>> stressed_dist = opinion_pooling.return_distribution_
+    >>>
+    >>> stressed_ptf = model.predict(stressed_dist)
+    """
+
+    estimators_: list[BasePrior]
+    named_estimators_: dict[str, BasePrior]
+    opinion_probabilities_: np.ndarray
+    prior_estimator_: BasePrior
+    n_features_in_: int
+    feature_names_in_: np.ndarray
+
+    def __init__(
+        self,
+        estimators: list[tuple[str, BasePrior]],
+        opinion_probabilities: list[float] | None = None,
+        prior_estimator: BasePrior | None = None,
+        is_linear_pooling: bool = True,
+        divergence_penalty: float = 0.0,
+        n_jobs: int | None = None,
+    ):
+        self.estimators = estimators
+        self.opinion_probabilities = opinion_probabilities
+        self.prior_estimator = prior_estimator
+        self.divergence_penalty = divergence_penalty
+        self.is_linear_pooling = is_linear_pooling
+        self.n_jobs = n_jobs
+
+    @property
+    def named_estimators(self):
+        """Dictionary to access any fitted sub-estimators by name.
+
+        Returns
+        -------
+        :class:`~sklearn.utils.Bunch`
+        """
+        return sku.Bunch(**dict(self.estimators))
+
+    def _validate_estimators(self) -> tuple[list[str], list[BasePrior]]:
+        """Validate the `estimators` parameter.
+
+        Returns
+        -------
+        names : list[str]
+            The list of estimators names.
+        estimators : list[BasePrior]
+            The list of prior estimators.
+        """
+        if self.estimators is None or len(self.estimators) == 0:
+            raise ValueError(
+                "Invalid 'estimators' attribute, 'estimators' should be a list"
+                " of (string, estimator) tuples."
+            )
+        names, estimators = zip(*self.estimators, strict=True)
+        # defined by MetaEstimatorMixin
+        self._validate_names(names)
+
+        for estimator in estimators:
+            if getattr(estimator, "prior_estimator", None) is not None:
+                raise ValueError(
+                    "Cannot set `prior_estimator` on individual estimators within "
+                    "`OpinionPooling` to avoid mixing different prior scenarios. "
+                    "Please leave those as `None` and specify your prior directly via "
+                    "the `prior_estimator` parameter of the `OpinionPooling` class."
+                )
+
+        return names, estimators
+
+    def set_params(self, **params):
+        """Set the parameters of an estimator from the ensemble.
+
+        Valid parameter keys can be listed with `get_params()`. Note that you
+        can directly set the parameters of the estimators contained in
+        `estimators`.
+
+        Parameters
+        ----------
+        **params : keyword arguments
+            Specific parameters using e.g.
+            `set_params(parameter_name=new_value)`. In addition to setting the
+            parameters of the estimator, the individual estimators of the
+            `estimators` parameter can also be set, or can be removed by
+            setting them to 'drop'.
+
+        Returns
+        -------
+        self : object
+            Estimator instance.
+        """
+        super()._set_params("estimators", **params)
+        return self
+
+    def get_params(self, deep=True):
+        """Get the parameters of an estimator from the ensemble.
+
+        Returns the parameters given in the constructor as well as the
+        estimators contained within the `estimators` parameter.
+
+        Parameters
+        ----------
+        deep : bool, default=True
+            Setting it to True gets the various estimators and the parameters
+            of the estimators as well.
+
+        Returns
+        -------
+        params : dict
+            Parameter and estimator names mapped to their values or parameter
+            names mapped to their values.
+        """
+        return super()._get_params("estimators", deep=deep)
+
+    def get_metadata_routing(self):
+        router = skm.MetadataRouter(owner=self.__class__.__name__)
+        for name, estimator in self.estimators:
+            router.add(
+                **{name: estimator},
+                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+            )
+        return router
+
+    def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "OpinionPooling":
+        """Fit the Opinion Pooling estimator.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
+        Returns
+        -------
+        self : OpinionPooling
+            Fitted estimator.
+        """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
+        skv.validate_data(self, X)
+
+        names, all_estimators = self._validate_estimators()
+
+        opinion_probabilities = self._validate_opinion_probabilities()
+
+        if self.prior_estimator is not None:
+            self.prior_estimator_ = check_estimator(
+                self.prior_estimator,
+                default=None,
+                check_type=BasePrior,
+            )
+            # fitting prior estimator
+            self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
+            returns = self.prior_estimator_.return_distribution_.returns
+            # To keep the asset_names
+            if hasattr(self, "feature_names_in_"):
+                returns = pd.DataFrame(returns, columns=self.feature_names_in_)
+        else:
+            returns = X
+
+        # Fit the prior estimators on the whole training data. Those
+        # prior estimators will be used to retrieve the sample weights.
+        self.estimators_ = skp.Parallel(n_jobs=self.n_jobs)(
+            skp.delayed(fit_single_estimator)(
+                sk.clone(est), returns, None, routed_params[name]["fit"]
+            )
+            for name, est in zip(names, all_estimators, strict=True)
+        )
+
+        self.named_estimators_ = {
+            name: estimator
+            for name, estimator in zip(names, self.estimators_, strict=True)
+        }
+
+        sample_weights = []
+        for estimator in self.estimators_:
+            if estimator.return_distribution_.sample_weight is None:
+                raise ValueError(
+                    f"Estimator `{estimator.__class__.__name__}` did not produce "
+                    "a `return_distribution_.sample_weight`. OpinionPooling requires "
+                    "each estimator to expose sample weights (e.g. via EntropyPooling)."
+                )
+            sample_weights.append(estimator.return_distribution_.sample_weight)
+        sample_weights = np.array(sample_weights)
+
+        returns = np.asarray(returns)
+        n_observations = len(returns)
+
+        # Add the remaining part of the opinion_probabilities to the uniform prior
+        q_weight = 1.0 - opinion_probabilities.sum()
+        if q_weight > 1e-8:
+            opinion_probabilities = np.append(opinion_probabilities, q_weight)
+            q = np.ones(n_observations) / n_observations
+            sample_weights = np.vstack((sample_weights, q))
+
+        opinion_probabilities = self._compute_robust_opinion_probabilities(
+            opinion_probabilities=opinion_probabilities, sample_weights=sample_weights
+        )
+
+        if self.is_linear_pooling:
+            sample_weight = opinion_probabilities @ sample_weights
+        else:
+            # let exact 0 in sample weights flow through
+            with np.errstate(divide="ignore"):
+                u = opinion_probabilities @ np.log(sample_weights)
+            sample_weight = np.exp(u - sp.logsumexp(u))
+
+        self.opinion_probabilities_ = opinion_probabilities
+        self.return_distribution_ = ReturnDistribution(
+            mu=sm.mean(returns, sample_weight=sample_weight),
+            covariance=np.cov(returns, rowvar=False, aweights=sample_weight),
+            returns=returns,
+            sample_weight=sample_weight,
+        )
+        return self
+
+    def _validate_opinion_probabilities(self) -> np.ndarray:
+        """Validate `opinion_probabilities`."""
+        n_opinions = len(self.estimators)
+        if self.opinion_probabilities is None:
+            return np.ones(n_opinions) / n_opinions
+
+        opinion_probabilities = np.asarray(self.opinion_probabilities)
+
+        if len(opinion_probabilities) != n_opinions:
+            raise ValueError(
+                f"`opinion_probabilities` length ({len(opinion_probabilities)}) "
+                f"does not match number of estimators ({n_opinions})."
+            )
+
+        if np.any(opinion_probabilities < 0) or np.any(opinion_probabilities > 1):
+            raise ValueError(
+                "The entries of `opinion_probabilities` must be between 0 and 1"
+            )
+        if opinion_probabilities.sum() > 1.0:
+            raise ValueError(
+                "The entries of `opinion_probabilities` must sum to at most 1; "
+                "any remaining mass (1-sum) is allocated to the uniform prior."
+            )
+        return opinion_probabilities
+
+    def _compute_robust_opinion_probabilities(
+        self, opinion_probabilities: np.ndarray, sample_weights: np.ndarray
+    ) -> np.ndarray:
+        """Compute the robust `opinion_probabilities` using KL-divergence."""
+        if self.divergence_penalty < 0:
+            raise ValueError("`divergence_penalty` cannot be negative")
+
+        if self.divergence_penalty == 0:
+            return opinion_probabilities
+
+        consensus = opinion_probabilities @ sample_weights
+        divergences = np.sum(scs.rel_entr(sample_weights, consensus), axis=1)
+        opinion_probabilities *= np.exp(-self.divergence_penalty * divergences)
+        opinion_probabilities /= opinion_probabilities.sum()
+        return opinion_probabilities
skfolio/prior/_synthetic_data.py
CHANGED
@@ -1,4 +1,4 @@
-"""Synthetic Data Prior
+"""Synthetic Data Prior estimator."""
 
 # Copyright (c) 2025
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
@@ -21,7 +21,7 @@ from skfolio.utils.tools import check_estimator
 class SyntheticData(BasePrior):
     """Synthetic Data Estimator.
 
-    The Synthetic Data model estimates a :class:`~skfolio.prior.PriorModel` by
+    The Synthetic Data model estimates a :class:`~skfolio.prior.ReturnDistribution` by
     fitting a `distribution_estimator` and sampling new returns data from it.
 
     The default ``distribution_estimator`` is a Regular Vine Copula model. Other common
@@ -49,8 +49,10 @@ class SyntheticData(BasePrior):
 
     Attributes
    ----------
-    prior_model_ : PriorModel
-        Fitted :class:`~skfolio.prior.PriorModel`.
+    return_distribution_ : ReturnDistribution
+        Fitted :class:`~skfolio.prior.ReturnDistribution` to be used by the optimization
+        estimators, containing the assets synthetic data distribution and moments
+        estimation.
 
     distribution_estimator_ : BaseEstimator
         The fitted distribution estimator.
@@ -80,7 +82,7 @@ class SyntheticData(BasePrior):
     >>> # Instantiate the SyntheticData model and fit it
     >>> model = SyntheticData()
     >>> model.fit(X)
-    >>> print(model.prior_model_)
+    >>> print(model.return_distribution_)
     >>>
     >>> # Minimum CVaR optimization on synthetic returns
     >>> model = MeanRisk(
@@ -114,8 +116,8 @@ class SyntheticData(BasePrior):
     ...     conditioning={"QUAL": -0.5}
     ... ))
     >>> factor_model.fit(X,y)
-    >>>
-    >>> stressed_ptf = model.predict(factor_model.prior_model_)
+    >>> stressed_dist = factor_model.return_distribution_
+    >>> stressed_ptf = model.predict(stressed_dist)
     """
 
     distribution_estimator_: skb.BaseEstimator
@@ -200,7 +202,7 @@ class SyntheticData(BasePrior):
         # Fit empirical posterior estimator
         posterior_estimator = EmpiricalPrior()
         posterior_estimator.fit(synthetic_data)
-        self.prior_model_ = posterior_estimator.prior_model_
+        self.return_distribution_ = posterior_estimator.return_distribution_
 
         return self
 
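
As with the other priors above, the practical change in _synthetic_data.py is that the fitted distribution now lives on `return_distribution_`, which `predict` accepts directly for stress testing. A hedged end-to-end sketch (the dataset helper and sample count are illustrative; the flow follows the docstring examples):

    from skfolio import RiskMeasure
    from skfolio.datasets import load_sp500_dataset
    from skfolio.optimization import MeanRisk
    from skfolio.preprocessing import prices_to_returns
    from skfolio.prior import SyntheticData

    X = prices_to_returns(load_sp500_dataset())

    # Minimum CVaR optimization on vine-copula synthetic returns
    model = MeanRisk(
        risk_measure=RiskMeasure.CVAR,
        prior_estimator=SyntheticData(n_samples=10_000),
    )
    model.fit(X)

    # Stress test: predict on another fitted prior's return distribution
    stressed_prior = SyntheticData(n_samples=10_000)
    stressed_prior.fit(X)
    stressed_ptf = model.predict(stressed_prior.return_distribution_)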
skfolio/uncertainty_set/_bootstrap.py
CHANGED
@@ -138,8 +138,8 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
         )
         # fitting estimators
         self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
-        mu = self.prior_estimator_.prior_model_.mu
-        returns = self.prior_estimator_.prior_model_.returns
+        mu = self.prior_estimator_.return_distribution_.mu
+        returns = self.prior_estimator_.return_distribution_.returns
         n_assets = returns.shape[1]
         k = np.sqrt(st.chi2.ppf(q=self.confidence_level, df=n_assets))
         samples = stationary_bootstrap(
@@ -276,8 +276,8 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
         )
         # fitting estimators
         self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
-        covariance = self.prior_estimator_.prior_model_.covariance
-        returns = self.prior_estimator_.prior_model_.returns
+        covariance = self.prior_estimator_.return_distribution_.covariance
+        returns = self.prior_estimator_.return_distribution_.returns
         n_assets = returns.shape[1]
         k = np.sqrt(st.chi2.ppf(q=self.confidence_level, df=n_assets**2))
 
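
The only substantive change in _bootstrap.py is the attribute rename; the chi-square scaling of the uncertainty-set radius is untouched. A small worked illustration of the `k` computed above (the 0.95 confidence level and 20 assets are arbitrary):

    import numpy as np
    import scipy.stats as st

    confidence_level, n_assets = 0.95, 20
    k_mu = np.sqrt(st.chi2.ppf(q=confidence_level, df=n_assets))      # mu set
    k_cov = np.sqrt(st.chi2.ppf(q=confidence_level, df=n_assets**2))  # covariance set
    print(round(k_mu, 3), round(k_cov, 3))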