skfolio 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (41)
  1. skfolio/distribution/multivariate/_vine_copula.py +35 -34
  2. skfolio/distribution/univariate/_base.py +20 -15
  3. skfolio/exceptions.py +5 -0
  4. skfolio/measures/__init__.py +2 -0
  5. skfolio/measures/_measures.py +390 -155
  6. skfolio/optimization/_base.py +21 -4
  7. skfolio/optimization/cluster/hierarchical/_base.py +16 -13
  8. skfolio/optimization/cluster/hierarchical/_herc.py +6 -6
  9. skfolio/optimization/cluster/hierarchical/_hrp.py +8 -6
  10. skfolio/optimization/convex/_base.py +238 -144
  11. skfolio/optimization/convex/_distributionally_robust.py +32 -20
  12. skfolio/optimization/convex/_maximum_diversification.py +15 -15
  13. skfolio/optimization/convex/_mean_risk.py +26 -24
  14. skfolio/optimization/convex/_risk_budgeting.py +23 -21
  15. skfolio/optimization/ensemble/__init__.py +2 -4
  16. skfolio/optimization/ensemble/_stacking.py +1 -1
  17. skfolio/optimization/naive/_naive.py +2 -2
  18. skfolio/population/_population.py +30 -9
  19. skfolio/portfolio/_base.py +68 -26
  20. skfolio/portfolio/_multi_period_portfolio.py +5 -0
  21. skfolio/portfolio/_portfolio.py +5 -0
  22. skfolio/prior/__init__.py +6 -2
  23. skfolio/prior/_base.py +7 -3
  24. skfolio/prior/_black_litterman.py +14 -12
  25. skfolio/prior/_empirical.py +8 -7
  26. skfolio/prior/_entropy_pooling.py +1493 -0
  27. skfolio/prior/_factor_model.py +39 -22
  28. skfolio/prior/_opinion_pooling.py +475 -0
  29. skfolio/prior/_synthetic_data.py +10 -8
  30. skfolio/uncertainty_set/_bootstrap.py +4 -4
  31. skfolio/uncertainty_set/_empirical.py +6 -6
  32. skfolio/utils/equations.py +10 -4
  33. skfolio/utils/figure.py +185 -0
  34. skfolio/utils/tools.py +4 -2
  35. {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/METADATA +94 -5
  36. {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/RECORD +40 -38
  37. {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/WHEEL +1 -1
  38. skfolio/synthetic_returns/__init__.py +0 -1
  39. /skfolio/{optimization/ensemble/_base.py → utils/composition.py} +0 -0
  40. {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/licenses/LICENSE +0 -0
  41. {skfolio-0.9.1.dist-info → skfolio-0.10.0.dist-info}/top_level.txt +0 -0
--- a/skfolio/prior/_factor_model.py
+++ b/skfolio/prior/_factor_model.py
@@ -19,7 +19,8 @@ import sklearn.multioutput as skmo
 import sklearn.utils.metadata_routing as skm
 import sklearn.utils.validation as skv
 
-from skfolio.prior._base import BasePrior, PriorModel
+import skfolio.measures as sm
+from skfolio.prior._base import BasePrior, ReturnDistribution
 from skfolio.prior._empirical import EmpiricalPrior
 from skfolio.utils.stats import cov_nearest
 from skfolio.utils.tools import check_estimator
@@ -39,7 +40,7 @@ class BaseLoadingMatrix(skb.BaseEstimator, ABC):
     intercepts_: np.ndarray
 
     @abstractmethod
-    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike):
+    def fit(self, X: npt.ArrayLike, y: npt.ArrayLike, **fit_params):
         pass
 
 
@@ -128,7 +129,9 @@ class LoadingMatrixRegression(BaseLoadingMatrix):
         self.multi_output_regressor_ = skmo.MultiOutputRegressor(
             _linear_regressor, n_jobs=self.n_jobs
         )
-        self.multi_output_regressor_.fit(X=y, y=X, **routed_params.linear_regressor.fit)
+        self.multi_output_regressor_.fit(
+            X=y, y=X, **routed_params.factor_prior_estimator.fit
+        )
         # noinspection PyUnresolvedReferences
         n_assets = X.shape[1]
         self.loading_matrix_ = np.array(
@@ -161,8 +164,8 @@ class FactorModel(BasePrior):
 
     factor_prior_estimator : BasePrior, optional
         The factors :ref:`prior estimator <prior>`.
-        It is used to estimate the :class:`~skfolio.prior.PriorModel` containing the
-        factors expected returns and covariance matrix.
+        It is used to estimate the :class:`~skfolio.prior.ReturnDistribution` containing
+        the estimation of factors expected returns and covariance matrix.
        The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.
 
     residual_variance : bool, default=True
@@ -181,8 +184,10 @@ class FactorModel(BasePrior):
 
     Attributes
     ----------
-    prior_model_ : PriorModel
-        The :class:`~skfolio.prior.PriorModel`.
+    return_distribution_ : ReturnDistribution
+        Fitted :class:`~skfolio.prior.ReturnDistribution` to be used by the optimization
+        estimators, containing the assets distribution, moments estimation and Cholesky
+        decomposition based on the factor model.
 
     factor_prior_estimator_ : BasePrior
         Fitted `factor_prior_estimator`.
@@ -218,10 +223,18 @@ class FactorModel(BasePrior):
         self.max_iteration = max_iteration
 
     def get_metadata_routing(self):
-        # noinspection PyTypeChecker
-        router = skm.MetadataRouter(owner=self.__class__.__name__).add(
-            factor_prior_estimator=self.factor_prior_estimator,
-            method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+        # route to factor_prior_estimator.fit
+        router = (
+            skm.MetadataRouter(owner=self.__class__.__name__)
+            .add(
+                factor_prior_estimator=self.factor_prior_estimator,
+                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+            )
+            # route to loading_matrix_estimator.fit
+            .add(
+                loading_matrix_estimator=self.loading_matrix_estimator,
+                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+            )
         )
         return router
 
@@ -266,12 +279,12 @@ class FactorModel(BasePrior):
         self.factor_prior_estimator_.fit(
             X=y, **routed_params.factor_prior_estimator.fit
         )
-        factor_mu = self.factor_prior_estimator_.prior_model_.mu
-        factor_covariance = self.factor_prior_estimator_.prior_model_.covariance
-        factor_returns = self.factor_prior_estimator_.prior_model_.returns
+        factor_return_dist = self.factor_prior_estimator_.return_distribution_
 
         # Fitting loading matrix estimator
-        self.loading_matrix_estimator_.fit(X, y)
+        self.loading_matrix_estimator_.fit(
+            X, y, **routed_params.loading_matrix_estimator.fit
+        )
         loading_matrix = self.loading_matrix_estimator_.loading_matrix_
         intercepts = self.loading_matrix_estimator_.intercepts_
 
@@ -294,15 +307,15 @@ class FactorModel(BasePrior):
                 f"shape {(n_assets,)}, got {intercepts.shape} instead."
             )
 
-        mu = loading_matrix @ factor_mu + intercepts
-        covariance = loading_matrix @ factor_covariance @ loading_matrix.T
-        returns = factor_returns @ loading_matrix.T + intercepts
-        cholesky = loading_matrix @ np.linalg.cholesky(factor_covariance)
+        mu = loading_matrix @ factor_return_dist.mu + intercepts
+        covariance = loading_matrix @ factor_return_dist.covariance @ loading_matrix.T
+        returns = factor_return_dist.returns @ loading_matrix.T + intercepts
+        cholesky = loading_matrix @ np.linalg.cholesky(factor_return_dist.covariance)
 
         if self.residual_variance:
             y_pred = y @ loading_matrix.T + intercepts
             err = X - y_pred
-            err_cov = np.diag(np.var(err, ddof=1, axis=0))
+            err_cov = np.diag(sm.variance(err))
             covariance += err_cov
             cholesky = np.hstack((cholesky, np.sqrt(err_cov)))
 
@@ -310,7 +323,11 @@ class FactorModel(BasePrior):
             covariance, higham=self.higham, higham_max_iteration=self.max_iteration
         )
 
-        self.prior_model_ = PriorModel(
-            mu=mu, covariance=covariance, returns=returns, cholesky=cholesky
+        self.return_distribution_ = ReturnDistribution(
+            mu=mu,
+            covariance=covariance,
+            returns=returns,
+            cholesky=cholesky,
+            sample_weight=factor_return_dist.sample_weight,
         )
         return self
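For readers skimming the hunks above: `FactorModel.fit` now reads the factor moments from a `ReturnDistribution` and maps them to asset moments through the loading matrix, optionally adding a diagonal residual-variance term. A minimal NumPy sketch of that mapping, using randomly generated stand-in arrays rather than skfolio objects (`loading_matrix`, `intercepts` and `factor_returns` below are toy values):

import numpy as np

rng = np.random.default_rng(0)
n_obs, n_assets, n_factors = 500, 4, 2

# Stand-ins for the fitted loading matrix B, intercepts a and factor scenarios
factor_returns = rng.normal(scale=0.01, size=(n_obs, n_factors))
loading_matrix = rng.normal(size=(n_assets, n_factors))  # B
intercepts = rng.normal(scale=1e-4, size=n_assets)       # a

factor_mu = factor_returns.mean(axis=0)
factor_covariance = np.cov(factor_returns, rowvar=False)

# mu = B mu_f + a and Sigma = B Sigma_f B^T, as in the @@ -294,15 hunk
mu = loading_matrix @ factor_mu + intercepts
covariance = loading_matrix @ factor_covariance @ loading_matrix.T
returns = factor_returns @ loading_matrix.T + intercepts
cholesky = loading_matrix @ np.linalg.cholesky(factor_covariance)

# With residual_variance=True, the diagonal regression-error covariance is
# added and its square root is stacked onto the Cholesky factor
err = rng.normal(scale=1e-3, size=(n_obs, n_assets))  # stand-in for X - y_pred
err_cov = np.diag(np.var(err, ddof=1, axis=0))
covariance += err_cov
cholesky = np.hstack((cholesky, np.sqrt(err_cov)))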
--- /dev/null
+++ b/skfolio/prior/_opinion_pooling.py
@@ -0,0 +1,475 @@
+"""Opinion Pooling estimator."""
+
+# Copyright (c) 2025
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# Credits: Vincent Maladière, Matteo Manzi, Carlo Nicolini
+# SPDX-License-Identifier: BSD-3-Clause
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import scipy.special as scs
+import scipy.special as sp
+import sklearn as sk
+import sklearn.utils as sku
+import sklearn.utils.metadata_routing as skm
+import sklearn.utils.parallel as skp
+import sklearn.utils.validation as skv
+
+import skfolio.measures as sm
+from skfolio.prior._base import BasePrior, ReturnDistribution
+from skfolio.utils.composition import BaseComposition
+from skfolio.utils.tools import check_estimator, fit_single_estimator
+
+
+class OpinionPooling(BasePrior, BaseComposition):
+    r"""Opinion Pooling estimator.
+
+    Opinion Pooling (also called Belief Aggregation or Risk Aggregation) is a process
+    in which different probability distributions (opinions), produced by different
+    experts, are combined to yield a single probability distribution (consensus).
+
+    Expert opinions (also called individual prior distributions) can be
+    **elicited** from domain experts or **derived** from quantitative analyses.
+
+    The `OpinionPooling` estimator takes a list of prior estimators, each of which
+    produces scenario probabilities (which we use as `sample_weight`), and pools them
+    into a single consensus probability.
+
+    You can choose between linear (arithmetic) pooling or logarithmic (geometric)
+    pooling, and optionally apply robust pooling using a Kullback-Leibler divergence
+    penalty to down-weight experts whose views deviate strongly from the group
+    consensus.
+
+    Parameters
+    ----------
+    estimators : list of (str, BasePrior)
+        A list of :ref:`prior estimators <prior>` representing opinions to be pooled
+        into a single consensus.
+        Each element of the list is defined as a tuple of string (i.e. name) and an
+        estimator instance. Each must expose `sample_weight` such as in
+        :class:`~skfolio.prior.EntropyPooling`.
+
+    opinion_probabilities : array-like of float, optional
+        Probability mass assigned to each opinion, in [0,1] summing to ≤1.
+        Any leftover mass is assigned to the uniform (uninformative) prior.
+        The default (None) is to assign the same probability to each opinion.
+
+    prior_estimator : BasePrior, optional
+        Common prior for all `estimators`. If provided, each estimator from `estimators`
+        will be fitted using this common prior before pooling. Setting `prior_estimator`
+        inside individual `estimators` is disabled to avoid mixing different prior
+        scenarios (each estimator must have the same underlying distribution).
+        For example, using `prior_estimator = SyntheticData(n_samples=10_000)` will
+        generate 10,000 synthetic data points from a Vine Copula before fitting the
+        estimators on this common distribution.
+
+    is_linear_pooling : bool, default=True
+        If True, combine each opinion via Linear Opinion Pooling
+        (arithmetic mean); if False, use Logarithmic Opinion Pooling (geometric
+        mean).
+
+        Linear Opinion Pooling:
+
+        * Retains all nonzero support (no "zero-forcing").
+        * Produces an averaging that is more evenly spread across all expert opinions.
+
+        Logarithmic Opinion Pooling:
+
+        * Zero-Preservation. Any scenario assigned zero probability by any expert
+          remains zero in the aggregate.
+        * Information-Theoretic Optimality. Yields the distribution that minimizes
+          the weighted sum of KL-divergences from each expert's distribution.
+        * Robust to Extremes: down-weights extreme or contrarian views more severely.
+
+    divergence_penalty : float, default=0.0
+        Non-negative factor (:math:`\alpha`) that penalizes each opinion's divergence
+        from the group consensus, yielding more robust pooling.
+        A higher value more strongly down-weights deviating opinions.
+
+        The robust opinion probabilities are given by:
+
+        .. math::
+            \tilde{p}_i = \frac{p_i \exp\bigl(-\alpha D_i\bigr)}
+            {\displaystyle \sum_{k=1}^N p_k \exp\bigl(-\alpha D_k\bigr)}
+            \quad\text{for }i = 1,\dots,N
+
+        where
+
+        * :math:`N` is the number of experts `len(estimators)`
+
+        * :math:`M` is the number of scenarios `len(observations)`
+
+        * :math:`D_i` is the KL-divergence of expert *i*'s distribution from consensus:
+
+          .. math::
+              D_i = \mathrm{KL}\bigl(w_i \,\|\, c\bigr)
+              = \sum_{j=1}^M w_{ij}\,\ln\!\frac{w_{ij}}{c_j}
+              \quad\text{for }i = 1,\dots,N.
+
+        * :math:`w_i` is the sample-weight vector (scenario probabilities) from expert
+          *i*, with :math:`\sum_{j=1}^M w_{ij} = 1`.
+
+        * :math:`p_i` is the initial opinion probability of expert *i*, with
+          :math:`\sum_{i=1}^N p_i \le 1` (any leftover mass goes to a uniform prior).
+
+        * :math:`c_j` is the consensus of scenario :math:`j`:
+
+          .. math::
+              c_j = \sum_{i=1}^N p_i \, w_{ij} \quad\text{for }j = 1,\dots,M.
+
+    n_jobs : int, optional
+        The number of jobs to run in parallel for `fit` of all `estimators`.
+        The value `-1` means using all processors.
+        The default (`None`) means 1 unless in a `joblib.parallel_backend` context.
+
+    Attributes
+    ----------
+    return_distribution_ : ReturnDistribution
+        Fitted :class:`~skfolio.prior.ReturnDistribution` to be used by the optimization
+        estimators, containing the assets distribution, moments estimation and the
+        opinion-pooling sample weights.
+
+    estimators_ : list[BasePrior]
+        The elements of the `estimators` parameter, having been fitted on the
+        training data.
+
+    named_estimators_ : dict[str, BasePrior]
+        Attribute to access any fitted sub-estimators by name.
+
+    prior_estimator_ : BasePrior
+        Fitted `prior_estimator` if provided.
+
+    opinion_probabilities_ : ndarray of shape (n_opinions,)
+        Final opinion probabilities after applying the KL-divergence penalty.
+        If the initial `opinion_probabilities` doesn't sum to one, the last element of
+        `opinion_probabilities_` is the probability assigned to the uniform prior.
+
+    n_features_in_ : int
+        Number of assets seen during `fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of assets seen during `fit`. Defined only when `X`
+        has asset names that are all strings.
+
+    References
+    ----------
+    .. [1] "Probabilistic opinion pooling generalized",
+        Social Choice and Welfare, Dietrich & List (2017)
+
+    .. [2] "Opinion Aggregation and Individual Expertise",
+        Oxford University Press, Martini & Sprenger (2017)
+
+    .. [3] "Rational Decisions",
+        Journal of the Royal Statistical Society, Good (1952)
+
+    Examples
+    --------
+    For a full tutorial on opinion pooling, see
+    :ref:`sphx_glr_auto_examples_entropy_pooling_plot_2_opinion_pooling.py`.
+
+    >>> from skfolio import RiskMeasure
+    >>> from skfolio.datasets import load_sp500_dataset
+    >>> from skfolio.preprocessing import prices_to_returns
+    >>> from skfolio.prior import EntropyPooling, OpinionPooling
+    >>> from skfolio.optimization import RiskBudgeting
+    >>>
+    >>> prices = load_sp500_dataset()
+    >>> X = prices_to_returns(prices)
+    >>>
+    >>> # We consider two expert opinions, each generated via Entropy Pooling with
+    >>> # user-defined views.
+    >>> # We assign probabilities of 40% to Expert 1, 50% to Expert 2, and by default
+    >>> # the remaining 10% is allocated to the prior distribution:
+    >>> opinion_1 = EntropyPooling(cvar_views=["AMD == 0.10"])
+    >>> opinion_2 = EntropyPooling(
+    ...     mean_views=["AMD >= BAC", "JPM <= prior(JPM) * 0.8"],
+    ...     cvar_views=["GE == 0.12"],
+    ... )
+    >>>
+    >>> opinion_pooling = OpinionPooling(
+    ...     estimators=[("opinion_1", opinion_1), ("opinion_2", opinion_2)],
+    ...     opinion_probabilities=[0.4, 0.5],
+    ... )
+    >>>
+    >>> opinion_pooling.fit(X)
+    >>>
+    >>> print(opinion_pooling.return_distribution_.sample_weight)
+    >>>
+    >>> # CVaR Risk Parity optimization on Opinion Pooling
+    >>> model = RiskBudgeting(
+    ...     risk_measure=RiskMeasure.CVAR,
+    ...     prior_estimator=opinion_pooling
+    ... )
+    >>> model.fit(X)
+    >>> print(model.weights_)
+    >>>
+    >>> # Stress Test the Portfolio
+    >>> opinion_1 = EntropyPooling(cvar_views=["AMD == 0.05"])
+    >>> opinion_2 = EntropyPooling(cvar_views=["AMD == 0.10"])
+    >>> opinion_pooling = OpinionPooling(
+    ...     estimators=[("opinion_1", opinion_1), ("opinion_2", opinion_2)],
+    ...     opinion_probabilities=[0.6, 0.4],
+    ... )
+    >>> opinion_pooling.fit(X)
+    >>>
+    >>> stressed_dist = opinion_pooling.return_distribution_
+    >>>
+    >>> stressed_ptf = model.predict(stressed_dist)
+    """
+
+    estimators_: list[BasePrior]
+    named_estimators_: dict[str, BasePrior]
+    opinion_probabilities_: np.ndarray
+    prior_estimator_: BasePrior
+    n_features_in_: int
+    feature_names_in_: np.ndarray
+
+    def __init__(
+        self,
+        estimators: list[tuple[str, BasePrior]],
+        opinion_probabilities: list[float] | None = None,
+        prior_estimator: BasePrior | None = None,
+        is_linear_pooling: bool = True,
+        divergence_penalty: float = 0.0,
+        n_jobs: int | None = None,
+    ):
+        self.estimators = estimators
+        self.opinion_probabilities = opinion_probabilities
+        self.prior_estimator = prior_estimator
+        self.divergence_penalty = divergence_penalty
+        self.is_linear_pooling = is_linear_pooling
+        self.n_jobs = n_jobs
+
+    @property
+    def named_estimators(self):
+        """Dictionary to access any fitted sub-estimators by name.
+
+        Returns
+        -------
+        :class:`~sklearn.utils.Bunch`
+        """
+        return sku.Bunch(**dict(self.estimators))
+
+    def _validate_estimators(self) -> tuple[list[str], list[BasePrior]]:
+        """Validate the `estimators` parameter.
+
+        Returns
+        -------
+        names : list[str]
+            The list of estimator names.
+
+        estimators : list[BasePrior]
+            The list of prior estimators.
+        """
+        if self.estimators is None or len(self.estimators) == 0:
+            raise ValueError(
+                "Invalid 'estimators' attribute, 'estimators' should be a list"
+                " of (string, estimator) tuples."
+            )
+        names, estimators = zip(*self.estimators, strict=True)
+        # defined by MetaEstimatorMixin
+        self._validate_names(names)
+
+        for estimator in estimators:
+            if getattr(estimator, "prior_estimator", None) is not None:
+                raise ValueError(
+                    "Cannot set `prior_estimator` on individual estimators within "
+                    "`OpinionPooling` to avoid mixing different prior scenarios. "
+                    "Please leave those as `None` and specify your prior directly via "
+                    "the `prior_estimator` parameter of the `OpinionPooling` class."
+                )
+
+        return names, estimators
+
+    def set_params(self, **params):
+        """Set the parameters of an estimator from the ensemble.
+
+        Valid parameter keys can be listed with `get_params()`. Note that you
+        can directly set the parameters of the estimators contained in
+        `estimators`.
+
+        Parameters
+        ----------
+        **params : keyword arguments
+            Specific parameters using e.g.
+            `set_params(parameter_name=new_value)`. In addition to setting the
+            parameters of the estimator, the individual estimators of the
+            `estimators` parameter can also be set, or can be removed by
+            setting them to 'drop'.
+
+        Returns
+        -------
+        self : object
+            Estimator instance.
+        """
+        super()._set_params("estimators", **params)
+        return self
+
+    def get_params(self, deep=True):
+        """Get the parameters of an estimator from the ensemble.
+
+        Returns the parameters given in the constructor as well as the
+        estimators contained within the `estimators` parameter.
+
+        Parameters
+        ----------
+        deep : bool, default=True
+            Setting it to True gets the various estimators and the parameters
+            of the estimators as well.
+
+        Returns
+        -------
+        params : dict
+            Parameter and estimator names mapped to their values or parameter
+            names mapped to their values.
+        """
+        return super()._get_params("estimators", deep=deep)
+
+    def get_metadata_routing(self):
+        router = skm.MetadataRouter(owner=self.__class__.__name__)
+        for name, estimator in self.estimators:
+            router.add(
+                **{name: estimator},
+                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
+            )
+        return router
+
+    def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "OpinionPooling":
+        """Fit the Opinion Pooling estimator.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+
+        y : Ignored
+            Not used, present for API consistency by convention.
+
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
+        Returns
+        -------
+        self : OpinionPooling
+            Fitted estimator.
+        """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
+        skv.validate_data(self, X)
+
+        names, all_estimators = self._validate_estimators()
+
+        opinion_probabilities = self._validate_opinion_probabilities()
+
+        if self.prior_estimator is not None:
+            self.prior_estimator_ = check_estimator(
+                self.prior_estimator,
+                default=None,
+                check_type=BasePrior,
+            )
+            # fitting prior estimator
+            self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
+            returns = self.prior_estimator_.return_distribution_.returns
+            # To keep the asset names
+            if hasattr(self, "feature_names_in_"):
+                returns = pd.DataFrame(returns, columns=self.feature_names_in_)
+        else:
+            returns = X
+
+        # Fit the prior estimators on the whole training data. Those
+        # prior estimators will be used to retrieve the sample weights.
+        self.estimators_ = skp.Parallel(n_jobs=self.n_jobs)(
+            skp.delayed(fit_single_estimator)(
+                sk.clone(est), returns, None, routed_params[name]["fit"]
+            )
+            for name, est in zip(names, all_estimators, strict=True)
+        )
+
+        self.named_estimators_ = {
+            name: estimator
+            for name, estimator in zip(names, self.estimators_, strict=True)
+        }
+
+        sample_weights = []
+        for estimator in self.estimators_:
+            if estimator.return_distribution_.sample_weight is None:
+                raise ValueError(
+                    f"Estimator `{estimator.__class__.__name__}` did not produce "
+                    "a `return_distribution_.sample_weight`. OpinionPooling requires "
+                    "each estimator to expose sample weights (e.g. via EntropyPooling)."
+                )
+            sample_weights.append(estimator.return_distribution_.sample_weight)
+        sample_weights = np.array(sample_weights)
+
+        returns = np.asarray(returns)
+        n_observations = len(returns)
+
+        # Add the remaining part of the opinion_probabilities to the uniform prior
+        q_weight = 1.0 - opinion_probabilities.sum()
+        if q_weight > 1e-8:
+            opinion_probabilities = np.append(opinion_probabilities, q_weight)
+            q = np.ones(n_observations) / n_observations
+            sample_weights = np.vstack((sample_weights, q))
+
+        opinion_probabilities = self._compute_robust_opinion_probabilities(
+            opinion_probabilities=opinion_probabilities, sample_weights=sample_weights
+        )
+
+        if self.is_linear_pooling:
+            sample_weight = opinion_probabilities @ sample_weights
+        else:
+            # let exact 0 in sample weights flow through
+            with np.errstate(divide="ignore"):
+                u = opinion_probabilities @ np.log(sample_weights)
+            sample_weight = np.exp(u - sp.logsumexp(u))
+
+        self.opinion_probabilities_ = opinion_probabilities
+        self.return_distribution_ = ReturnDistribution(
+            mu=sm.mean(returns, sample_weight=sample_weight),
+            covariance=np.cov(returns, rowvar=False, aweights=sample_weight),
+            returns=returns,
+            sample_weight=sample_weight,
+        )
+        return self
+
+    def _validate_opinion_probabilities(self) -> np.ndarray:
+        """Validate `opinion_probabilities`."""
+        n_opinions = len(self.estimators)
+        if self.opinion_probabilities is None:
+            return np.ones(n_opinions) / n_opinions
+
+        opinion_probabilities = np.asarray(self.opinion_probabilities)
+
+        if len(opinion_probabilities) != n_opinions:
+            raise ValueError(
+                f"`opinion_probabilities` length ({len(opinion_probabilities)}) "
+                f"does not match number of estimators ({n_opinions})."
+            )
+
+        if np.any(opinion_probabilities < 0) or np.any(opinion_probabilities > 1):
+            raise ValueError(
+                "The entries of `opinion_probabilities` must be between 0 and 1"
+            )
+        if opinion_probabilities.sum() > 1.0:
+            raise ValueError(
+                "The entries of `opinion_probabilities` must sum to at most 1; "
+                "any remaining mass (1-sum) is allocated to the uniform prior."
+            )
+        return opinion_probabilities
+
+    def _compute_robust_opinion_probabilities(
+        self, opinion_probabilities: np.ndarray, sample_weights: np.ndarray
+    ) -> np.ndarray:
+        """Compute the robust `opinion_probabilities` using KL-divergence."""
+        if self.divergence_penalty < 0:
+            raise ValueError("`divergence_penalty` cannot be negative")
+
+        if self.divergence_penalty == 0:
+            return opinion_probabilities
+
+        consensus = opinion_probabilities @ sample_weights
+        divergences = np.sum(scs.rel_entr(sample_weights, consensus), axis=1)
+        opinion_probabilities *= np.exp(-self.divergence_penalty * divergences)
+        opinion_probabilities /= opinion_probabilities.sum()
+        return opinion_probabilities
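The pooling mechanics described in the docstring and implemented in `fit` above reduce to a few array operations. The following is an illustrative sketch only (plain NumPy/SciPy, not the skfolio API), with two made-up expert distributions over four scenarios and an arbitrary penalty `alpha`:

import numpy as np
import scipy.special as scs

sample_weights = np.array([
    [0.10, 0.20, 0.30, 0.40],  # expert 1 scenario probabilities w_1
    [0.25, 0.25, 0.25, 0.25],  # expert 2 scenario probabilities w_2
])
opinion_probabilities = np.array([0.4, 0.5])  # p_i; leftover 0.1 -> uniform prior

# Leftover probability mass is assigned to the uniform (uninformative) prior
q_weight = 1.0 - opinion_probabilities.sum()
if q_weight > 1e-8:
    opinion_probabilities = np.append(opinion_probabilities, q_weight)
    sample_weights = np.vstack((sample_weights, np.full(4, 0.25)))

# Robust reweighting: p_i proportional to p_i * exp(-alpha * KL(w_i || c)),
# where c = sum_i p_i w_i is the consensus distribution
alpha = 2.0
consensus = opinion_probabilities @ sample_weights
divergences = np.sum(scs.rel_entr(sample_weights, consensus), axis=1)
opinion_probabilities = opinion_probabilities * np.exp(-alpha * divergences)
opinion_probabilities /= opinion_probabilities.sum()

# Linear pooling: arithmetic mixture of the expert distributions
linear_pool = opinion_probabilities @ sample_weights

# Logarithmic pooling: weighted geometric mean, renormalized via logsumexp
with np.errstate(divide="ignore"):
    u = opinion_probabilities @ np.log(sample_weights)
log_pool = np.exp(u - scs.logsumexp(u))

Note the trade-off the docstring describes: `linear_pool` keeps every scenario with nonzero mass under any expert, while `log_pool` zeroes out any scenario that any single expert assigns zero probability.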
--- a/skfolio/prior/_synthetic_data.py
+++ b/skfolio/prior/_synthetic_data.py
@@ -1,4 +1,4 @@
-"""Synthetic Data Prior Model estimator."""
+"""Synthetic Data Prior estimator."""
 
 # Copyright (c) 2025
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
@@ -21,7 +21,7 @@ from skfolio.utils.tools import check_estimator
 class SyntheticData(BasePrior):
     """Synthetic Data Estimator.
 
-    The Synthetic Data model estimates a :class:`~skfolio.prior.PriorModel` by
+    The Synthetic Data model estimates a :class:`~skfolio.prior.ReturnDistribution` by
     fitting a `distribution_estimator` and sampling new returns data from it.
 
     The default ``distribution_estimator`` is a Regular Vine Copula model. Other common
@@ -49,8 +49,10 @@ class SyntheticData(BasePrior):
 
     Attributes
     ----------
-    prior_model_ : PriorModel
-        The assets :class:`~skfolio.prior.PriorModel`.
+    return_distribution_ : ReturnDistribution
+        Fitted :class:`~skfolio.prior.ReturnDistribution` to be used by the optimization
+        estimators, containing the assets synthetic data distribution and moments
+        estimation.
 
     distribution_estimator_ : BaseEstimator
         The fitted distribution estimator.
@@ -80,7 +82,7 @@ class SyntheticData(BasePrior):
     >>> # Instantiate the SyntheticData model and fit it
     >>> model = SyntheticData()
     >>> model.fit(X)
-    >>> print(model.prior_model_)
+    >>> print(model.return_distribution_)
     >>>
     >>> # Minimum CVaR optimization on synthetic returns
     >>> model = MeanRisk(
@@ -114,8 +116,8 @@ class SyntheticData(BasePrior):
     ...     conditioning={"QUAL": -0.5}
     ... ))
     >>> factor_model.fit(X, y)
-    >>> stressed_X = factor_model.prior_model_.returns
-    >>> stressed_ptf = model.predict(stressed_X)
+    >>> stressed_dist = factor_model.return_distribution_
+    >>> stressed_ptf = model.predict(stressed_dist)
     """
 
     distribution_estimator_: skb.BaseEstimator
@@ -200,7 +202,7 @@ class SyntheticData(BasePrior):
         # Fit empirical posterior estimator
         posterior_estimator = EmpiricalPrior()
         posterior_estimator.fit(synthetic_data)
-        self.prior_model_ = posterior_estimator.prior_model_
+        self.return_distribution_ = posterior_estimator.return_distribution_
 
         return self
 
--- a/skfolio/uncertainty_set/_bootstrap.py
+++ b/skfolio/uncertainty_set/_bootstrap.py
@@ -138,8 +138,8 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
         )
         # fitting estimators
         self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
-        mu = self.prior_estimator_.prior_model_.mu
-        returns = self.prior_estimator_.prior_model_.returns
+        mu = self.prior_estimator_.return_distribution_.mu
+        returns = self.prior_estimator_.return_distribution_.returns
         n_assets = returns.shape[1]
         k = np.sqrt(st.chi2.ppf(q=self.confidence_level, df=n_assets))
         samples = stationary_bootstrap(
@@ -276,8 +276,8 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
         )
         # fitting estimators
         self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
-        covariance = self.prior_estimator_.prior_model_.covariance
-        returns = self.prior_estimator_.prior_model_.returns
+        covariance = self.prior_estimator_.return_distribution_.covariance
+        returns = self.prior_estimator_.return_distribution_.returns
         n_assets = returns.shape[1]
         k = np.sqrt(st.chi2.ppf(q=self.confidence_level, df=n_assets**2))
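Both hunks change only where the prior moments are read from; the uncertainty-set radius itself is unchanged: it scales with the square root of a chi-squared quantile at the chosen confidence level, with `df=n_assets` for the expected-returns set and `df=n_assets**2` for the covariance set. A quick sketch of that scaling with arbitrary values:

import numpy as np
import scipy.stats as st

confidence_level = 0.95
n_assets = 10

# Radius for the mu uncertainty set (chi-squared with n_assets degrees of freedom)
k_mu = np.sqrt(st.chi2.ppf(q=confidence_level, df=n_assets))

# Radius for the covariance uncertainty set (n_assets**2 degrees of freedom)
k_cov = np.sqrt(st.chi2.ppf(q=confidence_level, df=n_assets**2))

print(k_mu, k_cov)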