skfolio 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. skfolio/__init__.py +2 -2
  2. skfolio/cluster/__init__.py +1 -1
  3. skfolio/cluster/_hierarchical.py +1 -1
  4. skfolio/datasets/__init__.py +1 -1
  5. skfolio/datasets/_base.py +2 -2
  6. skfolio/datasets/data/__init__.py +1 -0
  7. skfolio/distance/__init__.py +1 -1
  8. skfolio/distance/_base.py +2 -2
  9. skfolio/distance/_distance.py +4 -4
  10. skfolio/distribution/__init__.py +56 -0
  11. skfolio/distribution/_base.py +203 -0
  12. skfolio/distribution/copula/__init__.py +35 -0
  13. skfolio/distribution/copula/_base.py +456 -0
  14. skfolio/distribution/copula/_clayton.py +539 -0
  15. skfolio/distribution/copula/_gaussian.py +407 -0
  16. skfolio/distribution/copula/_gumbel.py +560 -0
  17. skfolio/distribution/copula/_independent.py +196 -0
  18. skfolio/distribution/copula/_joe.py +609 -0
  19. skfolio/distribution/copula/_selection.py +111 -0
  20. skfolio/distribution/copula/_student_t.py +486 -0
  21. skfolio/distribution/copula/_utils.py +509 -0
  22. skfolio/distribution/multivariate/__init__.py +11 -0
  23. skfolio/distribution/multivariate/_base.py +241 -0
  24. skfolio/distribution/multivariate/_utils.py +632 -0
  25. skfolio/distribution/multivariate/_vine_copula.py +1254 -0
  26. skfolio/distribution/univariate/__init__.py +19 -0
  27. skfolio/distribution/univariate/_base.py +308 -0
  28. skfolio/distribution/univariate/_gaussian.py +136 -0
  29. skfolio/distribution/univariate/_johnson_su.py +152 -0
  30. skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
  31. skfolio/distribution/univariate/_selection.py +85 -0
  32. skfolio/distribution/univariate/_student_t.py +144 -0
  33. skfolio/exceptions.py +6 -6
  34. skfolio/measures/__init__.py +1 -1
  35. skfolio/measures/_enums.py +7 -7
  36. skfolio/measures/_measures.py +4 -7
  37. skfolio/metrics/__init__.py +2 -0
  38. skfolio/metrics/_scorer.py +4 -4
  39. skfolio/model_selection/__init__.py +2 -2
  40. skfolio/model_selection/_combinatorial.py +15 -12
  41. skfolio/model_selection/_validation.py +2 -2
  42. skfolio/model_selection/_walk_forward.py +3 -3
  43. skfolio/moments/covariance/_base.py +1 -1
  44. skfolio/moments/covariance/_denoise_covariance.py +1 -1
  45. skfolio/moments/covariance/_detone_covariance.py +1 -1
  46. skfolio/moments/covariance/_empirical_covariance.py +1 -1
  47. skfolio/moments/covariance/_ew_covariance.py +1 -1
  48. skfolio/moments/covariance/_gerber_covariance.py +1 -1
  49. skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
  50. skfolio/moments/covariance/_implied_covariance.py +2 -7
  51. skfolio/moments/covariance/_ledoit_wolf.py +1 -1
  52. skfolio/moments/covariance/_oas.py +1 -1
  53. skfolio/moments/covariance/_shrunk_covariance.py +1 -1
  54. skfolio/moments/expected_returns/_base.py +1 -1
  55. skfolio/moments/expected_returns/_empirical_mu.py +1 -1
  56. skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
  57. skfolio/moments/expected_returns/_ew_mu.py +1 -1
  58. skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
  59. skfolio/optimization/__init__.py +2 -0
  60. skfolio/optimization/_base.py +2 -2
  61. skfolio/optimization/cluster/__init__.py +2 -0
  62. skfolio/optimization/cluster/_nco.py +7 -7
  63. skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
  64. skfolio/optimization/cluster/hierarchical/_base.py +1 -2
  65. skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
  66. skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
  67. skfolio/optimization/convex/__init__.py +2 -0
  68. skfolio/optimization/convex/_base.py +8 -8
  69. skfolio/optimization/convex/_distributionally_robust.py +4 -4
  70. skfolio/optimization/convex/_maximum_diversification.py +5 -5
  71. skfolio/optimization/convex/_mean_risk.py +5 -6
  72. skfolio/optimization/convex/_risk_budgeting.py +3 -3
  73. skfolio/optimization/ensemble/__init__.py +2 -0
  74. skfolio/optimization/ensemble/_base.py +2 -2
  75. skfolio/optimization/ensemble/_stacking.py +1 -1
  76. skfolio/optimization/naive/__init__.py +2 -0
  77. skfolio/optimization/naive/_naive.py +1 -1
  78. skfolio/population/__init__.py +2 -0
  79. skfolio/population/_population.py +35 -9
  80. skfolio/portfolio/_base.py +42 -8
  81. skfolio/portfolio/_multi_period_portfolio.py +3 -2
  82. skfolio/portfolio/_portfolio.py +4 -4
  83. skfolio/pre_selection/__init__.py +2 -0
  84. skfolio/pre_selection/_drop_correlated.py +2 -2
  85. skfolio/pre_selection/_select_complete.py +25 -26
  86. skfolio/pre_selection/_select_k_extremes.py +2 -2
  87. skfolio/pre_selection/_select_non_dominated.py +2 -2
  88. skfolio/pre_selection/_select_non_expiring.py +2 -2
  89. skfolio/preprocessing/__init__.py +2 -0
  90. skfolio/preprocessing/_returns.py +2 -2
  91. skfolio/prior/__init__.py +4 -0
  92. skfolio/prior/_base.py +2 -2
  93. skfolio/prior/_black_litterman.py +5 -3
  94. skfolio/prior/_empirical.py +3 -1
  95. skfolio/prior/_factor_model.py +8 -4
  96. skfolio/prior/_synthetic_data.py +239 -0
  97. skfolio/synthetic_returns/__init__.py +1 -0
  98. skfolio/typing.py +1 -1
  99. skfolio/uncertainty_set/__init__.py +2 -0
  100. skfolio/uncertainty_set/_base.py +2 -2
  101. skfolio/uncertainty_set/_bootstrap.py +1 -1
  102. skfolio/uncertainty_set/_empirical.py +1 -1
  103. skfolio/utils/__init__.py +1 -0
  104. skfolio/utils/bootstrap.py +2 -2
  105. skfolio/utils/equations.py +13 -10
  106. skfolio/utils/sorting.py +2 -2
  107. skfolio/utils/stats.py +7 -7
  108. skfolio/utils/tools.py +76 -12
  109. {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/METADATA +99 -24
  110. skfolio-0.8.1.dist-info/RECORD +120 -0
  111. {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/WHEEL +1 -1
  112. skfolio-0.7.0.dist-info/RECORD +0 -95
  113. {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info/licenses}/LICENSE +0 -0
  114. {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,407 @@
1
+ """Bivariate Gaussian Copula Estimation."""
2
+
3
+ # Copyright (c) 2025
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+ import scipy.optimize as so
11
+ import scipy.special as sp
12
+ import scipy.stats as st
13
+ import sklearn.utils.validation as skv
14
+
15
+ from skfolio.distribution.copula._base import _RHO_BOUNDS, BaseBivariateCopula
16
+ from skfolio.distribution.copula._utils import _apply_margin_swap
17
+
18
+
19
class GaussianCopula(BaseBivariateCopula):
    r"""Bivariate Gaussian Copula Estimation.

    The bivariate Gaussian copula is defined as:

    .. math::
        C_{\rho}(u, v) = \Phi_2\left(\Phi^{-1}(u), \Phi^{-1}(v) ; \rho\right)

    where:
    - :math:`\Phi_2` is the bivariate normal CDF with correlation :math:`\rho`.
    - :math:`\Phi` is the standard normal CDF and :math:`\Phi^{-1}` its quantile function.
    - :math:`\rho \in (-1, 1)` is the correlation coefficient.

    .. note::

        Rotations are not needed for elliptical copulas (e.g., Gaussian or Student-t)
        because their correlation parameter :math:`\rho \in (-1, 1)` naturally covers
        both positive and negative dependence, and they exhibit symmetric tail
        behavior.

    Parameters
    ----------
    itau : bool, default=True
        If True, :math:`\rho` is estimated using the Kendall's tau inversion method;
        otherwise, we use the MLE (Maximum Likelihood Estimation) method. The MLE is
        slower but more accurate.

    kendall_tau : float, optional
        If `itau` is True and `kendall_tau` is provided, this
        value is used; otherwise, it is computed.

    tolerance : float, default=1e-4
        Convergence tolerance for the MLE optimization. It is forwarded as the
        `xatol` option of `scipy.optimize.minimize_scalar`.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    rho_ : float
        Fitted parameter (:math:`\rho`), constrained to the interval given by
        `_RHO_BOUNDS`.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_dataset
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution import GaussianCopula, compute_pseudo_observations
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_dataset()
    >>> X = prices_to_returns(prices)
    >>> X = X[["AAPL", "JPM"]]
    >>>
    >>> # Convert returns to pseudo observation in the interval [0,1]
    >>> X = compute_pseudo_observations(X)
    >>>
    >>> # Initialize the Copula estimator
    >>> model = GaussianCopula()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameter and tail dependence coefficients
    >>> print(model.fitted_repr)
    GaussianCopula(rho=0.327)
    >>> print(model.lower_tail_dependence)
    0.0
    >>> print(model.upper_tail_dependence)
    0.0
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
    >>> # Inverse Partial Derivative, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> p = model.partial_derivative(X)
    >>> u = model.inverse_partial_derivative(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the tail concentration function.
    >>> fig = model.plot_tail_concentration()
    >>> fig.show()
    >>>
    >>> # Plot a 2D contour of the estimated PDF.
    >>> fig = model.plot_pdf_2d()
    >>> fig.show()
    >>>
    >>> # Plot a 3D surface of the estimated PDF.
    >>> fig = model.plot_pdf_3d()
    >>> fig.show()

    References
    ----------
    .. [1] "An Introduction to Copulas (2nd ed.)",
        Nelsen (2006)

    .. [2] "Multivariate Models and Dependence Concepts",
        Joe, Chapman & Hall (1997)

    .. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
        McNeil, Frey & Embrechts (2005)

    .. [4] "The t Copula and Related Copulas",
        Demarta & McNeil (2005)

    .. [5] "Copula Methods in Finance",
        Cherubini, Luciano & Vecchiato (2004)
    """

    rho_: float
    # Number of fitted parameters of this copula (only rho).
    _n_params = 1

    def __init__(
        self,
        itau: bool = True,
        kendall_tau: float | None = None,
        tolerance: float = 1e-4,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.itau = itau
        self.kendall_tau = kendall_tau
        self.tolerance = tolerance

    def fit(self, X: npt.ArrayLike, y=None) -> "GaussianCopula":
        r"""Fit the Bivariate Gaussian Copula.

        If `itau` is True, estimates :math:`\rho` using Kendall's tau inversion.
        Otherwise, uses MLE by maximizing the log-likelihood.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval [0, 1],
            having been transformed to uniform marginals.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : GaussianCopula
            Returns the instance itself.

        Raises
        ------
        RuntimeError
            If the MLE optimization (`itau=False`) does not report success.
        """
        X = self._validate_X(X, reset=True)

        if self.itau:
            # Use the user-supplied Kendall's tau when available; otherwise
            # estimate it from the two columns of X.
            if self.kendall_tau is None:
                kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
            else:
                kendall_tau = self.kendall_tau
            # Kendall's tau inversion: rho = sin(pi * tau / 2), clipped to the
            # admissible parameter bounds.
            self.rho_ = np.clip(
                np.sin((np.pi * kendall_tau) / 2.0),
                a_min=_RHO_BOUNDS[0],
                a_max=_RHO_BOUNDS[1],
            )

        else:
            # MLE: minimize the negative log-likelihood over the bounded interval.
            result = so.minimize_scalar(
                _neg_log_likelihood,
                args=(X,),
                bounds=_RHO_BOUNDS,
                method="bounded",
                options={"xatol": self.tolerance},
            )
            if not result.success:
                raise RuntimeError(f"Optimization failed: {result.message}")
            self.rho_ = result.x

        return self

    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the CDF of the bivariate Gaussian copula.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        cdf : ndarray of shape (n_observations,)
            CDF values for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        # Map the uniforms to standard normal quantiles, then evaluate the
        # bivariate normal CDF with correlation rho_.
        cdf = st.multivariate_normal.cdf(
            x=sp.ndtri(X),
            mean=np.array([0, 0]),
            cov=np.array([[1, self.rho_], [self.rho_, 1]]),
        )
        return cdf

    def partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the h-function (partial derivative) for the bivariate Gaussian
        copula.

        The h-function with respect to the second margin represents the conditional
        distribution function of :math:`u` given :math:`v`:

        .. math:: \begin{aligned}
                  h(u \mid v) &= \frac{\partial C(u,v)}{\partial v} \\
                  &= \Phi\Bigl(\frac{\Phi^{-1}(u)-\rho\,\Phi^{-1}(v)}{\sqrt{1-\rho^2}}\Bigr)
                  \end{aligned}

        where :math:`\Phi` is the standard normal CDF and :math:`\Phi^{-1}` is its
        inverse (the quantile function).

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        first_margin : bool, default=False
            If True, compute the partial derivative with respect to the first
            margin `u`; otherwise, compute the partial derivative with respect to the
            second margin `v`.

        Returns
        -------
        p : ndarray of shape (n_observations,)
            h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        X = _apply_margin_swap(X, first_margin=first_margin)
        # Compute the inverse CDF (percent point function) using ndtri for better
        # performance
        u_inv, v_inv = sp.ndtri(X).T
        p = sp.ndtr((u_inv - self.rho_ * v_inv) / np.sqrt(1 - self.rho_**2))
        return p

    def inverse_partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the inverse of the bivariate copula's partial derivative, commonly
        known as the inverse h-function [1]_.

        Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
        second margin is defined by

        .. math::
            h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},

        which is the conditional distribution of :math:`U` given :math:`V = v`.
        The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
        value :math:`u \in [0,1]` such that

        .. math::
            h(u \mid v) \;=\; p,
            \quad \text{where } p \in [0,1].

        In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
        :math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
        :math:`p = \partial C(u, v)/\partial v`.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
            - The first column `p` corresponds to the value of the h-function.
            - The second column `v` is the conditioning variable.

        first_margin : bool, default=False
            If True, compute the inverse partial derivative with respect to the first
            margin `u`; otherwise, compute the inverse partial derivative with respect
            to the second margin `v`.

        Returns
        -------
        u : ndarray of shape (n_observations,)
            A 1D-array of length `n_observations`, where each element is the computed
            :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.

        References
        ----------
        .. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
        .. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        X = _apply_margin_swap(X, first_margin=first_margin)
        # Closed-form inversion of the h-function in normal-quantile space.
        p_inv, v_inv = sp.ndtri(X).T
        u_inv = p_inv * np.sqrt(1 - self.rho_**2) + self.rho_ * v_inv
        u = sp.ndtr(u_inv)
        return u

    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            The log-likelihood of each sample under the fitted copula.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        log_density = _base_sample_scores(X=X, rho=self.rho_)
        return log_density

    @property
    def lower_tail_dependence(self) -> float:
        """Theoretical lower tail dependence coefficient."""
        # Return a float (not the int 0) to honor the annotation and the
        # documented output `0.0`.
        return 0.0

    @property
    def upper_tail_dependence(self) -> float:
        """Theoretical upper tail dependence coefficient."""
        # Return a float (not the int 0) to honor the annotation and the
        # documented output `0.0`.
        return 0.0

    @property
    def fitted_repr(self) -> str:
        """String representation of the fitted copula."""
        return f"{self.__class__.__name__}(rho={self.rho_:0.3f})"
350
+
351
+
352
def _neg_log_likelihood(rho: float, X: np.ndarray) -> float:
    """Objective minimized during MLE: the negated total log-likelihood.

    Parameters
    ----------
    rho : float
        Correlation copula parameter.

    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    Returns
    -------
    value : float
        The negative log-likelihood value.
    """
    # Per-observation log-densities, summed and negated so that a minimizer
    # maximizes the likelihood.
    per_sample_log_pdf = _base_sample_scores(X=X, rho=rho)
    return -np.sum(per_sample_log_pdf)
371
+
372
+
373
+ def _base_sample_scores(X: np.ndarray, rho: float) -> np.ndarray:
374
+ """Compute the log-likelihood of each sample (log-pdf) under the bivariate
375
+ Gaussian copula model.
376
+
377
+ Parameters
378
+ ----------
379
+ X : array-like of shape (n_observations, 2)
380
+ An array of bivariate inputs `(u, v)` where each row represents a
381
+ bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
382
+ having been transformed to uniform marginals.
383
+
384
+ rho : float
385
+ Gaussian copula parameter.
386
+
387
+ Returns
388
+ -------
389
+ density : ndarray of shape (n_observations,)
390
+ The log-likelihood of each sample under the fitted copula.
391
+
392
+ Raises
393
+ ------
394
+ ValueError
395
+ If rho is not in (-1, 1)
396
+ """
397
+ if not (-1.0 <= rho <= 1.0):
398
+ raise ValueError("rho must be between -1 and 1.")
399
+
400
+ # Inverse CDF (ppf) using stdtrit for better performance
401
+ u_inv, v_inv = sp.ndtri(X).T
402
+
403
+ # Using np.log1p to avoid loss of precision
404
+ log_density = -0.5 * np.log1p(-(rho**2)) - rho * (
405
+ 0.5 * rho * (u_inv**2 + v_inv**2) - u_inv * v_inv
406
+ ) / (1 - rho**2)
407
+ return log_density