skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. skfolio/__init__.py +2 -2
  2. skfolio/cluster/__init__.py +1 -1
  3. skfolio/cluster/_hierarchical.py +1 -1
  4. skfolio/datasets/__init__.py +1 -1
  5. skfolio/datasets/_base.py +2 -2
  6. skfolio/datasets/data/__init__.py +1 -0
  7. skfolio/distance/__init__.py +1 -1
  8. skfolio/distance/_base.py +2 -2
  9. skfolio/distance/_distance.py +4 -4
  10. skfolio/distribution/__init__.py +56 -0
  11. skfolio/distribution/_base.py +203 -0
  12. skfolio/distribution/copula/__init__.py +35 -0
  13. skfolio/distribution/copula/_base.py +456 -0
  14. skfolio/distribution/copula/_clayton.py +539 -0
  15. skfolio/distribution/copula/_gaussian.py +407 -0
  16. skfolio/distribution/copula/_gumbel.py +560 -0
  17. skfolio/distribution/copula/_independent.py +196 -0
  18. skfolio/distribution/copula/_joe.py +609 -0
  19. skfolio/distribution/copula/_selection.py +111 -0
  20. skfolio/distribution/copula/_student_t.py +486 -0
  21. skfolio/distribution/copula/_utils.py +509 -0
  22. skfolio/distribution/multivariate/__init__.py +11 -0
  23. skfolio/distribution/multivariate/_base.py +241 -0
  24. skfolio/distribution/multivariate/_utils.py +632 -0
  25. skfolio/distribution/multivariate/_vine_copula.py +1254 -0
  26. skfolio/distribution/univariate/__init__.py +19 -0
  27. skfolio/distribution/univariate/_base.py +308 -0
  28. skfolio/distribution/univariate/_gaussian.py +136 -0
  29. skfolio/distribution/univariate/_johnson_su.py +152 -0
  30. skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
  31. skfolio/distribution/univariate/_selection.py +85 -0
  32. skfolio/distribution/univariate/_student_t.py +144 -0
  33. skfolio/exceptions.py +6 -6
  34. skfolio/measures/__init__.py +1 -1
  35. skfolio/measures/_enums.py +7 -7
  36. skfolio/measures/_measures.py +4 -7
  37. skfolio/metrics/__init__.py +2 -0
  38. skfolio/metrics/_scorer.py +4 -4
  39. skfolio/model_selection/__init__.py +2 -2
  40. skfolio/model_selection/_combinatorial.py +15 -12
  41. skfolio/model_selection/_validation.py +2 -2
  42. skfolio/model_selection/_walk_forward.py +3 -3
  43. skfolio/moments/covariance/_base.py +1 -1
  44. skfolio/moments/covariance/_denoise_covariance.py +1 -1
  45. skfolio/moments/covariance/_detone_covariance.py +1 -1
  46. skfolio/moments/covariance/_empirical_covariance.py +1 -1
  47. skfolio/moments/covariance/_ew_covariance.py +1 -1
  48. skfolio/moments/covariance/_gerber_covariance.py +1 -1
  49. skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
  50. skfolio/moments/covariance/_implied_covariance.py +2 -7
  51. skfolio/moments/covariance/_ledoit_wolf.py +1 -1
  52. skfolio/moments/covariance/_oas.py +1 -1
  53. skfolio/moments/covariance/_shrunk_covariance.py +1 -1
  54. skfolio/moments/expected_returns/_base.py +1 -1
  55. skfolio/moments/expected_returns/_empirical_mu.py +1 -1
  56. skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
  57. skfolio/moments/expected_returns/_ew_mu.py +1 -1
  58. skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
  59. skfolio/optimization/__init__.py +2 -0
  60. skfolio/optimization/_base.py +2 -2
  61. skfolio/optimization/cluster/__init__.py +2 -0
  62. skfolio/optimization/cluster/_nco.py +7 -7
  63. skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
  64. skfolio/optimization/cluster/hierarchical/_base.py +1 -2
  65. skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
  66. skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
  67. skfolio/optimization/convex/__init__.py +2 -0
  68. skfolio/optimization/convex/_base.py +8 -8
  69. skfolio/optimization/convex/_distributionally_robust.py +4 -4
  70. skfolio/optimization/convex/_maximum_diversification.py +5 -5
  71. skfolio/optimization/convex/_mean_risk.py +5 -6
  72. skfolio/optimization/convex/_risk_budgeting.py +3 -3
  73. skfolio/optimization/ensemble/__init__.py +2 -0
  74. skfolio/optimization/ensemble/_base.py +2 -2
  75. skfolio/optimization/ensemble/_stacking.py +1 -1
  76. skfolio/optimization/naive/__init__.py +2 -0
  77. skfolio/optimization/naive/_naive.py +1 -1
  78. skfolio/population/__init__.py +2 -0
  79. skfolio/population/_population.py +34 -7
  80. skfolio/portfolio/_base.py +42 -8
  81. skfolio/portfolio/_multi_period_portfolio.py +3 -2
  82. skfolio/portfolio/_portfolio.py +4 -4
  83. skfolio/pre_selection/__init__.py +2 -0
  84. skfolio/pre_selection/_drop_correlated.py +2 -2
  85. skfolio/pre_selection/_select_complete.py +25 -26
  86. skfolio/pre_selection/_select_k_extremes.py +2 -2
  87. skfolio/pre_selection/_select_non_dominated.py +2 -2
  88. skfolio/pre_selection/_select_non_expiring.py +2 -2
  89. skfolio/preprocessing/__init__.py +2 -0
  90. skfolio/preprocessing/_returns.py +2 -2
  91. skfolio/prior/__init__.py +4 -0
  92. skfolio/prior/_base.py +2 -2
  93. skfolio/prior/_black_litterman.py +5 -3
  94. skfolio/prior/_empirical.py +3 -1
  95. skfolio/prior/_factor_model.py +8 -4
  96. skfolio/prior/_synthetic_data.py +239 -0
  97. skfolio/synthetic_returns/__init__.py +1 -0
  98. skfolio/typing.py +1 -1
  99. skfolio/uncertainty_set/__init__.py +2 -0
  100. skfolio/uncertainty_set/_base.py +2 -2
  101. skfolio/uncertainty_set/_bootstrap.py +1 -1
  102. skfolio/uncertainty_set/_empirical.py +1 -1
  103. skfolio/utils/__init__.py +1 -0
  104. skfolio/utils/bootstrap.py +2 -2
  105. skfolio/utils/equations.py +13 -10
  106. skfolio/utils/sorting.py +2 -2
  107. skfolio/utils/stats.py +7 -7
  108. skfolio/utils/tools.py +76 -12
  109. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
  110. skfolio-0.8.0.dist-info/RECORD +120 -0
  111. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
  112. skfolio-0.7.0.dist-info/RECORD +0 -95
  113. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
  114. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ """Bivariate Copula Selection."""
2
+
3
+ # Copyright (c) 2025
4
+ # Authors: The skfolio developers
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+ import scipy.stats as st
11
+ import sklearn as sk
12
+
13
+ from skfolio.distribution._base import SelectionCriterion
14
+ from skfolio.distribution.copula._base import BaseBivariateCopula
15
+ from skfolio.distribution.copula._clayton import ClaytonCopula
16
+ from skfolio.distribution.copula._gaussian import GaussianCopula
17
+ from skfolio.distribution.copula._gumbel import GumbelCopula
18
+ from skfolio.distribution.copula._independent import IndependentCopula
19
+ from skfolio.distribution.copula._joe import JoeCopula
20
+ from skfolio.distribution.copula._student_t import StudentTCopula
21
+
22
+
23
+ def select_bivariate_copula(
24
+ X: npt.ArrayLike,
25
+ copula_candidates: list[BaseBivariateCopula] | None = None,
26
+ selection_criterion: SelectionCriterion = SelectionCriterion.AIC,
27
+ independence_level: float = 0.05,
28
+ ) -> BaseBivariateCopula:
29
+ """
30
+ Select the best bivariate copula from a list of candidates using an information
31
+ criterion.
32
+
33
+ This function first tests the dependence between the two variables in X using
34
+ Kendall's tau independence test. If the p-value is greater than or equal to
35
+ `independence_level`, the null hypothesis of independence is not rejected, and the
36
+ `IndependentCopula` is returned. Otherwise, each candidate copula in
37
+ `copula_candidates` is fitted to the data X. For each candidate, either the
38
+ Akaike Information Criterion (AIC) or the Bayesian Information Criterion (BIC) is
39
+ computed, and the copula with the lowest criterion value is selected.
40
+
41
+ Parameters
42
+ ----------
43
+ X : array-like of shape (n_observations, 2)
44
+ An array of bivariate inputs (u, v) with uniform marginals (values in [0, 1]).
45
+
46
+ copula_candidates : list[BaseBivariateCopula]
47
+ A list of candidate copula models. Each candidate must inherit from
48
+ `BaseBivariateCopula`. If None, defaults to
49
+ `[GaussianCopula(), StudentTCopula(), ClaytonCopula(), GumbelCopula(), JoeCopula()]`.
50
+
51
+ selection_criterion : SelectionCriterion, default=SelectionCriterion.AIC
52
+ The criterion used for model selection. Possible values are:
53
+ - SelectionCriterion.AIC : Akaike Information Criterion
54
+ - SelectionCriterion.BIC : Bayesian Information Criterion
55
+
56
+ independence_level : float, default=0.05
57
+ The significance level for the Kendall tau independence test. If the p-value is
58
+ greater than or equal to this level, the independence hypothesis is not
59
+ rejected, and the `IndependentCopula` is returned.
60
+
61
+ Returns
62
+ -------
63
+ selected_copula : BaseBivariateCopula
64
+ The fitted copula model among the candidates that minimizes the selected
65
+ information criterion (AIC or BIC).
66
+
67
+ Raises
68
+ ------
69
+ ValueError
70
+ If X is not a 2D array with exactly two columns, or if any candidate in
71
+ `copula_candidates` does not inherit from `BaseBivariateCopula`.
72
+ """
73
+ if copula_candidates is None:
74
+ copula_candidates = [
75
+ GaussianCopula(),
76
+ StudentTCopula(),
77
+ ClaytonCopula(),
78
+ GumbelCopula(),
79
+ JoeCopula(),
80
+ ]
81
+
82
+ X = np.asarray(X)
83
+ if X.ndim != 2 or X.shape[1] != 2:
84
+ raise ValueError("X must contains two columns for Bivariate Copula")
85
+
86
+ kendall_tau, p_value = st.kendalltau(X[:, 0], X[:, 1])
87
+ if p_value >= independence_level:
88
+ return IndependentCopula().fit(X)
89
+
90
+ results = {}
91
+ for copula in copula_candidates:
92
+ if not isinstance(copula, BaseBivariateCopula):
93
+ raise ValueError(
94
+ "The candidate copula must inherit from `BaseBivariateCopula`"
95
+ )
96
+ copula = sk.clone(copula)
97
+ if copula.itau and copula.kendall_tau is None:
98
+ # Faster computation by reusing kendall tau if itau
99
+ copula.kendall_tau = kendall_tau
100
+ copula.fit(X)
101
+
102
+ match selection_criterion:
103
+ case selection_criterion.AIC:
104
+ results[copula] = copula.aic(X)
105
+ case selection_criterion.BIC:
106
+ results[copula] = copula.bic(X)
107
+ case _:
108
+ raise ValueError(f"{selection_criterion} not implemented")
109
+
110
+ selected_copula = min(results, key=results.get)
111
+ return selected_copula
@@ -0,0 +1,486 @@
1
+ """Bivariate Student's t Copula Estimation."""
2
+
3
+ # Copyright (c) 2025
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+ import scipy.optimize as so
11
+ import scipy.special as sp
12
+ import scipy.stats as st
13
+ import sklearn.utils.validation as skv
14
+
15
+ from skfolio.distribution.copula._base import _RHO_BOUNDS, BaseBivariateCopula
16
+ from skfolio.distribution.copula._utils import _apply_margin_swap
17
+
18
+ # A Student's t copula with dof below 2.0 is so heavy-tailed that the variance (and,
19
+ # for dof <= 1, even the mean) of the underlying t-distribution does not exist, which
20
+ # makes it impractical. Conversely, with dof above 50 it tends to a Gaussian copula.
21
+ # We therefore limit dof to the interval [2, 50] for improved stability and robustness.
22
+ _DOF_BOUNDS = (2.0, 50.0)
23
+
24
+
25
class StudentTCopula(BaseBivariateCopula):
    r"""Bivariate Student's t Copula Estimation.

    The bivariate Student's t copula (its CDF) is defined as:

    .. math::
        C_{\nu, \rho}(u, v) = T_{\nu, \rho} \Bigl(t_{\nu}^{-1}(u),\;t_{\nu}^{-1}(v)\Bigr)

    where:
        - :math:`\nu > 0` is the degrees of freedom.
        - :math:`\rho \in (-1, 1)` is the correlation coefficient.
        - :math:`T_{\nu, \rho}(x, y)` is the CDF of the bivariate t-distribution.
        - :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
          univariate t-distribution.

    Student's t copula with degrees of freedom (dof) less than 2.0 is extremely
    heavy-tailed, to the extent that the variance (and, for dof <= 1, even the mean)
    of the underlying t-distribution does not exist, rendering it impractical.
    Conversely, for dof above 50 the t copula behaves similarly to a Gaussian copula.
    Thus, for improved stability and robustness, the dof is limited to the interval
    [2, 50].

    .. note::

        Rotations are not needed for elliptical copula (e.g., Gaussian or Student-t)
        because its correlation parameter :math:`\rho \in (-1, 1)` naturally covers
        both positive and negative dependence, and they exhibit symmetric tail behavior.


    Parameters
    ----------
    itau : bool, default=True
        If True, :math:`\rho` is estimated using the Kendall's tau inversion method;
        otherwise, we use the MLE (Maximum Likelihood Estimation) method. The MLE is
        slower but more accurate.

    kendall_tau : float, optional
        If provided, this value is used instead of computing Kendall's tau from the
        data. With `itau=True` it determines :math:`\rho` directly; with
        `itau=False` it only seeds the initial guess of the optimizer.

    tolerance : float, default=1e-4
        Convergence tolerance for the MLE optimization.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    rho_ : float
        Fitted correlation coefficient (:math:`\rho`), constrained to `_RHO_BOUNDS`.
    dof_ : float
        Fitted degrees of freedom (:math:`\nu`) in [2, 50].

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_dataset
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution import StudentTCopula, compute_pseudo_observations
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_dataset()
    >>> X = prices_to_returns(prices)
    >>> X = X[["AAPL", "JPM"]]
    >>>
    >>> # Convert returns to pseudo observation in the interval [0,1]
    >>> X = compute_pseudo_observations(X)
    >>>
    >>> # Initialize the Copula estimator
    >>> model = StudentTCopula()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameter and tail dependence coefficients
    >>> print(model.fitted_repr)
    StudentTCopula(rho=0.327, dof=5.14)
    >>> print(model.lower_tail_dependence)
    0.1270
    >>> print(model.upper_tail_dependence)
    0.1270
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
    >>> # Inverse Partial Derivative, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> p = model.partial_derivative(X)
    >>> u = model.inverse_partial_derivative(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the tail concentration function.
    >>> fig = model.plot_tail_concentration()
    >>> fig.show()
    >>>
    >>> # Plot a 2D contour of the estimated PDF.
    >>> fig = model.plot_pdf_2d()
    >>> fig.show()
    >>>
    >>> # Plot a 3D surface of the estimated PDF.
    >>> fig = model.plot_pdf_3d()
    >>> fig.show()

    References
    ----------
    .. [1] "An Introduction to Copulas (2nd ed.)",
        Nelsen (2006)

    .. [2] "Multivariate Models and Dependence Concepts",
        Joe, Chapman & Hall (1997)

    .. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
        McNeil, Frey & Embrechts (2005)

    .. [4] "The t Copula and Related Copulas",
        Demarta & McNeil (2005)

    .. [5] "Copula Methods in Finance",
        Cherubini, Luciano & Vecchiato (2004)
    """

    rho_: float
    dof_: float
    # Number of fitted parameters (rho and dof).
    _n_params = 2

    def __init__(
        self,
        itau: bool = True,
        kendall_tau: float | None = None,
        tolerance: float = 1e-4,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.itau = itau
        self.kendall_tau = kendall_tau
        self.tolerance = tolerance

    def fit(self, X: npt.ArrayLike, y=None) -> "StudentTCopula":
        r"""Fit the Bivariate Student's t Copula.

        If `itau` is True, it uses a Kendall-based two-step method:
            - Estimates the correlation parameter (:math:`\rho`) from Kendall's
              tau inversion.

            - Optimizes the degrees of freedom (:math:`\nu`) by maximizing the
              log-likelihood.

        Otherwise, it uses the full MLE method: optimizes both :math:`\rho` and
        :math:`\nu` by maximizing the log-likelihood.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval [0, 1],
            having been transformed to uniform marginals.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : StudentTCopula
            Returns the instance itself.

        Raises
        ------
        RuntimeError
            If the underlying scipy optimizer reports a failure.
        """
        X = self._validate_X(X, reset=True)

        if self.kendall_tau is None:
            kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
        else:
            kendall_tau = self.kendall_tau

        # Either used directly or for initial guess
        rho_from_tau = np.clip(
            np.sin((np.pi * kendall_tau) / 2.0),
            a_min=_RHO_BOUNDS[0],
            a_max=_RHO_BOUNDS[1],
        )

        if self.itau:
            # rho is fixed from Kendall's tau; only dof is optimized (1D bounded
            # search), so dof must be the first positional argument of the objective.
            res = so.minimize_scalar(
                _neg_log_likelihood,
                args=(
                    rho_from_tau,
                    X,
                ),
                bounds=_DOF_BOUNDS,
                method="bounded",
                options={"xatol": self.tolerance},
            )
            if not res.success:
                raise RuntimeError(f"Optimization failed: {res.message}")
            self.dof_ = res.x
            self.rho_ = rho_from_tau
        else:
            # We'll use L-BFGS-B for the optimization because:
            # 1) The bivariate Student-t copula's negative log-likelihood is smooth,
            #    making gradient-based methods more efficient than derivative-free
            #    methods.
            # 2) L-BFGS-B directly supports simple box bounds (here `_DOF_BOUNDS`
            #    for nu and `_RHO_BOUNDS` for rho).
            # 3) It's typically faster and more stable for small-dimensional problems
            #    than more general constraint solvers (like trust-constr or SLSQP)
            result = so.minimize(
                fun=lambda x: _neg_log_likelihood(dof=x[0], rho=x[1], X=X),
                x0=np.array([3.0, rho_from_tau]),
                bounds=(_DOF_BOUNDS, _RHO_BOUNDS),
                method="L-BFGS-B",
                tol=self.tolerance,
            )
            if not result.success:
                raise RuntimeError(f"Optimization failed: {result.message}")
            self.dof_, self.rho_ = result.x

        return self

    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the CDF of the bivariate Student-t copula.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        cdf : ndarray of shape (n_observations,)
            CDF values for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        # Map the uniforms to t-quantiles, then evaluate the bivariate t CDF.
        cdf = st.multivariate_t.cdf(
            x=sp.stdtrit(self.dof_, X),
            loc=np.array([0, 0]),
            shape=np.array([[1, self.rho_], [self.rho_, 1]]),
            df=self.dof_,
        )
        return cdf

    def partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the h-function (partial derivative) for the bivariate Student's t
        copula.

        The h-function with respect to the second margin represents the conditional
        distribution function of :math:`u` given :math:`v`:

        .. math:: \begin{aligned}
                  h(u \mid v) &= \frac{\partial C(u,v)}{\partial v} \\
                              &= t_{\nu+1}\!\left(\frac{t_\nu^{-1}(u) - \rho\,t_\nu^{-1}(v)}
                                 {\sqrt{\frac{(1-\rho^2)\left(\nu + \left(t_\nu^{-1}(v)\right)^2\right)}{\nu+1}}}\right).
                  \end{aligned}

        where:
            - :math:`\nu > 0` is the degrees of freedom.
            - :math:`\rho \in (-1, 1)` is the correlation coefficient.
            - :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
              univariate \(t\)-distribution.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        first_margin : bool, default=False
            If True, compute the partial derivative with respect to the first
            margin `u`; otherwise, compute the partial derivative with respect to the
            second margin `v`.

        Returns
        -------
        p : ndarray of shape (n_observations,)
            h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        X = _apply_margin_swap(X, first_margin=first_margin)
        # Compute the inverse CDF (percent point function) using stdtrit for better
        # performance
        u_inv, v_inv = sp.stdtrit(self.dof_, X).T
        # Compute the denominator: sqrt((1 - rho^2) * (nu + y^2) / (nu + 1))
        z = (u_inv - self.rho_ * v_inv) / (
            np.sqrt((1 - self.rho_**2) * (self.dof_ + v_inv**2) / (self.dof_ + 1))
        )
        # Student's t CDF with (nu+1) degrees of freedom using stdtr for better
        # performance
        p = sp.stdtr(self.dof_ + 1, z)
        return p

    def inverse_partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the inverse of the bivariate copula's partial derivative, commonly
        known as the inverse h-function [1]_.

        Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
        second margin is defined by

        .. math::
            h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},

        which is the conditional distribution of :math:`U` given :math:`V = v`.
        The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
        value :math:`u \in [0,1]` such that

        .. math::
            h(u \mid v) \;=\; p,
            \quad \text{where } p \in [0,1].

        In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
        :math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
        :math:`p = \partial C(u, v)/\partial v`.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
            - The first column `p` corresponds to the value of the h-function.
            - The second column `v` is the conditioning variable.

        first_margin : bool, default=False
            If True, compute the inverse partial derivative with respect to the first
            margin `u`; otherwise, compute the inverse partial derivative with respect
            to the second margin `v`.

        Returns
        -------
        u : ndarray of shape (n_observations,)
            A 1D-array of length `n_observations`, where each element is the computed
            :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.

        References
        ----------
        .. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
        .. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        X = _apply_margin_swap(X, first_margin=first_margin)
        # Algebraic inversion of the h-function: p uses (nu + 1) degrees of freedom,
        # the conditioning variable uses nu.
        p_inv = sp.stdtrit(self.dof_ + 1, X[:, 0])
        v_inv = sp.stdtrit(self.dof_, X[:, 1])
        u_inv = (
            p_inv
            * np.sqrt((self.dof_ + v_inv**2) / (self.dof_ + 1) * (1 - self.rho_**2))
            + self.rho_ * v_inv
        )
        u = sp.stdtr(self.dof_, u_inv)
        return u

    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            The log-likelihood of each sample under the fitted copula.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        log_density = _sample_scores(X=X, rho=self.rho_, dof=self.dof_)
        return log_density

    @property
    def lower_tail_dependence(self) -> float:
        """Theoretical lower tail dependence coefficient."""
        skv.check_is_fitted(self)
        arg = -np.sqrt((self.dof_ + 1) * (1 - self.rho_) / (1 + self.rho_))
        return 2 * sp.stdtr(self.dof_ + 1, arg)

    @property
    def upper_tail_dependence(self) -> float:
        """Theoretical upper tail dependence coefficient.

        Returns the same value as `lower_tail_dependence`.
        """
        return self.lower_tail_dependence

    @property
    def fitted_repr(self) -> str:
        """String representation of the fitted copula."""
        # Same fitted-state guard as the other fitted-only accessors of this class.
        skv.check_is_fitted(self)
        return f"{self.__class__.__name__}(rho={self.rho_:0.3f}, dof={self.dof_:0.2f})"
419
+
420
+
421
def _neg_log_likelihood(dof: float, rho: float, X: np.ndarray) -> float:
    """Objective minimized during fitting: the negated total log-likelihood.

    `dof` is the first positional parameter so that the function can be used
    directly as a one-dimensional objective in the degrees of freedom, with
    `rho` and `X` supplied as extra arguments.

    Parameters
    ----------
    dof : float
        Degree of freedom copula parameter.

    rho : float
        Correlation copula parameter.

    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    Returns
    -------
    value : float
        The negative of the sum of the per-sample log-likelihoods.
    """
    per_sample_log_likelihood = _sample_scores(X=X, rho=rho, dof=dof)
    return -np.sum(per_sample_log_likelihood)
443
+
444
+
445
def _sample_scores(X: np.ndarray, rho: float, dof: float) -> np.ndarray:
    """Compute the log-likelihood of each sample (log-pdf) under the bivariate
    Student's t copula model.

    Parameters
    ----------
    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    rho : float
        Correlation copula parameter.

    dof : float
        Degree of freedom copula parameter.

    Returns
    -------
    density : ndarray of shape (n_observations,)
        The log-likelihood of each sample under the copula with the given
        parameters.

    Raises
    ------
    ValueError
        If rho is outside [-1, 1] or dof is outside [1, 50].
    """
    # Keep the negated chained comparisons: they also reject NaN parameters.
    rho_is_valid = -1.0 <= rho <= 1.0
    if not rho_is_valid:
        raise ValueError("rho must be between -1 and 1.")
    dof_is_valid = 1.0 <= dof <= 50
    if not dof_is_valid:
        raise ValueError("Degrees of freedom `dof` must be between 1 and 50.")

    # Inverse CDF (ppf) using stdtrit for better performance
    quantiles = sp.stdtrit(dof, X)
    x, y = quantiles.T

    a = 1.0 - rho**2
    # Normalizing constant of the copula density (log scale).
    log_constant = (
        sp.gammaln((dof + 2.0) / 2.0)
        + sp.gammaln(dof / 2.0)
        - 2.0 * sp.gammaln((dof + 1.0) / 2.0)
        - np.log(a) / 2
    )
    # Contribution of the two univariate t marginals.
    marginal_term = (dof + 1.0) / 2.0 * (np.log1p(x**2 / dof) + np.log1p(y**2 / dof))
    # Contribution of the joint bivariate t kernel.
    joint_term = (
        (dof + 2.0) / 2.0 * np.log1p((x**2 - 2 * rho * x * y + y**2) / a / dof)
    )
    return log_constant + marginal_term - joint_term