skfolio 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +2 -2
- skfolio/cluster/__init__.py +1 -1
- skfolio/cluster/_hierarchical.py +1 -1
- skfolio/datasets/__init__.py +1 -1
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +1 -1
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +4 -4
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +6 -6
- skfolio/measures/__init__.py +1 -1
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +2 -2
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +1 -1
- skfolio/moments/covariance/_detone_covariance.py +1 -1
- skfolio/moments/covariance/_empirical_covariance.py +1 -1
- skfolio/moments/covariance/_ew_covariance.py +1 -1
- skfolio/moments/covariance/_gerber_covariance.py +1 -1
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +2 -7
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +1 -1
- skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
- skfolio/moments/expected_returns/_ew_mu.py +1 -1
- skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
- skfolio/optimization/__init__.py +2 -0
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +2 -0
- skfolio/optimization/cluster/_nco.py +7 -7
- skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
- skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
- skfolio/optimization/convex/__init__.py +2 -0
- skfolio/optimization/convex/_base.py +8 -8
- skfolio/optimization/convex/_distributionally_robust.py +4 -4
- skfolio/optimization/convex/_maximum_diversification.py +5 -5
- skfolio/optimization/convex/_mean_risk.py +5 -6
- skfolio/optimization/convex/_risk_budgeting.py +3 -3
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +1 -1
- skfolio/optimization/naive/__init__.py +2 -0
- skfolio/optimization/naive/_naive.py +1 -1
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +35 -9
- skfolio/portfolio/_base.py +42 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +4 -4
- skfolio/pre_selection/__init__.py +2 -0
- skfolio/pre_selection/_drop_correlated.py +2 -2
- skfolio/pre_selection/_select_complete.py +25 -26
- skfolio/pre_selection/_select_k_extremes.py +2 -2
- skfolio/pre_selection/_select_non_dominated.py +2 -2
- skfolio/pre_selection/_select_non_expiring.py +2 -2
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +4 -0
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +5 -3
- skfolio/prior/_empirical.py +3 -1
- skfolio/prior/_factor_model.py +8 -4
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +1 -1
- skfolio/uncertainty_set/__init__.py +2 -0
- skfolio/uncertainty_set/_base.py +2 -2
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +7 -7
- skfolio/utils/tools.py +76 -12
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/METADATA +99 -24
- skfolio-0.8.1.dist-info/RECORD +120 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/WHEEL +1 -1
- skfolio-0.7.0.dist-info/RECORD +0 -95
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
|
|
1
|
+
"""Bivariate Copula Selection."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
import scipy.stats as st
|
11
|
+
import sklearn as sk
|
12
|
+
|
13
|
+
from skfolio.distribution._base import SelectionCriterion
|
14
|
+
from skfolio.distribution.copula._base import BaseBivariateCopula
|
15
|
+
from skfolio.distribution.copula._clayton import ClaytonCopula
|
16
|
+
from skfolio.distribution.copula._gaussian import GaussianCopula
|
17
|
+
from skfolio.distribution.copula._gumbel import GumbelCopula
|
18
|
+
from skfolio.distribution.copula._independent import IndependentCopula
|
19
|
+
from skfolio.distribution.copula._joe import JoeCopula
|
20
|
+
from skfolio.distribution.copula._student_t import StudentTCopula
|
21
|
+
|
22
|
+
|
23
|
+
def select_bivariate_copula(
|
24
|
+
X: npt.ArrayLike,
|
25
|
+
copula_candidates: list[BaseBivariateCopula] | None = None,
|
26
|
+
selection_criterion: SelectionCriterion = SelectionCriterion.AIC,
|
27
|
+
independence_level: float = 0.05,
|
28
|
+
) -> BaseBivariateCopula:
|
29
|
+
"""
|
30
|
+
Select the best bivariate copula from a list of candidates using an information
|
31
|
+
criterion.
|
32
|
+
|
33
|
+
This function first tests the dependence between the two variables in X using
|
34
|
+
Kendall's tau independence test. If the p-value is greater than or equal to
|
35
|
+
`independence_level`, the null hypothesis of independence is not rejected, and the
|
36
|
+
`IndependentCopula` is returned. Otherwise, each candidate copula in
|
37
|
+
`copula_candidates` is fitted to the data X. For each candidate, either the
|
38
|
+
Akaike Information Criterion (AIC) or the Bayesian Information Criterion (BIC) is
|
39
|
+
computed, and the copula with the lowest criterion value is selected.
|
40
|
+
|
41
|
+
Parameters
|
42
|
+
----------
|
43
|
+
X : array-like of shape (n_observations, 2)
|
44
|
+
An array of bivariate inputs (u, v) with uniform marginals (values in [0, 1]).
|
45
|
+
|
46
|
+
copula_candidates : list[BaseBivariateCopula]
|
47
|
+
A list of candidate copula models. Each candidate must inherit from
|
48
|
+
`BaseBivariateCopula`. If None, defaults to
|
49
|
+
`[GaussianCopula(), StudentTCopula(), ClaytonCopula(), GumbelCopula(), JoeCopula()]`.
|
50
|
+
|
51
|
+
selection_criterion : SelectionCriterion, default=SelectionCriterion.AIC
|
52
|
+
The criterion used for model selection. Possible values are:
|
53
|
+
- SelectionCriterion.AIC : Akaike Information Criterion
|
54
|
+
- SelectionCriterion.BIC : Bayesian Information Criterion
|
55
|
+
|
56
|
+
independence_level : float, default=0.05
|
57
|
+
The significance level for the Kendall tau independence test. If the p-value is
|
58
|
+
greater than or equal to this level, the independence hypothesis is not
|
59
|
+
rejected, and the `IndependentCopula` is returned.
|
60
|
+
|
61
|
+
Returns
|
62
|
+
-------
|
63
|
+
selected_copula : BaseBivariateCopula
|
64
|
+
The fitted copula model among the candidates that minimizes the selected
|
65
|
+
information criterion (AIC or BIC).
|
66
|
+
|
67
|
+
Raises
|
68
|
+
------
|
69
|
+
ValueError
|
70
|
+
If X is not a 2D array with exactly two columns, or if any candidate in
|
71
|
+
`copula_candidates` does not inherit from `BaseBivariateCopula`.
|
72
|
+
"""
|
73
|
+
if copula_candidates is None:
|
74
|
+
copula_candidates = [
|
75
|
+
GaussianCopula(),
|
76
|
+
StudentTCopula(),
|
77
|
+
ClaytonCopula(),
|
78
|
+
GumbelCopula(),
|
79
|
+
JoeCopula(),
|
80
|
+
]
|
81
|
+
|
82
|
+
X = np.asarray(X)
|
83
|
+
if X.ndim != 2 or X.shape[1] != 2:
|
84
|
+
raise ValueError("X must contains two columns for Bivariate Copula")
|
85
|
+
|
86
|
+
kendall_tau, p_value = st.kendalltau(X[:, 0], X[:, 1])
|
87
|
+
if p_value >= independence_level:
|
88
|
+
return IndependentCopula().fit(X)
|
89
|
+
|
90
|
+
results = {}
|
91
|
+
for copula in copula_candidates:
|
92
|
+
if not isinstance(copula, BaseBivariateCopula):
|
93
|
+
raise ValueError(
|
94
|
+
"The candidate copula must inherit from `BaseBivariateCopula`"
|
95
|
+
)
|
96
|
+
copula = sk.clone(copula)
|
97
|
+
if copula.itau and copula.kendall_tau is None:
|
98
|
+
# Faster computation by reusing kendall tau if itau
|
99
|
+
copula.kendall_tau = kendall_tau
|
100
|
+
copula.fit(X)
|
101
|
+
|
102
|
+
match selection_criterion:
|
103
|
+
case selection_criterion.AIC:
|
104
|
+
results[copula] = copula.aic(X)
|
105
|
+
case selection_criterion.BIC:
|
106
|
+
results[copula] = copula.bic(X)
|
107
|
+
case _:
|
108
|
+
raise ValueError(f"{selection_criterion} not implemented")
|
109
|
+
|
110
|
+
selected_copula = min(results, key=results.get)
|
111
|
+
return selected_copula
|
@@ -0,0 +1,486 @@
|
|
1
|
+
"""Bivariate Student's t Copula Estimation."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
import scipy.optimize as so
|
11
|
+
import scipy.special as sp
|
12
|
+
import scipy.stats as st
|
13
|
+
import sklearn.utils.validation as skv
|
14
|
+
|
15
|
+
from skfolio.distribution.copula._base import _RHO_BOUNDS, BaseBivariateCopula
|
16
|
+
from skfolio.distribution.copula._utils import _apply_margin_swap
|
17
|
+
|
18
|
+
# A Student's t copula with a dof (degrees of freedom) below 2.0 is so extremely
# heavy-tailed that even the mean (and many moments) of the distribution do not exist,
# which makes it impractical, while a dof above 50 tends to a Gaussian copula. The dof
# is therefore limited to the interval [2, 50] for improved stability and robustness.
_DOF_BOUNDS = (2.0, 50.0)
|
23
|
+
|
24
|
+
|
25
|
+
class StudentTCopula(BaseBivariateCopula):
    r"""Bivariate Student's t Copula Estimation.

    The bivariate Student's t copula (its cumulative distribution function) is
    defined as:

    .. math::
        C_{\nu, \rho}(u, v) = T_{\nu, \rho} \Bigl(t_{\nu}^{-1}(u),\;t_{\nu}^{-1}(v)\Bigr)

    where:
      - :math:`\nu > 0` is the degrees of freedom.
      - :math:`\rho \in (-1, 1)` is the correlation coefficient.
      - :math:`T_{\nu, \rho}(x, y)` is the CDF of the bivariate t-distribution.
      - :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
        univariate t-distribution.

    Student's t copula with degrees of freedom (dof) less than 2.0 is extremely
    heavy-tailed, to the extent that even the mean (and many moments) do not exist,
    rendering it impractical. Conversely, for dof above 50 the t copula behaves
    similarly to a Gaussian copula. Thus, for improved stability and robustness,
    the dof is limited to the interval [2, 50].

    .. note::

        Rotations are not needed for elliptical copula (e.g., Gaussian or Student-t)
        because its correlation parameter :math:`\rho \in (-1, 1)` naturally covers
        both positive and negative dependence, and they exhibit symmetric tail behavior.


    Parameters
    ----------
    itau : bool, default=True
        If True, :math:`\rho` is estimated using the Kendall's tau inversion method;
        otherwise, we use the MLE (Maximum Likelihood Estimation) method. The MLE is
        slower but more accurate.

    kendall_tau : float, optional
        If provided, this value is used instead of being computed from the data.
        When `itau` is True it is inverted to obtain :math:`\rho`; otherwise it only
        provides the initial guess of :math:`\rho` for the MLE.

    tolerance : float, default=1e-4
        Convergence tolerance for the optimization.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    rho_ : float
        Fitted correlation coefficient (:math:`\rho`), constrained to `_RHO_BOUNDS`.

    dof_ : float
        Fitted degrees of freedom (:math:`\nu`), constrained to the interval [2, 50].

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_dataset
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution import StudentTCopula, compute_pseudo_observations
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_dataset()
    >>> X = prices_to_returns(prices)
    >>> X = X[["AAPL", "JPM"]]
    >>>
    >>> # Convert returns to pseudo observation in the interval [0,1]
    >>> X = compute_pseudo_observations(X)
    >>>
    >>> # Initialize the Copula estimator
    >>> model = StudentTCopula()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameter and tail dependence coefficients
    >>> print(model.fitted_repr)
    StudentTCopula(rho=0.327, dof=5.14)
    >>> print(model.lower_tail_dependence)
    0.1270
    >>> print(model.upper_tail_dependence)
    0.1270
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
    >>> # Inverse Partial Derivative, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> p = model.partial_derivative(X)
    >>> u = model.inverse_partial_derivative(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the tail concentration function.
    >>> fig = model.plot_tail_concentration()
    >>> fig.show()
    >>>
    >>> # Plot a 2D contour of the estimated PDF.
    >>> fig = model.plot_pdf_2d()
    >>> fig.show()
    >>>
    >>> # Plot a 3D surface of the estimated PDF.
    >>> fig = model.plot_pdf_3d()
    >>> fig.show()

    References
    ----------
    .. [1] "An Introduction to Copulas (2nd ed.)",
        Nelsen (2006)

    .. [2] "Multivariate Models and Dependence Concepts",
        Joe, Chapman & Hall (1997)

    .. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
        McNeil, Frey & Embrechts (2005)

    .. [4] "The t Copula and Related Copulas",
        Demarta & McNeil (2005)

    .. [5] "Copula Methods in Finance",
        Cherubini, Luciano & Vecchiato (2004)
    """

    rho_: float
    dof_: float
    # Two fitted parameters: rho_ and dof_.
    # NOTE(review): presumably consumed by the base class (e.g. for AIC/BIC) - confirm.
    _n_params = 2

    def __init__(
        self,
        itau: bool = True,
        kendall_tau: float | None = None,
        tolerance: float = 1e-4,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.itau = itau
        self.kendall_tau = kendall_tau
        self.tolerance = tolerance

    def fit(self, X: npt.ArrayLike, y=None) -> "StudentTCopula":
        r"""Fit the Bivariate Student's t Copula.

        If `itau` is True, it uses a Kendall-based two-step method:
            - Estimates the correlation parameter (:math:`\rho`) from Kendall's
              tau inversion.

            - Optimizes the degrees of freedom (:math:`\nu`) by maximizing the
              log-likelihood.

        Otherwise, it uses the full MLE method: optimizes both :math:`\rho` and
        :math:`\nu` by maximizing the log-likelihood.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval [0, 1],
            having been transformed to uniform marginals.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : StudentTCopula
            Returns the instance itself.

        Raises
        ------
        RuntimeError
            If the optimizer does not report success.
        """
        X = self._validate_X(X, reset=True)

        # Reuse the user-supplied Kendall's tau when available; otherwise estimate it
        # from the data.
        if self.kendall_tau is None:
            kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
        else:
            kendall_tau = self.kendall_tau

        # Either used directly or for initial guess
        # (Kendall's tau inversion rho = sin(pi * tau / 2), clipped to `_RHO_BOUNDS`).
        rho_from_tau = np.clip(
            np.sin((np.pi * kendall_tau) / 2.0),
            a_min=_RHO_BOUNDS[0],
            a_max=_RHO_BOUNDS[1],
        )

        if self.itau:
            # rho is fixed to its tau-inverted value: only the dof is optimized, as a
            # bounded scalar problem over `_DOF_BOUNDS`.
            res = so.minimize_scalar(
                _neg_log_likelihood,
                args=(
                    rho_from_tau,
                    X,
                ),
                bounds=_DOF_BOUNDS,
                method="bounded",
                options={"xatol": self.tolerance},
            )
            if not res.success:
                raise RuntimeError(f"Optimization failed: {res.message}")
            self.dof_ = res.x
            self.rho_ = rho_from_tau
        else:
            # We'll use L-BFGS-B for the optimization because:
            # 1) The bivariate Student-t copula's negative log-likelihood is smooth,
            #    making gradient-based methods more efficient than derivative-free
            #    methods.
            # 2) L-BFGS-B directly supports simple box bounds (here `_DOF_BOUNDS`
            #    for nu and `_RHO_BOUNDS` for rho).
            # 3) It's typically faster and more stable for small-dimensional problems
            #    than more general constraint solvers (like trust-constr or SLSQP)
            # The parameter vector is ordered as x = [dof, rho]; the search starts
            # from dof = 3 and the tau-inverted rho.
            result = so.minimize(
                fun=lambda x: _neg_log_likelihood(dof=x[0], rho=x[1], X=X),
                x0=np.array([3.0, rho_from_tau]),
                bounds=(_DOF_BOUNDS, _RHO_BOUNDS),
                method="L-BFGS-B",
                tol=self.tolerance,
            )
            if not result.success:
                raise RuntimeError(f"Optimization failed: {result.message}")
            self.dof_, self.rho_ = result.x

        return self

    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the CDF of the bivariate Student-t copula.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        cdf : ndarray of shape (n_observations,)
            CDF values for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        # Map the uniform margins to t quantiles, then evaluate the bivariate t CDF
        # with zero location and correlation matrix [[1, rho], [rho, 1]].
        cdf = st.multivariate_t.cdf(
            x=sp.stdtrit(self.dof_, X),
            loc=np.array([0, 0]),
            shape=np.array([[1, self.rho_], [self.rho_, 1]]),
            df=self.dof_,
        )
        return cdf

    def partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the h-function (partial derivative) for the bivariate Student's t
        copula.

        The h-function with respect to the second margin represents the conditional
        distribution function of :math:`u` given :math:`v`:

        .. math:: \begin{aligned}
                  h(u \mid v) &= \frac{\partial C(u,v)}{\partial v} \\
                              &= t_{\nu+1}\!\left(\frac{t_\nu^{-1}(u) - \rho\,t_\nu^{-1}(v)}
                                 {\sqrt{\frac{(1-\rho^2)\left(\nu + \left(t_\nu^{-1}(v)\right)^2\right)}{\nu+1}}}\right).
                  \end{aligned}

        where:
            - :math:`\nu > 0` is the degrees of freedom.
            - :math:`\rho \in (-1, 1)` is the correlation coefficient.
            - :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
              univariate t-distribution.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        first_margin : bool, default=False
            If True, compute the partial derivative with respect to the first
            margin `u`; otherwise, compute the partial derivative with respect to the
            second margin `v`.

        Returns
        -------
        p : ndarray of shape (n_observations,)
            h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        X = _apply_margin_swap(X, first_margin=first_margin)
        # Compute the inverse CDF (percent point function) using stdtrit for better
        # performance
        u_inv, v_inv = sp.stdtrit(self.dof_, X).T
        # Standardize the conditional quantile:
        # z = (x - rho * y) / sqrt((1 - rho^2) * (nu + y^2) / (nu + 1))
        z = (u_inv - self.rho_ * v_inv) / (
            np.sqrt((1 - self.rho_**2) * (self.dof_ + v_inv**2) / (self.dof_ + 1))
        )
        # Student's t CDF with (nu+1) degrees of freedom using stdtr for better
        # performance
        p = sp.stdtr(self.dof_ + 1, z)
        return p

    def inverse_partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the inverse of the bivariate copula's partial derivative, commonly
        known as the inverse h-function [1]_.

        Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
        second margin is defined by

        .. math::
            h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},

        which is the conditional distribution of :math:`U` given :math:`V = v`.
        The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
        value :math:`u \in [0,1]` such that

        .. math::
            h(u \mid v) \;=\; p,
            \quad \text{where } p \in [0,1].

        In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
        :math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
        :math:`p = \partial C(u, v)/\partial v`.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
            - The first column `p` corresponds to the value of the h-function.
            - The second column `v` is the conditioning variable.

        first_margin : bool, default=False
            If True, compute the inverse partial derivative with respect to the first
            margin `u`; otherwise, compute the inverse partial derivative with respect
            to the second margin `v`.

        Returns
        -------
        u : ndarray of shape (n_observations,)
            A 1D-array of length `n_observations`, where each element is the computed
            :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.

        References
        ----------
        .. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
        .. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        X = _apply_margin_swap(X, first_margin=first_margin)
        # p is a probability under the t distribution with (nu + 1) degrees of freedom,
        # while v is a probability under the t distribution with nu degrees of freedom.
        p_inv = sp.stdtrit(self.dof_ + 1, X[:, 0])
        v_inv = sp.stdtrit(self.dof_, X[:, 1])
        # Undo the standardization applied by the h-function:
        # x = z * sqrt((nu + y^2) / (nu + 1) * (1 - rho^2)) + rho * y
        u_inv = (
            p_inv
            * np.sqrt((self.dof_ + v_inv**2) / (self.dof_ + 1) * (1 - self.rho_**2))
            + self.rho_ * v_inv
        )
        # Map the t quantile back to a probability in [0, 1].
        u = sp.stdtr(self.dof_, u_inv)
        return u

    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            The log-likelihood of each sample under the fitted copula.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        log_density = _sample_scores(X=X, rho=self.rho_, dof=self.dof_)
        return log_density

    @property
    def lower_tail_dependence(self) -> float:
        """Theoretical lower tail dependence coefficient."""
        skv.check_is_fitted(self)
        # lambda = 2 * t_{nu+1}(-sqrt((nu + 1) * (1 - rho) / (1 + rho)))
        arg = -np.sqrt((self.dof_ + 1) * (1 - self.rho_) / (1 + self.rho_))
        return 2 * sp.stdtr(self.dof_ + 1, arg)

    @property
    def upper_tail_dependence(self) -> float:
        """Theoretical upper tail dependence coefficient."""
        # Same value as the lower tail coefficient.
        return self.lower_tail_dependence

    @property
    def fitted_repr(self) -> str:
        """String representation of the fitted copula."""
        return f"{self.__class__.__name__}(rho={self.rho_:0.3f}, dof={self.dof_:0.2f})"
|
419
|
+
|
420
|
+
|
421
|
+
def _neg_log_likelihood(dof: float, rho: float, X: np.ndarray) -> float:
    """Objective minimized when fitting: the negated total log-likelihood.

    Parameters
    ----------
    dof : float
        Degree of freedom copula parameter.

    rho : float
        Correlation copula parameter.

    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    Returns
    -------
    value : float
        The negative log-likelihood value, i.e. minus the sum of the per-sample
        log-densities.
    """
    per_sample_log_density = _sample_scores(X=X, rho=rho, dof=dof)
    total_log_likelihood = np.sum(per_sample_log_density)
    return -total_log_likelihood
|
443
|
+
|
444
|
+
|
445
|
+
def _sample_scores(X: np.ndarray, rho: float, dof: float) -> np.ndarray:
    """Compute the log-likelihood of each sample (log-pdf) under the bivariate
    Student's t copula model.

    The log-density is the log-pdf of the bivariate t-distribution evaluated at the
    t quantiles of `(u, v)`, minus the log-pdfs of the two univariate t margins.

    Parameters
    ----------
    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    rho : float
        Correlation copula parameter, strictly between -1 and 1.

    dof : float
        Degree of freedom copula parameter, between 1 and 50.

    Returns
    -------
    density : ndarray of shape (n_observations,)
        The log-likelihood of each sample under the copula with parameters
        `rho` and `dof`.

    Raises
    ------
    ValueError
        If rho is not in (-1, 1) or dof is not in [1, 50].
    """
    # The bounds are strict: at rho = +/-1 the term `1 - rho**2` is zero, so the
    # log and the division below would silently produce inf/NaN.
    if not (-1.0 < rho < 1.0):
        raise ValueError("rho must be between -1 and 1.")
    if not 1.0 <= dof <= 50:
        raise ValueError("Degrees of freedom `dof` must be between 1 and 50.")

    # Inverse CDF (ppf) using stdtrit for better performance
    x, y = sp.stdtrit(dof, X).T

    a = 1.0 - rho**2
    log_density = (
        # Normalizing constants of the bivariate pdf over the two univariate pdfs.
        sp.gammaln((dof + 2.0) / 2.0)
        + sp.gammaln(dof / 2.0)
        - 2.0 * sp.gammaln((dof + 1.0) / 2.0)
        - np.log(a) / 2
        # Minus the log-kernels of the two univariate t margins.
        + (dof + 1.0) / 2.0 * (np.log1p(x**2 / dof) + np.log1p(y**2 / dof))
        # Log-kernel of the bivariate t-distribution.
        - (dof + 2.0) / 2.0 * np.log1p((x**2 - 2 * rho * x * y + y**2) / a / dof)
    )
    return log_density
|