skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +2 -2
- skfolio/cluster/__init__.py +1 -1
- skfolio/cluster/_hierarchical.py +1 -1
- skfolio/datasets/__init__.py +1 -1
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +1 -1
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +4 -4
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +6 -6
- skfolio/measures/__init__.py +1 -1
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +2 -2
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +1 -1
- skfolio/moments/covariance/_detone_covariance.py +1 -1
- skfolio/moments/covariance/_empirical_covariance.py +1 -1
- skfolio/moments/covariance/_ew_covariance.py +1 -1
- skfolio/moments/covariance/_gerber_covariance.py +1 -1
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +2 -7
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +1 -1
- skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
- skfolio/moments/expected_returns/_ew_mu.py +1 -1
- skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
- skfolio/optimization/__init__.py +2 -0
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +2 -0
- skfolio/optimization/cluster/_nco.py +7 -7
- skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
- skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
- skfolio/optimization/convex/__init__.py +2 -0
- skfolio/optimization/convex/_base.py +8 -8
- skfolio/optimization/convex/_distributionally_robust.py +4 -4
- skfolio/optimization/convex/_maximum_diversification.py +5 -5
- skfolio/optimization/convex/_mean_risk.py +5 -6
- skfolio/optimization/convex/_risk_budgeting.py +3 -3
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +1 -1
- skfolio/optimization/naive/__init__.py +2 -0
- skfolio/optimization/naive/_naive.py +1 -1
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +34 -7
- skfolio/portfolio/_base.py +42 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +4 -4
- skfolio/pre_selection/__init__.py +2 -0
- skfolio/pre_selection/_drop_correlated.py +2 -2
- skfolio/pre_selection/_select_complete.py +25 -26
- skfolio/pre_selection/_select_k_extremes.py +2 -2
- skfolio/pre_selection/_select_non_dominated.py +2 -2
- skfolio/pre_selection/_select_non_expiring.py +2 -2
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +4 -0
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +5 -3
- skfolio/prior/_empirical.py +3 -1
- skfolio/prior/_factor_model.py +8 -4
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +1 -1
- skfolio/uncertainty_set/__init__.py +2 -0
- skfolio/uncertainty_set/_base.py +2 -2
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +7 -7
- skfolio/utils/tools.py +76 -12
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
- skfolio-0.8.0.dist-info/RECORD +120 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
- skfolio-0.7.0.dist-info/RECORD +0 -95
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,609 @@
|
|
1
|
+
"""Bivariate Joe Copula Estimation."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
import scipy.optimize as so
|
11
|
+
import scipy.special as sp
|
12
|
+
import scipy.stats as st
|
13
|
+
import sklearn.utils.validation as skv
|
14
|
+
|
15
|
+
from skfolio.distribution.copula._base import BaseBivariateCopula
|
16
|
+
from skfolio.distribution.copula._utils import (
|
17
|
+
CopulaRotation,
|
18
|
+
_apply_copula_rotation,
|
19
|
+
_apply_margin_swap,
|
20
|
+
_apply_rotation_cdf,
|
21
|
+
_apply_rotation_partial_derivatives,
|
22
|
+
_select_rotation_itau,
|
23
|
+
_select_theta_and_rotation_mle,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Joe copula with a theta of 1.0 is just the independence copula, so we chose a lower
|
27
|
+
# bound of 1.005. After 20, the copula is already imposing very high tail dependence
|
28
|
+
# close to comonotonic and increasing it will make it impractical.
|
29
|
+
_THETA_BOUNDS = (1.005, 20.0)
|
30
|
+
_EULER_GAMMA = 0.5772156649015328606
|
31
|
+
|
32
|
+
|
33
|
+
class JoeCopula(BaseBivariateCopula):
    r"""Bivariate Joe Copula Estimation.

    The Joe copula is an Archimedean copula characterized by strong upper tail
    dependence and little to no lower tail dependence.

    In its unrotated form, it is used for modeling extreme co-movements in the upper
    tail (i.e. simultaneous extreme gains).

    Rotations allow the copula to be adapted for different types of tail dependence:
        - A 180° rotation captures extreme co-movements in the lower tail (i.e.
          simultaneous extreme losses).

        - A 90° rotation captures scenarios where one variable exhibits extreme losses
          while the other shows extreme gains.

        - A 270° rotation captures the opposite scenario, where one variable experiences
          extreme gains while the other suffers extreme losses.

    Joe copula generally exhibits stronger upper tail dependence than the Gumbel copula.

    It is defined by:

    .. math::
        C_{\theta}(u, v) = 1-\Bigl[(1 - u)^{\theta} + (1 - v)^{\theta} -
        (1 - u)^{\theta} (1 - v)^{\theta}\Bigr]^{\frac{1}{\theta}}

    where :math:`\theta \ge 1` is the dependence parameter. When :math:`\theta = 1`,
    the Joe copula reduces to the independence copula. Larger values of :math:`\theta`
    result in stronger upper-tail dependence.

    .. note::

        Rotations are needed for Archimedean copulas (e.g., Joe, Gumbel, Clayton)
        because their parameters only model positive dependence, and they exhibit
        asymmetric tail behavior. To model negative dependence, one uses rotations
        to "flip" the copula's tail dependence.

    Parameters
    ----------
    itau : bool, default=True
        If True, :math:`\theta` is estimated using the Kendall's tau inversion method;
        otherwise, the Maximum Likelihood Estimation (MLE) method is used. The MLE is
        slower but more accurate.

    kendall_tau : float, optional
        If `itau` is True and `kendall_tau` is provided, this value is used;
        otherwise, it is computed.

    tolerance : float, default=1e-4
        Convergence tolerance for the MLE optimization.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    theta_ : float
        Fitted theta coefficient :math:`\theta` > 1.

    rotation_ : CopulaRotation
        Fitted rotation of the copula.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_dataset
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution import JoeCopula, compute_pseudo_observations
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_dataset()
    >>> X = prices_to_returns(prices)
    >>> X = X[["AAPL", "JPM"]]
    >>>
    >>> # Convert returns to pseudo observations in the interval [0,1]
    >>> X = compute_pseudo_observations(X)
    >>>
    >>> # Initialize the Copula estimator
    >>> model = JoeCopula()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameter and tail dependence coefficients
    >>> print(model.fitted_repr)
    JoeCopula(theta=1.48, rot=180°)
    >>> print(model.lower_tail_dependence)
    0.4021
    >>> print(model.upper_tail_dependence)
    0.0
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
    >>> # Inverse Partial Derivative, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> p = model.partial_derivative(X)
    >>> u = model.inverse_partial_derivative(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the tail concentration function.
    >>> fig = model.plot_tail_concentration()
    >>> fig.show()
    >>>
    >>> # Plot a 2D contour of the estimated PDF.
    >>> fig = model.plot_pdf_2d()
    >>> fig.show()
    >>>
    >>> # Plot a 3D surface of the estimated PDF.
    >>> fig = model.plot_pdf_3d()
    >>> fig.show()

    References
    ----------
    .. [1] "An Introduction to Copulas (2nd ed.)",
        Nelsen (2006)

    .. [2] "Multivariate Models and Dependence Concepts",
        Joe, Chapman & Hall (1997)

    .. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
        McNeil, Frey & Embrechts (2005)

    .. [4] "The t Copula and Related Copulas",
        Demarta & McNeil (2005)

    .. [5] "Copula Methods in Finance",
        Cherubini, Luciano & Vecchiato (2004)
    """

    theta_: float
    rotation_: CopulaRotation
    _n_params = 1

    def __init__(
        self,
        itau: bool = True,
        kendall_tau: float | None = None,
        tolerance: float = 1e-4,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.itau = itau
        self.kendall_tau = kendall_tau
        self.tolerance = tolerance

    def fit(self, X: npt.ArrayLike, y=None) -> "JoeCopula":
        r"""Fit the Bivariate Joe Copula.

        If `itau` is True, estimates :math:`\theta` using Kendall's tau inversion.
        Otherwise, uses MLE by maximizing the log-likelihood.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval [0, 1],
            having been transformed to uniform marginals.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_X(X, reset=True)

        if self.itau:
            if self.kendall_tau is None:
                kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
            else:
                kendall_tau = self.kendall_tau

            # The sign of the dependence is carried by the rotation, so theta is
            # inverted from the magnitude of tau only.
            abs_kendall_tau = abs(kendall_tau)

            # brentq finds theta inside the admissible interval, but it requires
            # `_tau_diff` to change sign between the two bounds. When both ends have
            # the same sign the target tau is out of reach, so we fall back to the
            # bound whose theoretical tau is the closest.
            theta_min, theta_max = _THETA_BOUNDS
            fa = _tau_diff(theta_min, abs_kendall_tau)
            fb = _tau_diff(theta_max, abs_kendall_tau)
            if fa * fb > 0:
                if abs(fa) < abs(fb):
                    self.theta_ = theta_min
                else:
                    self.theta_ = theta_max
            else:
                # noinspection PyTypeChecker
                self.theta_ = so.brentq(
                    _tau_diff,
                    args=(abs_kendall_tau,),
                    a=theta_min,
                    b=theta_max,
                )
            self.rotation_ = _select_rotation_itau(
                func=_neg_log_likelihood, X=X, theta=self.theta_
            )

        else:
            self.theta_, self.rotation_ = _select_theta_and_rotation_mle(
                _neg_log_likelihood, X=X, bounds=_THETA_BOUNDS, tolerance=self.tolerance
            )

        return self

    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the CDF of the bivariate Joe copula.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        cdf : ndarray of shape (n_observations,)
            CDF values for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        cdf = _apply_rotation_cdf(
            func=_base_cdf, X=X, rotation=self.rotation_, theta=self.theta_
        )
        return cdf

    def partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the h-function (partial derivative) for the bivariate Joe copula
        with respect to a specified margin.

        The h-function with respect to the second margin represents the conditional
        distribution function of :math:`u` given :math:`v`:

        .. math:: \begin{aligned}
                  h(u \mid v)
                  &= \frac{\partial C(u,v)}{\partial v} \\[6pt]
                  &= (1-v)^{\theta-1}\,\Bigl[1 \;-\;(1-u)^{\theta}\Bigr]\,
                     \Bigl[(1-u)^{\theta} \;+\;(1-v)^{\theta}
                     \;-\;(1-u)^{\theta}(1-v)^{\theta}\Bigr]^{\frac{1}{\theta}-1} \\[6pt]
                  &= \left( 1 \;+\;\frac{(1-u)^{\theta}}{(1-v)^{\theta}}
                     \;-\;(1-u)^{\theta} \right)^{-1 + \frac{1}{\theta}}
                     \;\cdot\;\bigl[\,1 \;-\;(1-u)^{\theta}\bigr].
                  \end{aligned}

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        first_margin : bool, default=False
            If True, compute the partial derivative with respect to the first
            margin `u`; otherwise, compute the partial derivative with respect to the
            second margin `v`.

        Returns
        -------
        p : ndarray of shape (n_observations,)
            h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        p = _apply_rotation_partial_derivatives(
            func=_base_partial_derivative,
            X=X,
            rotation=self.rotation_,
            first_margin=first_margin,
            theta=self.theta_,
        )
        return p

    def inverse_partial_derivative(
        self, X: npt.ArrayLike, first_margin: bool = False
    ) -> np.ndarray:
        r"""Compute the inverse of the bivariate copula's partial derivative, commonly
        known as the inverse h-function [1]_.

        Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
        second margin is defined by

        .. math::
            h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},

        which is the conditional distribution of :math:`U` given :math:`V = v`.
        The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
        value :math:`u \in [0,1]` such that

        .. math::
            h(u \mid v) \;=\; p,
            \quad \text{where } p \in [0,1].

        In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
        :math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
        :math:`p = \partial C(u, v)/\partial v`.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
            - The first column `p` corresponds to the value of the h-function.
            - The second column `v` is the conditioning variable.

        first_margin : bool, default=False
            If True, compute the inverse partial derivative with respect to the first
            margin `u`; otherwise, compute the inverse partial derivative with respect
            to the second margin `v`.

        Returns
        -------
        u : ndarray of shape (n_observations,)
            A 1D-array of length `n_observations`, where each element is the computed
            :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.

        References
        ----------
        .. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
        .. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
        .. [3] "Nested Archimedean Copulas Meet R: The nacopula Package",
            Hofert & Mächler (2011)
        """
        # no known closed-form solution, hence we use Newton method.
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        u = _apply_rotation_partial_derivatives(
            func=_base_inverse_partial_derivative,
            X=X,
            rotation=self.rotation_,
            first_margin=first_margin,
            theta=self.theta_,
        )
        return u

    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 2)
            An array of bivariate inputs `(u, v)` where each row represents a
            bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
            having been transformed to uniform marginals.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            The log-likelihood of each sample under the fitted copula.
        """
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        # The density is evaluated on the unrotated copula after mapping the inputs
        # through the fitted rotation.
        X = _apply_copula_rotation(X, rotation=self.rotation_)
        log_density = _base_sample_scores(X=X, theta=self.theta_)
        return log_density

    @property
    def lower_tail_dependence(self) -> float:
        """Theoretical lower tail dependence coefficient.

        Non-zero only for the 180° rotation; every other rotation returns 0.0.
        """
        skv.check_is_fitted(self)
        if self.rotation_ == CopulaRotation.R180:
            return 2.0 - np.power(2.0, 1.0 / self.theta_)
        return 0.0

    @property
    def upper_tail_dependence(self) -> float:
        """Theoretical upper tail dependence coefficient.

        Non-zero only for the unrotated copula; every other rotation returns 0.0.
        """
        skv.check_is_fitted(self)
        if self.rotation_ == CopulaRotation.R0:
            return 2.0 - np.power(2.0, 1.0 / self.theta_)
        return 0.0

    @property
    def fitted_repr(self) -> str:
        """String representation of the fitted copula."""
        return (
            f"{self.__class__.__name__}(theta={self.theta_:0.2f}, rot={self.rotation_})"
        )
|
416
|
+
|
417
|
+
|
418
|
+
def _neg_log_likelihood(theta: float, X: np.ndarray) -> float:
    """Objective minimized when fitting: minus the total log-likelihood.

    Parameters
    ----------
    theta : float
        The dependence parameter (must be greater than 1).

    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    Returns
    -------
    value : float
        The negative log-likelihood value.
    """
    per_sample_log_density = _base_sample_scores(X=X, theta=theta)
    total_log_likelihood = np.sum(per_sample_log_density)
    return -total_log_likelihood
|
437
|
+
|
438
|
+
|
439
|
+
def _base_sample_scores(X: np.ndarray, theta: float) -> np.ndarray:
    r"""Compute the log-likelihood of each sample (log-pdf) under the bivariate
    Joe copula model.

    With :math:`a = (1-u)^{\theta}`, :math:`b = (1-v)^{\theta}` and
    :math:`d = a + b - ab`, the log-density evaluated here is

    .. math::
        \log c(u, v) = \Bigl(\tfrac{1}{\theta} - 2\Bigr)\log d
        + (\theta - 1)\bigl[\log(1-u) + \log(1-v)\bigr]
        + \log(\theta - 1 + d)

    Parameters
    ----------
    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    theta : float
        The dependence parameter (must be greater than 1).

    Returns
    -------
    density : ndarray of shape (n_observations,)
        The log-likelihood of each sample under the fitted copula.

    Raises
    ------
    ValueError
        If theta is not strictly greater than 1.
    """
    if theta <= 1.0:
        raise ValueError("Theta must be greater than 1 for the Joe copula.")

    # log-space transformation to improve stability near 0 or 1:
    # x = log(1 - u) and y = log(1 - v), so exp(x * theta) = (1 - u)^theta.
    x, y = np.log1p(-X).T
    x_y = x + y
    d = np.exp(x * theta) + np.exp(y * theta) - np.exp(x_y * theta)
    log_density = (
        (1.0 / theta - 2.0) * np.log(d) + x_y * (theta - 1.0) + np.log(theta - 1.0 + d)
    )
    return log_density
|
474
|
+
|
475
|
+
|
476
|
+
def _tau_diff(theta: float, tau_empirical: float) -> float:
    r"""Compute the difference between the theoretical Kendall's tau for the Joe copula
    and an empirical tau.

    The theoretical relationship for the Joe copula is given by:

    .. math::
        \tau(\theta) = 1 + \frac{2}{2-\theta} \left[ (1-\gamma) - \psi\left(\frac{2}{\theta}+1\right) \right],

    where :math:`\psi` is the digamma function and :math:`\gamma` is the
    Euler-Mascheroni constant.

    The expression has a removable singularity at :math:`\theta = 2`: since
    :math:`\psi(2) = 1 - \gamma`, both the bracket and the denominator vanish. The
    limit is :math:`\tau(2) = 1 - \psi'(2) = 2 - \pi^2 / 6`, which is returned
    whenever :math:`\theta` is numerically indistinguishable from 2, instead of
    dividing by zero.

    Parameters
    ----------
    theta : float
        The dependence parameter (must be greater than 1).

    tau_empirical : float
        The empirical Kendall's tau.

    Returns
    -------
    float
        The difference :math:`\tau(\theta) - \tau_{\text{empirical}}`.
    """
    theta_arr = np.asarray(theta, dtype=float)
    denominator = 2.0 - theta_arr

    # Within this distance of theta = 2 the 0/0 form is dominated by rounding
    # error, so the analytical limit is used there instead.
    is_singular = np.abs(denominator) < 1e-8
    # Substitute a harmless denominator so the regular branch never divides by 0.
    safe_denominator = np.where(is_singular, 1.0, denominator)

    # gamma_const = 1 - EulerGamma
    gamma_const = 1.0 - np.euler_gamma
    # Compute theoretical tau using the digamma-based expression
    tau_regular = 1.0 + (2.0 / safe_denominator) * (
        gamma_const - sp.digamma(2.0 / theta_arr + 1.0)
    )
    tau_at_two = 2.0 - np.pi**2 / 6.0
    tau_theoretical = np.where(is_singular, tau_at_two, tau_regular)
    return tau_theoretical - tau_empirical
|
508
|
+
|
509
|
+
|
510
|
+
def _base_cdf(X: np.ndarray, theta: float) -> np.ndarray:
    """Evaluate the CDF of the unrotated bivariate Joe copula row by row.

    Each row `(u, v)` of `X` is mapped to
    `1 - [(1-u)^theta + (1-v)^theta - (1-u)^theta (1-v)^theta]^(1/theta)`.
    """
    survival_powers = np.power(1 - X, theta)
    row_sum = np.sum(survival_powers, axis=1)
    row_product = np.prod(survival_powers, axis=1)
    return 1.0 - np.power(row_sum - row_product, 1.0 / theta)
|
515
|
+
|
516
|
+
|
517
|
+
def _base_partial_derivative(
    X: np.ndarray, first_margin: bool, theta: float
) -> np.ndarray:
    r"""Evaluate the h-function (partial derivative) of the unrotated bivariate
    Joe copula with respect to the requested margin.

    Parameters
    ----------
    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(u, v)` where each row represents a
        bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
        having been transformed to uniform marginals.

    first_margin : bool
        If True, compute the partial derivative with respect to the first
        margin `u`; otherwise, compute the partial derivative with respect to the
        second margin `v`.

    theta : float
        The dependence parameter (must be greater than 1).

    Returns
    -------
    p : ndarray of shape (n_observations,)
        h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
    """
    swapped = _apply_margin_swap(X, first_margin=first_margin)
    # x = (1 - u)^theta and y = (1 - v)^theta, taken column-wise
    powered = np.power(1 - swapped, theta)
    x, y = powered.T
    bracket = 1 + x / y - x
    return np.power(bracket, 1 / theta - 1) * (1.0 - x)
|
547
|
+
|
548
|
+
|
549
|
+
def _base_inverse_partial_derivative(
    X: np.ndarray, first_margin: bool, theta: float
) -> np.ndarray:
    r"""Invert the h-function of the unrotated bivariate Joe copula.

    Parameters
    ----------
    X : array-like of shape (n_observations, 2)
        An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
        - The first column `p` corresponds to the value of the h-function.
        - The second column `v` is the conditioning variable.

    first_margin : bool
        If True, compute the inverse partial derivative with respect to the first
        margin `u`; otherwise, compute the inverse partial derivative with respect to
        the second margin `v`.

    theta : float
        The dependence parameter (must be greater than 1).

    Returns
    -------
    u : ndarray of shape (n_observations,)
        A 1D-array of length `n_observations`, where each element is the computed
        :math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.
    """
    swapped = _apply_margin_swap(X, first_margin=first_margin)
    p, v = swapped.T

    # y = (1 - v)^theta is fixed; the iteration solves for x = (1 - u)^theta
    y = np.power(1 - v, theta)

    # No closed-form inverse is known, so x is refined with Newton steps,
    # stopping early once the largest update falls below the tolerance.
    tolerance = 1e-8
    remaining_iterations = 50

    # Starting point of the iteration
    x = np.power(
        (1 - v) * (np.power(1.0 - p, 1.0 / theta - 1) - 1.0) / y + 1.0,
        theta / (1.0 - theta),
    )

    while remaining_iterations > 0:
        remaining_iterations -= 1
        k = (x - 1.0) * y
        w = np.power((1.0 / y - 1.0) * x + 1.0, 1.0 / theta)
        newton_step = (
            (theta * (k - x) * (p * (-k + x) + k * w))
            / ((y - 1.0) * k - theta * y)
            / w
        )
        # keep the iterate inside [0, 1]
        candidate = np.clip(x - newton_step, 0.0, 1.0)
        largest_change = np.max(np.abs(candidate - x))
        x = candidate
        if largest_change < tolerance:
            break

    # map x = (1 - u)^theta back to u
    return 1.0 - np.power(x, 1.0 / theta)
|