copulas 0.10.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of copulas might be problematic. Click here for more details.

@@ -0,0 +1,449 @@
1
+ """This module contains a base class for bivariate copulas."""
2
+
3
+ import json
4
+ import warnings
5
+ from enum import Enum
6
+
7
+ import numpy as np
8
+ from scipy import stats
9
+ from scipy.optimize import brentq
10
+
11
+ from copulas import EPSILON, NotFittedError, random_state, validate_random_state
12
+ from copulas.bivariate.utils import split_matrix
13
+
14
+
15
class CopulaTypes(Enum):
    """Enumeration of the bivariate copula families known to this package.

    The member names are what ``Bivariate.to_dict`` serializes and what
    ``Bivariate.__new__`` accepts (case-insensitively) as a string.
    """

    # Archimedean families.
    CLAYTON = 0
    FRANK = 1
    GUMBEL = 2

    # Copula of two independent variables.
    INDEPENDENCE = 3
22
+
23
+
24
class Bivariate(object):
    """Base class for bivariate copulas.

    This class allows to instantiate all its subclasses and serves as a unique entry point for
    the bivariate copulas classes.

    >>> Bivariate(copula_type=CopulaTypes.FRANK).__class__
    copulas.bivariate.frank.Frank

    >>> Bivariate(copula_type='frank').__class__
    copulas.bivariate.frank.Frank


    Args:
        copula_type (Union[CopulaType, str]): Subtype of the copula. Only honored when
            passed as a keyword argument.
        random_state (Union[int, np.random.RandomState, None]): Seed or RandomState
            for the random generator.

    Attributes:
        copula_type(CopulaTypes): Family of the copula a subclass belongs to.
        _subclasses(list[type]): List of declared subclasses.
        theta_interval(list[float]): Interval of valid thetas for the given copula family.
        invalid_thetas(list[float]): Values that, even though they belong to
            :attr:`theta_interval`, shouldn't be considered valid.
        tau (float): Kendall's tau for the data given at :meth:`fit`.
        theta(float): Parameter for the copula.

    """

    copula_type = None
    _subclasses = []
    theta_interval = []
    invalid_thetas = []
    theta = None
    tau = None

    @classmethod
    def _get_subclasses(cls):
        """Find recursively subclasses for the current class object.

        Returns:
            list[Bivariate]: List of subclass objects, depth first.

        """
        subclasses = []
        for subclass in cls.__subclasses__():
            subclasses.append(subclass)
            subclasses.extend(subclass._get_subclasses())

        return subclasses

    @classmethod
    def subclasses(cls):
        """Return a list of subclasses for the current class object.

        The list is computed on first use and cached in ``_subclasses``.

        Returns:
            list[Bivariate]: Subclasses for given class.

        """
        if not cls._subclasses:
            cls._subclasses = cls._get_subclasses()

        return cls._subclasses

    def __new__(cls, *args, **kwargs):
        """Create and return a new object of the requested copula family.

        Without a ``copula_type`` keyword a plain instance of ``cls`` is created.
        Otherwise the class whose ``copula_type`` matches is instantiated.

        Returns:
            Bivariate: New object.

        Raises:
            ValueError: If ``copula_type`` is not a valid family, or if no class
                implementing that family can be found.
        """
        copula_type = kwargs.get('copula_type', None)
        if copula_type is None:
            return super().__new__(cls)

        if not isinstance(copula_type, CopulaTypes):
            if isinstance(copula_type, str) and copula_type.upper() in CopulaTypes.__members__:
                copula_type = CopulaTypes[copula_type.upper()]
            else:
                raise ValueError(f'Invalid copula type {copula_type}')

        # A concrete family asked for its own type (e.g. ``Frank.from_dict``).
        if cls.copula_type is copula_type:
            return super().__new__(cls)

        def find(candidates):
            for candidate in candidates:
                if candidate.copula_type is copula_type:
                    return candidate
            return None

        match = find(cls.subclasses())
        if match is None:
            # The cache may predate the definition of the wanted subclass;
            # rebuild it once before giving up.
            cls._subclasses = cls._get_subclasses()
            match = find(cls._subclasses)

        if match is None:
            # Previously this fell through and returned None silently.
            raise ValueError(f'No copula class available for type {copula_type.name}')

        return super().__new__(match)

    def __init__(self, copula_type=None, random_state=None):
        """Initialize Bivariate object.

        Args:
            copula_type (CopulaType or str): Subtype of the copula. It is consumed by
                :meth:`__new__`; it is not used here.
            random_state (int, np.random.RandomState, or None): Seed or RandomState
                for the random generator.
        """
        self.random_state = validate_random_state(random_state)

    def check_theta(self):
        """Validate the computed theta against the copula specification.

        This method is used to assert the computed theta is in the valid range for the copula.

        Raises:
            ValueError: If theta is not in :attr:`theta_interval` or is in :attr:`invalid_thetas`.

        """
        lower, upper = self.theta_interval
        if (not lower <= self.theta <= upper) or (self.theta in self.invalid_thetas):
            message = 'The computed theta value {} is out of limits for the given {} copula.'
            raise ValueError(message.format(self.theta, self.copula_type.name))

    def check_fit(self):
        """Assert that the model is fit and the computed `theta` is valid.

        Note that the check is a truthiness test, so a theta of exactly ``0`` is
        reported as not fitted, the same as ``None``.

        Raises:
            NotFittedError: if the model is not fitted.
            ValueError: if the computed theta is invalid.

        """
        if not self.theta:
            raise NotFittedError('This model is not fitted.')

        self.check_theta()

    def check_marginal(self, u):
        """Check that the marginals are uniformly distributed.

        Values outside ``[0, 1]`` are an error. A failed Kolmogorov-Smirnov style
        comparison against the uniform distribution only emits a warning.

        Args:
            u(np.ndarray): Array of datapoints with shape (n,).

        Raises:
            ValueError: If any value lies outside ``[0, 1]``.
        """
        u = np.asarray(u)
        if (u < 0.0).any() or (u > 1.0).any():
            raise ValueError('Marginal value out of bounds.')

        empirical_cdf = np.sort(u)
        uniform_cdf = np.linspace(0.0, 1.0, num=len(u))
        ks_statistic = np.abs(empirical_cdf - uniform_cdf).max()
        if ks_statistic > 1.627 / np.sqrt(len(u)):
            # KS test with significance level 0.01
            warnings.warn('Data does not appear to be uniform.', category=RuntimeWarning)

    def _compute_theta(self):
        """Compute theta, validate it and assign it to self."""
        self.theta = self.compute_theta()
        self.check_theta()

    def fit(self, X):
        """Fit a model to the data updating the parameters.

        Stores Kendall's tau of the two columns in :attr:`tau` and derives
        :attr:`theta` from it.

        Args:
            X(np.ndarray): Array of datapoints with shape (n,2).

        Raises:
            ValueError: If a marginal is out of bounds, if tau cannot be computed
                (for example because a column is constant), or if the resulting
                theta is invalid.

        Return:
            None
        """
        U, V = split_matrix(X)
        self.check_marginal(U)
        self.check_marginal(V)
        self.tau = stats.kendalltau(U, V)[0]
        if np.isnan(self.tau):
            if len(np.unique(U)) == 1 or len(np.unique(V)) == 1:
                raise ValueError('Constant column.')
            raise ValueError('Unable to compute tau.')
        self._compute_theta()

    def to_dict(self):
        """Return a `dict` with the parameters to replicate this object.

        Returns:
            dict: Parameters of the copula.

        """
        return {
            'copula_type': self.copula_type.name,
            'theta': self.theta,
            'tau': self.tau
        }

    @classmethod
    def from_dict(cls, copula_dict):
        """Create a new instance from the given parameters.

        Args:
            copula_dict: `dict` with the parameters to replicate the copula.
                Like the output of `Bivariate.to_dict`

        Returns:
            Bivariate: Instance of the copula defined on the parameters.

        """
        instance = cls(copula_type=copula_dict['copula_type'])
        instance.theta = copula_dict['theta']
        instance.tau = copula_dict['tau']
        return instance

    def infer(self, X):
        """Take in subset of values and predicts the rest."""
        raise NotImplementedError

    def generator(self, t):
        r"""Compute the generator function for Archimedian copulas.

        The generator is a function
        :math:`\psi: [0,1]\times\Theta \rightarrow [0, \infty)`  # noqa: JS101

        that given an Archimedian copula fulfills:
        .. math:: C(u,v) = \psi^{-1}(\psi(u) + \psi(v))


        In a more generic way:

        .. math:: C(u_1, u_2, ..., u_n;\theta) = \psi^-1(\sum_0^n{\psi(u_i;\theta)}; \theta)

        """
        raise NotImplementedError

    def probability_density(self, X):
        r"""Compute probability density function for given copula family.

        The probability density(pdf) for a given copula is defined as:

        .. math:: c(U,V) = \frac{\partial^2 C(u,v)}{\partial v \partial u}

        Args:
            X(np.ndarray): Shape (n, 2).Datapoints to compute pdf.

        Returns:
            np.array: Probability density for the input values.

        """
        raise NotImplementedError

    def log_probability_density(self, X):
        """Return log probability density of model.

        The log probability should be overridden with numerically stable
        variants whenever possible.

        Arguments:
            X: `np.ndarray` of shape (n, 2), as taken by :meth:`probability_density`.

        Returns:
            np.ndarray

        """
        return np.log(self.probability_density(X))

    def pdf(self, X):
        """Shortcut to :meth:`probability_density`."""
        return self.probability_density(X)

    def cumulative_distribution(self, X):
        """Compute the cumulative distribution function for the copula, :math:`C(u, v)`.

        Args:
            X(np.ndarray):

        Returns:
            numpy.array: cumulative probability

        """
        raise NotImplementedError

    def cdf(self, X):
        """Shortcut to :meth:`cumulative_distribution`."""
        return self.cumulative_distribution(X)

    def percent_point(self, y, V):
        """Compute the inverse of conditional cumulative distribution :math:`C(u|v)^{-1}`.

        For each pair the value ``u`` is found numerically as the root of
        ``partial_derivative_scalar(u, v) - y`` on ``[EPSILON, 1.0]``.

        Args:
            y: `np.ndarray` value of :math:`C(u|v)`.
            V: `np.ndarray` given value of v.

        Returns:
            np.ndarray: One solution per ``(y, V)`` pair.
        """
        self.check_fit()
        result = []
        for target, given in zip(y, V):
            # Bind the current pair as defaults so the closure cannot pick up
            # values from a later iteration.
            def f(u, _v=given, _y=target):
                return self.partial_derivative_scalar(u, _v) - _y

            root = brentq(f, EPSILON, 1.0)
            if isinstance(root, np.ndarray):
                root = root[0]

            result.append(root)

        return np.array(result)

    def ppf(self, y, V):
        """Shortcut to :meth:`percent_point`."""
        return self.percent_point(y, V)

    def partial_derivative(self, X):
        r"""Compute partial derivative of cumulative distribution.

        The partial derivative of the copula(CDF) is the conditional CDF.

        .. math:: F(u|v) = \frac{\partial C(u,v)}{\partial v}

        The base class provides a finite difference approximation of the
        partial derivative of the CDF with respect to the second column of ``X``.

        Args:
            X(np.ndarray): Shape (n, 2).

        Returns:
            np.ndarray

        """
        # Always perturb a float copy: an in-place float add on an integer
        # array would raise a casting error.
        X_prime = np.array(X, dtype=float)

        # Step towards the interior of [0, 1]: backwards above 0.5, forwards otherwise.
        delta = 0.0001 * np.where(X_prime[:, 1] > 0.5, -1.0, 1.0)
        X_prime[:, 1] += delta

        f = self.cumulative_distribution(X)
        f_prime = self.cumulative_distribution(X_prime)
        return (f_prime - f) / delta

    def partial_derivative_scalar(self, U, V):
        """Compute partial derivative :math:`C(u|v)` of cumulative density of single values."""
        self.check_fit()

        X = np.column_stack((U, V))
        return self.partial_derivative(X)

    def set_random_state(self, random_state):
        """Set the random state.

        Args:
            random_state (int, np.random.RandomState, or None): Seed or RandomState
                for the random generator.
        """
        self.random_state = validate_random_state(random_state)

    @random_state
    def sample(self, n_samples):
        """Generate specified `n_samples` of new data from model.

        The samples are generated using the inverse transform method:
        `v~U[0,1], c~U[0,1], u=C^-1(c|v)`.

        Args:
            n_samples (int): amount of samples to create.

        Returns:
            np.ndarray: Array of shape `(n_samples, 2)` with generated data from the model.

        Raises:
            ValueError: If :attr:`tau` is outside ``[-1, 1]``.

        """
        # A missing tau is left for the fit check inside percent_point to report,
        # instead of failing here with a TypeError on the comparison.
        if self.tau is not None and (self.tau > 1 or self.tau < -1):
            raise ValueError('The range for correlation measure is [-1,1].')

        v = np.random.uniform(0, 1, n_samples)
        c = np.random.uniform(0, 1, n_samples)

        u = self.percent_point(c, v)
        return np.column_stack((u, v))

    def compute_theta(self):
        """Compute theta parameter using Kendall's tau."""
        raise NotImplementedError

    @classmethod
    def select_copula(cls, X):
        r"""Select best copula function based on likelihood.

        Deprecated: this only forwards to ``copulas.bivariate.select_copula``.

        Given out candidate copulas the procedure proposed for selecting the one
        that best fit to a dataset of pairs :math:`\{(u_j, v_j )\}, j=1,2,...n` , is as follows:

        1. Estimate the most likely parameter :math:`\theta` of each copula candidate for the given
           dataset.

        2. Construct :math:`R(z|\theta)`. Calculate the area under the tail for each of the copula
           candidates.

        3. Compare the areas: :math:`a_u` achieved using empirical copula against the ones
           achieved for the copula candidates. Score the outcome of the comparison from 3 (best)
           down to 1 (worst).

        4. Proceed as in steps 2- 3 with the lower tail and function :math:`L`.

        5. Finally the sum of empirical upper and lower tail functions is compared against
           :math:`R + L`. Scores of the three comparisons are summed and the candidate with the
           highest value is selected.

        Args:
            X(np.ndarray): Matrix of shape (n,2).

        Returns:
            copula: Best copula that fits for it.

        """
        from copulas.bivariate import select_copula  # noqa
        warnings.warn(
            '`Bivariate.select_copula` has been deprecated and will be removed in a later '
            'release. Please use `copulas.bivariate.select_copula` instead',
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller, not to this wrapper
        )
        return select_copula(X)

    def save(self, filename):
        """Save the internal state of a copula in the specified filename.

        The output of :meth:`to_dict` is written as JSON.

        Args:
            filename(str): Path to save.

        Returns:
            None

        """
        content = self.to_dict()
        with open(filename, 'w') as f:
            json.dump(content, f)

    @classmethod
    def load(cls, copula_path):
        """Create a new instance from a file.

        Args:
            copula_path(str): Path to file with the serialized copula.

        Returns:
            Bivariate: Instance with the parameters stored in the file.

        """
        with open(copula_path) as f:
            copula_dict = json.load(f)

        return cls.from_dict(copula_dict)
@@ -0,0 +1,162 @@
1
+ """Clayton module."""
2
+
3
+ import numpy as np
4
+
5
+ from copulas.bivariate.base import Bivariate, CopulaTypes
6
+ from copulas.bivariate.utils import split_matrix
7
+
8
+
9
class Clayton(Bivariate):
    """Class for clayton copula model."""

    copula_type = CopulaTypes.CLAYTON
    theta_interval = [0, float('inf')]
    invalid_thetas = []

    def generator(self, t):
        r"""Compute the generator function for Clayton copula family.

        The generator is a function
        :math:`\psi: [0,1]\times\Theta \rightarrow [0, \infty)`  # noqa: JS101

        that given an Archimedian copula fulfills:
        .. math:: C(u,v) = \psi^{-1}(\psi(u) + \psi(v))

        Args:
            t (numpy.ndarray)

        Returns:
            numpy.ndarray

        """
        self.check_fit()

        return (1.0 / self.theta) * (np.power(t, -self.theta) - 1)

    def probability_density(self, X):
        r"""Compute probability density function for given copula family.

        The probability density(PDF) for the Clayton family of copulas correspond to the formula:

        .. math:: c(U,V) = \frac{\partial^2}{\partial v \partial u}C(u,v) =
            (\theta + 1)(uv)^{-\theta-1}(u^{-\theta} +
            v^{-\theta} - 1)^{-\frac{2\theta + 1}{\theta}}

        Args:
            X (numpy.ndarray)

        Returns:
            numpy.ndarray: Probability density for the input values.

        """
        self.check_fit()

        U, V = split_matrix(X)

        a = (self.theta + 1) * np.power(U * V, -(self.theta + 1))
        b = np.power(U, -self.theta) + np.power(V, -self.theta) - 1
        c = -(2 * self.theta + 1) / self.theta
        return a * np.power(b, c)

    def cumulative_distribution(self, X):
        """Compute the cumulative distribution function for the clayton copula.

        The cumulative density(cdf), or distribution function for the Clayton family of copulas
        correspond to the formula:

        .. math:: C(u,v) = (u^{-θ} + v^{-θ} - 1)^{-1/θ}

        Rows where either coordinate is not strictly positive evaluate to 0.

        Args:
            X (numpy.ndarray)

        Returns:
            numpy.ndarray: cumulative probability.

        """
        self.check_fit()

        U, V = split_matrix(X)

        cdfs = np.zeros(len(U))

        # Apply the formula only where both coordinates are strictly positive;
        # every other row keeps the 0 it was initialised with.
        positive = (U > 0) & (V > 0)
        if positive.any():
            base = np.power(U[positive], -self.theta) + np.power(V[positive], -self.theta) - 1
            cdfs[positive] = np.power(base, -1.0 / self.theta)

        return cdfs

    def percent_point(self, y, V):
        """Compute the inverse of conditional cumulative distribution :math:`C(u|v)^{-1}`.

        Args:
            y (numpy.ndarray): Value of :math:`C(u|v)`.
            V (numpy.ndarray): given value of v.

        Returns:
            numpy.ndarray
        """
        self.check_fit()

        # Defensive only: with the default ``theta_interval`` a negative theta
        # has already been rejected by ``check_fit``.
        if self.theta < 0:
            return V

        a = np.power(y, self.theta / (-1 - self.theta))
        b = np.power(V, self.theta)

        # If b == 0, self.theta tends to inf,
        # so the next operation tends to 1
        if (b == 0).all():
            return np.ones(len(V))

        return np.power((a + b - 1) / b, -1 / self.theta)

    def partial_derivative(self, X):
        r"""Compute partial derivative of cumulative distribution.

        The partial derivative of the copula(CDF) is the conditional CDF. The
        expression evaluated here is

        .. math:: \frac{\partial C(u,v)}{\partial v} =
            v^{- \theta - 1}(u^{-\theta} + v^{-\theta} - 1)^{-\frac{\theta+1}{\theta}}

        Args:
            X (np.ndarray)

        Returns:
            numpy.ndarray: Derivatives

        """
        self.check_fit()

        U, V = split_matrix(X)

        # If theta tends to inf the derivative is taken to be 0 everywhere.
        if np.isinf(self.theta):
            return np.zeros(len(V))

        A = np.power(V, -self.theta - 1)
        B = np.power(V, -self.theta) + np.power(U, -self.theta) - 1
        h = np.power(B, (-1 - self.theta) / self.theta)

        # Where A overflowed to inf the product is inf * 0 = nan; those entries
        # are replaced below, so the "invalid value" warning is silenced here.
        with np.errstate(invalid='ignore'):
            derivative = A * h

        # Only the overflowing entries are set to 0, the value the previous
        # whole-array shortcut used; the remaining entries keep their result.
        return np.where(np.isinf(A), 0.0, derivative)

    def compute_theta(self):
        r"""Compute theta parameter using Kendall's tau.

        On Clayton copula this is

        .. math:: τ = θ/(θ + 2) \implies θ = 2τ/(1-τ)
        .. math:: θ ∈ (0, ∞)

        On the corner case of :math:`τ = 1`, return infinite.
        """
        if self.tau == 1:
            return np.inf

        return 2 * self.tau / (1 - self.tau)