copulas 0.12.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- copulas/__init__.py +91 -0
- copulas/bivariate/__init__.py +175 -0
- copulas/bivariate/base.py +448 -0
- copulas/bivariate/clayton.py +163 -0
- copulas/bivariate/frank.py +170 -0
- copulas/bivariate/gumbel.py +144 -0
- copulas/bivariate/independence.py +81 -0
- copulas/bivariate/utils.py +19 -0
- copulas/datasets.py +214 -0
- copulas/errors.py +5 -0
- copulas/multivariate/__init__.py +8 -0
- copulas/multivariate/base.py +200 -0
- copulas/multivariate/gaussian.py +345 -0
- copulas/multivariate/tree.py +691 -0
- copulas/multivariate/vine.py +359 -0
- copulas/optimize/__init__.py +154 -0
- copulas/univariate/__init__.py +25 -0
- copulas/univariate/base.py +661 -0
- copulas/univariate/beta.py +48 -0
- copulas/univariate/gamma.py +38 -0
- copulas/univariate/gaussian.py +27 -0
- copulas/univariate/gaussian_kde.py +192 -0
- copulas/univariate/log_laplace.py +38 -0
- copulas/univariate/selection.py +36 -0
- copulas/univariate/student_t.py +31 -0
- copulas/univariate/truncated_gaussian.py +66 -0
- copulas/univariate/uniform.py +27 -0
- copulas/utils.py +248 -0
- copulas/visualization.py +345 -0
- copulas-0.12.4.dev3.dist-info/METADATA +215 -0
- copulas-0.12.4.dev3.dist-info/RECORD +34 -0
- copulas-0.12.4.dev3.dist-info/WHEEL +5 -0
- copulas-0.12.4.dev3.dist-info/licenses/LICENSE +106 -0
- copulas-0.12.4.dev3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""GammaUnivariate module."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import gamma
|
|
5
|
+
|
|
6
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GammaUnivariate(ScipyModel):
    """Gamma univariate model backed by ``scipy.stats.gamma``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.SEMI_BOUNDED
    MODEL_CLASS = gamma

    def _fit_constant(self, X):
        """Store degenerate parameters located at the first unique value of ``X``."""
        constant = np.unique(X)[0]
        # A zero scale is the marker that ``_is_constant`` checks for.
        self._params = dict(a=0.0, loc=constant, scale=0.0)

    def _fit(self, X):
        """Estimate ``a``, ``loc`` and ``scale`` from ``X`` using ``gamma.fit``."""
        shape, location, spread = gamma.fit(X)
        self._params = dict(a=shape, loc=location, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""GaussianUnivariate module."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import norm
|
|
5
|
+
|
|
6
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GaussianUnivariate(ScipyModel):
    """Normal (Gaussian) univariate model backed by ``scipy.stats.norm``."""

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED

    MODEL_CLASS = norm

    def _fit_constant(self, X):
        """Store a zero-scale parameter set located at the first unique value of ``X``."""
        constant = np.unique(X)[0]
        self._params = dict(loc=constant, scale=0)

    def _fit(self, X):
        """Use the mean and standard deviation of ``X`` as ``loc`` and ``scale``."""
        center = np.mean(X)
        spread = np.std(X)
        self._params = dict(loc=center, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""GaussianKDE module."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.special import ndtr
|
|
5
|
+
from scipy.stats import gaussian_kde
|
|
6
|
+
|
|
7
|
+
from copulas.optimize import bisect, chandrupatla
|
|
8
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
9
|
+
from copulas.utils import EPSILON, random_state, store_args, validate_random_state
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GaussianKDE(ScipyModel):
    """A wrapper for gaussian Kernel density estimation.

    It was implemented in scipy.stats toolbox. gaussian_kde is slower than statsmodels
    but allows more flexibility.

    When a sample_size is provided the fit method will sample the
    data, and mask the real information. Also, ensure the number of
    entries will be always the value of sample_size.

    Args:
        sample_size (int):
            Number of points resampled from a KDE of the training data and kept
            as the model dataset. If falsy at fit time, the training data is
            stored unchanged.
        random_state (int or np.random.RandomState):
            Seed or state, checked through ``validate_random_state``.
        bw_method:
            Forwarded unchanged to ``scipy.stats.gaussian_kde``.
        weights:
            Forwarded unchanged to ``scipy.stats.gaussian_kde``.
    """

    PARAMETRIC = ParametricType.NON_PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED
    MODEL_CLASS = gaussian_kde

    @store_args
    def __init__(self, sample_size=None, random_state=None, bw_method=None, weights=None):
        self.random_state = validate_random_state(random_state)
        self._sample_size = sample_size
        self.bw_method = bw_method
        self.weights = weights

    def _get_model(self):
        """Build a ``gaussian_kde`` from the dataset stored in ``self._params``."""
        dataset = self._params['dataset']
        # NOTE(review): this also fills in ``_sample_size`` when it was unset, so a
        # later ``_fit`` on the same instance will take the resampling branch.
        self._sample_size = self._sample_size or len(dataset)
        return gaussian_kde(dataset, bw_method=self.bw_method, weights=self.weights)

    def _get_bounds(self):
        """Return the dataset range widened by five standard deviations on each side."""
        X = self._params['dataset']
        # The margin is identical on both sides, so compute the deviation once.
        margin = 5 * np.std(X)
        lower = np.min(X) - margin
        upper = np.max(X) + margin

        return lower, upper

    def probability_density(self, X):
        """Compute the probability density for each point in X.

        Arguments:
            X (numpy.ndarray):
                Values for which the probability density will be computed.
                It must have shape (n, 1).

        Returns:
            numpy.ndarray:
                Probability density values for points in X.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        return self._model.evaluate(X)

    @random_state
    def sample(self, n_samples=1):
        """Sample values from this model.

        Argument:
            n_samples (int):
                Number of values to sample

        Returns:
            numpy.ndarray:
                Array of shape (n_samples, 1) with values randomly
                sampled from this model distribution.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        # ``resample`` returns one row per dimension; keep the only row.
        return self._model.resample(size=n_samples)[0]

    def cumulative_distribution(self, X):
        """Compute the cumulative distribution value for each point in X.

        Arguments:
            X (numpy.ndarray):
                Values for which the cumulative distribution will be computed.
                It must have shape (n, 1).

        Returns:
            numpy.ndarray:
                Cumulative distribution values for points in X.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        X = np.array(X)
        # Standard deviation of each Gaussian kernel.
        stdev = np.sqrt(self._model.covariance[0, 0])
        # Mass of every kernel that lies below the lower integration bound.
        lower = ndtr((self._get_bounds()[0] - self._model.dataset) / stdev)[0]
        # Mass of every kernel below each requested point: one row per point.
        uppers = ndtr((X[:, None] - self._model.dataset) / stdev)
        # Weighted sum over kernels of the mass between the lower bound and X.
        return (uppers - lower).dot(self._model.weights)

    def percent_point(self, U, method='chandrupatla'):
        """Compute the inverse cumulative distribution value for each point in U.

        Arguments:
            U (numpy.ndarray):
                Values for which the cumulative distribution will be computed.
                It must have shape (n, 1) and values must be in [0,1].
            method (str):
                Whether to use the `chandrupatla` or `bisect` solver.

        Returns:
            numpy.ndarray:
                Inverse cumulative distribution values for points in U.

        Raises:
            NotFittedError:
                if the model is not fitted.
            ValueError:
                if ``U`` has more than one dimension or holds values outside [0, 1].
        """
        self.check_fit()

        if len(U.shape) > 1:
            # Report the offending shape rather than dumping the whole array.
            raise ValueError(f'Expected 1d array, got array with shape {U.shape}.')

        if np.any(U > 1.0) or np.any(U < 0.0):
            raise ValueError('Expected values in range [0.0, 1.0].')

        # Probabilities within EPSILON of 0 or 1 map to -inf / +inf directly.
        is_one = U >= 1.0 - EPSILON
        is_zero = U <= EPSILON
        is_valid = ~(is_zero | is_one)

        lower, upper = self._get_bounds()

        def _f(X):
            # Root of this function is the point whose CDF equals the target.
            return self.cumulative_distribution(X) - U[is_valid]

        X = np.zeros(U.shape)
        X[is_one] = float('inf')
        X[is_zero] = float('-inf')
        if is_valid.any():
            # The solvers take one bracket per target value.
            lower = np.full(U[is_valid].shape, lower)
            upper = np.full(U[is_valid].shape, upper)
            if method == 'bisect':
                X[is_valid] = bisect(_f, lower, upper)
            else:
                X[is_valid] = chandrupatla(_f, lower, upper)

        return X

    def _fit_constant(self, X):
        """Store a dataset that repeats the first unique value of ``X``."""
        sample_size = self._sample_size or len(X)
        constant = np.unique(X)[0]
        self._params = {
            'dataset': [constant] * sample_size,
        }

    def _fit(self, X):
        """Store ``X`` (or a KDE resample of it) as the dataset and build the model."""
        if self._sample_size:
            # Replace the real data with points drawn from a KDE fitted on it.
            X = gaussian_kde(X, bw_method=self.bw_method, weights=self.weights).resample(
                self._sample_size
            )
        self._params = {'dataset': X.tolist()}
        self._model = self._get_model()

    def _is_constant(self):
        """Return whether the stored dataset holds a single distinct value."""
        return len(np.unique(self._params['dataset'])) == 1

    def _extract_constant(self):
        """Return the first element of the stored dataset."""
        return self._params['dataset'][0]

    def _set_params(self, params):
        """Set the parameters of this univariate.

        Args:
            params (dict):
                Parameters to recreate this instance.
        """
        self._params = params.copy()
        if self._is_constant():
            constant = self._extract_constant()
            self._set_constant_value(constant)
        else:
            self._model = self._get_model()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""LogLaplace module."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import loglaplace
|
|
5
|
+
|
|
6
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LogLaplace(ScipyModel):
    """Log-Laplace univariate model backed by ``scipy.stats.loglaplace``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.loglaplace.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.SEMI_BOUNDED
    MODEL_CLASS = loglaplace

    def _fit_constant(self, X):
        """Store degenerate parameters located at the first unique value of ``X``."""
        constant = np.unique(X)[0]
        # A zero scale is the marker that ``_is_constant`` checks for.
        self._params = dict(c=2.0, loc=constant, scale=0.0)

    def _fit(self, X):
        """Estimate ``c``, ``loc`` and ``scale`` from ``X`` using ``loglaplace.fit``."""
        shape, location, spread = loglaplace.fit(X)
        self._params = dict(c=shape, loc=location, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Univariate selection function."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import kstest
|
|
5
|
+
|
|
6
|
+
from copulas.utils import get_instance
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def select_univariate(X, candidates):
    """Pick the candidate whose fitted CDF is closest to the data.

    Every candidate is instantiated, fitted on ``X`` and scored with the
    Kolmogorov-Smirnov statistic; the lowest statistic wins. Candidates that
    raise at any of those steps are skipped.

    Args:
        X (pandas.DataFrame):
            Data for which the best univariate must be found.
        candidates (list[Univariate]):
            List of Univariate subclasses (or instances of those) to choose from.

    Returns:
        Univariate:
            Fresh instance created from the selected candidate.
    """
    winner = None
    lowest_statistic = np.inf
    for candidate in candidates:
        try:
            fitted = get_instance(candidate)
            fitted.fit(X)
            statistic, _pvalue = kstest(X, fitted.cdf)
        except Exception:
            # Distribution not supported
            continue

        if statistic < lowest_statistic:
            lowest_statistic, winner = statistic, candidate

    return get_instance(winner)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""StudentTUnivariate module."""
|
|
2
|
+
|
|
3
|
+
from scipy.stats import t
|
|
4
|
+
|
|
5
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StudentTUnivariate(ScipyModel):
    """Student's t univariate model backed by ``scipy.stats.t``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED

    MODEL_CLASS = t

    def _fit_constant(self, X):
        """Run the regular fit, then overwrite ``scale`` with zero."""
        self._fit(X)
        # A zero scale is the marker that ``_is_constant`` checks for.
        self._params.update(scale=0)

    def _fit(self, X):
        """Estimate ``df``, ``loc`` and ``scale`` from ``X`` using ``t.fit``."""
        degrees_of_freedom, location, spread = t.fit(X)
        self._params = dict(df=degrees_of_freedom, loc=location, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""TruncatedGaussian module."""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from scipy.optimize import fmin_slsqp
|
|
7
|
+
from scipy.stats import truncnorm
|
|
8
|
+
|
|
9
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
10
|
+
from copulas.utils import EPSILON, store_args, validate_random_state
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TruncatedGaussian(ScipyModel):
    """Truncated normal univariate model backed by ``scipy.stats.truncnorm``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.truncnorm.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.BOUNDED
    MODEL_CLASS = truncnorm

    @store_args
    def __init__(self, minimum=None, maximum=None, random_state=None):
        self.random_state = validate_random_state(random_state)
        self.min = minimum
        self.max = maximum

    def _fit_constant(self, X):
        """Store parameters whose ``a``, ``b`` and ``loc`` all equal the constant value."""
        value = np.unique(X)[0]
        self._params = dict(a=value, b=value, loc=value, scale=0.0)

    def _fit(self, X):
        """Fit ``loc`` and ``scale`` by minimising the negative log-likelihood.

        Bounds that were not given at construction are derived from the data,
        padded by ``EPSILON``, and stored on the instance.
        """
        if self.min is None:
            self.min = X.min() - EPSILON

        if self.max is None:
            self.max = X.max() + EPSILON

        def standardized_limits(loc, scale):
            # truncnorm takes its limits in units of ``scale`` around ``loc``.
            return (self.min - loc) / scale, (self.max - loc) / scale

        def nnlf(params):
            loc, scale = params
            a, b = standardized_limits(loc, scale)
            return truncnorm.nnlf((a, b, loc, scale), X)

        starting_point = X.mean(), X.std()
        search_space = [(self.min, self.max), (0.0, (self.max - self.min) ** 2)]
        with warnings.catch_warnings():
            # Only the optimisation itself runs with RuntimeWarning silenced.
            warnings.simplefilter('ignore', category=RuntimeWarning)
            optimum = fmin_slsqp(nnlf, starting_point, iprint=False, bounds=search_space)

        loc, scale = optimum
        a, b = standardized_limits(loc, scale)

        self._params = dict(a=a, b=b, loc=loc, scale=scale)

    def _is_constant(self):
        """Return whether the stored ``a`` and ``b`` limits are equal."""
        params = self._params
        return params['a'] == params['b']

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""UniformUnivariate module."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy.stats import uniform
|
|
5
|
+
|
|
6
|
+
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class UniformUnivariate(ScipyModel):
    """Uniform univariate model backed by ``scipy.stats.uniform``."""

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.BOUNDED

    MODEL_CLASS = uniform

    def _fit_constant(self, X):
        """Store the minimum of ``X`` as ``loc`` and its range as ``scale``."""
        lowest, highest = np.min(X), np.max(X)
        self._params = dict(loc=lowest, scale=highest - lowest)

    def _fit(self, X):
        """Store the minimum of ``X`` as ``loc`` and its range as ``scale``."""
        lowest, highest = np.min(X), np.max(X)
        self._params = dict(loc=lowest, scale=highest - lowest)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
|
copulas/utils.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""Utils module."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import importlib
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from functools import wraps
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
# Machine epsilon of ``float32``; shared as a small numeric tolerance.
EPSILON = np.finfo(np.float32).eps
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@contextlib.contextmanager
def set_random_state(random_state, set_model_random_state):
    """Context manager for managing the random state.

    While the context is active, the global NumPy random state is replaced by
    the state of ``random_state``. On exit, the advanced state is handed to
    ``set_model_random_state`` and the previous global state is restored.

    Args:
        random_state (np.random.RandomState):
            RandomState whose state is installed globally. It must provide
            ``get_state()``.
        set_model_random_state (function):
            Function to set the random state on the model. It receives a new
            ``np.random.RandomState`` carrying the state reached inside the
            context.
    """
    original_state = np.random.get_state()
    np.random.set_state(random_state.get_state())

    try:
        yield
    finally:
        try:
            current_random_state = np.random.RandomState()
            current_random_state.set_state(np.random.get_state())
            set_model_random_state(current_random_state)
        finally:
            # Restore the caller's global state even if the callback raised,
            # so a failing model never leaks its state into the global RNG.
            np.random.set_state(original_state)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def random_state(function):
    """Run the decorated method under the instance's own random state.

    When ``self.random_state`` is ``None`` the method is called directly.
    Otherwise the call happens inside ``set_random_state``, using
    ``self.random_state`` and ``self.set_random_state``.

    Args:
        function (Callable):
            The method to wrap.
    """

    @wraps(function)
    def wrapper(self, *args, **kwargs):
        if self.random_state is not None:
            with set_random_state(self.random_state, self.set_random_state):
                return function(self, *args, **kwargs)

        return function(self, *args, **kwargs)

    return wrapper
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def validate_random_state(random_state):
    """Validate random state argument.

    Args:
        random_state (int, numpy.integer, numpy.random.RandomState, or None):
            Seed or RandomState for the random generator.

    Output:
        numpy.random.RandomState or None:
            ``None`` when no random state was given, the same object when a
            ``RandomState`` was given, or a new ``RandomState`` seeded with
            the given integer.

    Raises:
        TypeError:
            If ``random_state`` is none of the supported types.
    """
    if random_state is None:
        return None

    # NumPy integer scalars (e.g. ``np.int64``) are valid seeds as well.
    if isinstance(random_state, (int, np.integer)):
        return np.random.RandomState(seed=random_state)

    if isinstance(random_state, np.random.RandomState):
        return random_state

    raise TypeError(
        f'`random_state` {random_state} expected to be an int '
        'or `np.random.RandomState` object.'
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_instance(obj, **kwargs):
    """Create new instance of the ``obj`` argument.

    Args:
        obj (str, type, instance):
            A dotted ``package.Name`` path that is imported and called, a
            class that is called, or an instance whose class is called.
        **kwargs:
            Keyword arguments for the constructor. For an instance without
            ``kwargs``, its ``__args__`` / ``__kwargs__`` attributes are used
            when present.

    Returns:
        The newly created object.
    """
    if isinstance(obj, str):
        module_path, attribute = obj.rsplit('.', 1)
        factory = getattr(importlib.import_module(module_path), attribute)
        return factory(**kwargs)

    if isinstance(obj, type):
        return obj(**kwargs)

    if kwargs:
        return obj.__class__(**kwargs)

    # Fall back to whatever constructor arguments were recorded on the instance.
    stored_args = getattr(obj, '__args__', ())
    stored_kwargs = getattr(obj, '__kwargs__', {})
    return obj.__class__(*stored_args, **stored_kwargs)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def store_args(__init__):
    """Record the arguments an ``__init__`` was called with on the instance.

    Deep copies of ``*args`` and ``**kwargs`` are taken before the wrapped
    ``__init__`` runs and are stored afterwards as ``__args__`` and
    ``__kwargs__``.

    Args:
        __init__(callable): ``__init__`` function to store their arguments.

    Returns:
        callable: Decorated ``__init__`` function.
    """

    @wraps(__init__)
    def init_and_record(self, *args, **kwargs):
        # Copy first so that mutations done by ``__init__`` are not recorded.
        positional_snapshot = deepcopy(args)
        keyword_snapshot = deepcopy(kwargs)

        __init__(self, *args, **kwargs)

        self.__args__ = positional_snapshot
        self.__kwargs__ = keyword_snapshot

    return init_and_record
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def get_qualified_name(_object):
    """Return ``<module>.<name>`` for a class, function or instance.

    The name is the object's own ``__name__`` when it has one, otherwise the
    name of its class.
    """
    module = _object.__module__
    try:
        name = _object.__name__
    except AttributeError:
        name = _object.__class__.__name__

    return '.'.join((module, name))
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def vectorize(function):
    """Allow a method that only accepts scalars to accept vectors too.

    Inputs that are not ``numpy.ndarray`` are passed straight through to the
    decorated method. For arrays the behavior depends on the dimensionality:

    **1-d array**

    The decorated method is expected to have the signature::

        function(self, X, *args, **kwargs)

    where X is a scalar magnitude. The elements of the array are passed one at
    a time, and both input and output have shape (n,).

    **2-d array**

    The decorated method is expected to have the signature::

        function(self, X0, ..., Xj, *args, **kwargs)

    where `Xi` are scalar magnitudes. Each row is unpacked into one call. The
    input is expected to have shape (n, j), the output a shape of (n,).

    For array inputs the result is always a ``float64`` `numpy.array`.

    Args:
        function(callable): Function that only accept and return scalars.

    Returns:
        callable: Decorated function that can accept and return :attr:`numpy.array`.

    """

    @wraps(function)
    def decorated(self, X, *args, **kwargs):
        if not isinstance(X, np.ndarray):
            return function(self, X, *args, **kwargs)

        # Treat a vector as a single-column matrix so both cases share one loop.
        rows = X.reshape([-1, 1]) if X.ndim == 1 else X
        if rows.ndim != 2:
            raise ValueError('Arrays of dimensionality higher than 2 are not supported.')

        outputs = (function(self, *row, *args, **kwargs) for row in rows)
        return np.fromiter(outputs, np.dtype('float64'))

    return decorated
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def scalarize(function):
    """Let a method written for 1-d vectors be called with a scalar.

    Any input that is not a ``numpy.ndarray`` is wrapped in a one-element
    array before the call, and the first element of the result is returned.
    Array inputs are passed through untouched.

    Args:
        function(callable): Function that accepts and returns vectors.

    Returns:
        callable: Decorated function that accepts and returns scalars.
    """

    @wraps(function)
    def decorated(self, X, *args, **kwargs):
        if isinstance(X, np.ndarray):
            return function(self, X, *args, **kwargs)

        wrapped_input = np.array([X])
        return function(self, wrapped_input, *args, **kwargs)[0]

    return decorated
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def check_valid_values(function):
    """Raise an exception if the given values are not supported.

    The data is validated on an array view of ``X``; the decorated method
    still receives the original ``X`` object.

    Args:
        function(callable): Method whose unique argument is a numpy.array-like object.

    Returns:
        callable: Decorated function

    Raises:
        ValueError: If there are missing or invalid values or if the dataset is empty.
    """

    @wraps(function)
    def decorated(self, X, *args, **kwargs):
        if isinstance(X, pd.DataFrame):
            W = X.to_numpy()
        else:
            # Coerce lists, tuples and other array-likes so the dtype checks
            # below work instead of failing with AttributeError.
            W = np.asarray(X)

        if not len(W):
            raise ValueError('Your dataset is empty.')

        if not (np.issubdtype(W.dtype, np.floating) or np.issubdtype(W.dtype, np.integer)):
            raise ValueError('There are non-numerical values in your data.')

        if np.isnan(W).any():
            raise ValueError('There are nan values in your data.')

        return function(self, X, *args, **kwargs)

    return decorated
|