copulas 0.10.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of copulas might be problematic. Click here for more details.

@@ -0,0 +1,38 @@
1
+ """GammaUnivariate module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import gamma
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class GammaUnivariate(ScipyModel):
    """Gamma univariate model backed by ``scipy.stats.gamma``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.SEMI_BOUNDED
    MODEL_CLASS = gamma

    def _fit_constant(self, X):
        """Store zero-scale parameters located at the smallest unique value of ``X``."""
        constant = np.unique(X)[0]
        self._params = {'a': 0.0, 'loc': constant, 'scale': 0.0}

    def _fit(self, X):
        """Estimate ``a``, ``loc`` and ``scale`` from ``X`` with ``gamma.fit``."""
        shape, location, spread = gamma.fit(X)
        self._params = {'a': shape, 'loc': location, 'scale': spread}

    def _is_constant(self):
        """Report whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter, which holds the constant value."""
        return self._params['loc']
@@ -0,0 +1,33 @@
1
+ """GaussianUnivariate module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import norm
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class GaussianUnivariate(ScipyModel):
    """Gaussian univariate model backed by ``scipy.stats.norm``."""

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED

    MODEL_CLASS = norm

    def _fit_constant(self, X):
        """Store zero-scale parameters located at the smallest unique value of ``X``."""
        constant = np.unique(X)[0]
        self._params = {'loc': constant, 'scale': 0}

    def _fit(self, X):
        """Use ``np.mean(X)`` as ``loc`` and ``np.std(X)`` as ``scale``."""
        center = np.mean(X)
        spread = np.std(X)
        self._params = {'loc': center, 'scale': spread}

    def _is_constant(self):
        """Report whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter, which holds the constant value."""
        return self._params['loc']
@@ -0,0 +1,193 @@
1
+ """GaussianKDE module."""
2
+
3
+ import numpy as np
4
+ from scipy.special import ndtr
5
+ from scipy.stats import gaussian_kde
6
+
7
+ from copulas import EPSILON, random_state, store_args, validate_random_state
8
+ from copulas.optimize import bisect, chandrupatla
9
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
10
+
11
+
12
class GaussianKDE(ScipyModel):
    """A wrapper for gaussian Kernel density estimation.

    It was implemented in scipy.stats toolbox. gaussian_kde is slower than statsmodels
    but allows more flexibility.

    When a ``sample_size`` is provided, fitting replaces the input data with
    ``sample_size`` values resampled from a KDE built on that data, so the
    stored dataset always has ``sample_size`` entries and does not hold the
    original values.

    Args:
        sample_size (int):
            Number of values to resample when fitting. If ``None`` (or 0), the
            input data is stored as given.
        random_state:
            Value handed to ``validate_random_state``; the result is kept in
            ``self.random_state``.
        bw_method:
            Forwarded unchanged to ``scipy.stats.gaussian_kde``.
        weights:
            Forwarded unchanged to ``scipy.stats.gaussian_kde``.
    """

    PARAMETRIC = ParametricType.NON_PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED
    MODEL_CLASS = gaussian_kde

    @store_args
    def __init__(self, sample_size=None, random_state=None, bw_method=None, weights=None):
        self.random_state = validate_random_state(random_state)
        self._sample_size = sample_size
        self.bw_method = bw_method
        self.weights = weights

    def _get_model(self):
        """Build a ``gaussian_kde`` from the stored dataset.

        As a side effect, ``self._sample_size`` falls back to the dataset
        length when it has not been set.
        """
        dataset = self._params['dataset']
        self._sample_size = self._sample_size or len(dataset)
        return gaussian_kde(dataset, bw_method=self.bw_method, weights=self.weights)

    def _get_bounds(self):
        """Return ``(lower, upper)``: the data range widened by five standard deviations."""
        X = self._params['dataset']
        # The standard deviation is the same for both ends; compute it once.
        margin = 5 * np.std(X)
        lower = np.min(X) - margin
        upper = np.max(X) + margin

        return lower, upper

    def probability_density(self, X):
        """Compute the probability density for each point in X.

        Arguments:
            X (numpy.ndarray):
                Values for which the probability density will be computed.
                Passed as-is to ``gaussian_kde.evaluate``.

        Returns:
            numpy.ndarray:
                Probability density values for points in X.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        return self._model.evaluate(X)

    @random_state
    def sample(self, n_samples=1):
        """Sample values from this model.

        Argument:
            n_samples (int):
                Number of values to sample

        Returns:
            numpy.ndarray:
                First row of the ``gaussian_kde.resample`` output, i.e. a
                1-d array holding ``n_samples`` values randomly sampled
                from this model distribution.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        return self._model.resample(size=n_samples)[0]

    def cumulative_distribution(self, X):
        """Compute the cumulative distribution value for each point in X.

        The result is the weighted sum, over the kernels centred on each
        dataset point, of the normal CDF at ``X`` minus the normal CDF at
        the lower bound returned by ``_get_bounds``.

        Arguments:
            X (numpy.ndarray):
                Values for which the cumulative distribution will be computed.

        Returns:
            numpy.ndarray:
                Cumulative distribution values for points in X.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        X = np.array(X)
        stdev = np.sqrt(self._model.covariance[0, 0])
        lower = ndtr((self._get_bounds()[0] - self._model.dataset) / stdev)[0]
        # X[:, None] broadcasts every query point against every dataset point.
        uppers = ndtr((X[:, None] - self._model.dataset) / stdev)
        return (uppers - lower).dot(self._model.weights)

    def percent_point(self, U, method='chandrupatla'):
        """Compute the inverse cumulative distribution value for each point in U.

        Values within ``EPSILON`` of 0 or 1 map to ``-inf`` and ``inf``
        respectively; the remaining ones are solved numerically by finding
        the root of ``cumulative_distribution(x) - u`` within the bounds
        returned by ``_get_bounds``.

        Arguments:
            U (numpy.ndarray):
                Values for which the inverse cumulative distribution will be
                computed. It must be at most 1-dimensional and its values
                must be in [0,1]. Array-likes are converted with
                ``np.asarray``.
            method (str):
                Whether to use the `chandrupatla` or `bisect` solver. Any
                value other than ``'bisect'`` selects `chandrupatla`.

        Returns:
            numpy.ndarray:
                Inverse cumulative distribution values for points in U.

        Raises:
            NotFittedError:
                if the model is not fitted.
            ValueError:
                if U has more than one dimension or holds values outside
                [0, 1].
        """
        self.check_fit()

        # Accept lists and other array-likes, not only objects exposing ``.shape``.
        U = np.asarray(U)

        if len(U.shape) > 1:
            # Report the offending shape rather than dumping the whole array.
            raise ValueError(f'Expected 1d array, got an array with shape {U.shape}.')

        if np.any(U > 1.0) or np.any(U < 0.0):
            raise ValueError('Expected values in range [0.0, 1.0].')

        is_one = U >= 1.0 - EPSILON
        is_zero = U <= EPSILON
        is_valid = ~(is_zero | is_one)

        lower, upper = self._get_bounds()

        def _f(X):
            return self.cumulative_distribution(X) - U[is_valid]

        X = np.zeros(U.shape)
        X[is_one] = float('inf')
        X[is_zero] = float('-inf')
        if is_valid.any():
            # The solvers receive one bracket per value being inverted.
            lower = np.full(U[is_valid].shape, lower)
            upper = np.full(U[is_valid].shape, upper)
            if method == 'bisect':
                X[is_valid] = bisect(_f, lower, upper)
            else:
                X[is_valid] = chandrupatla(_f, lower, upper)

        return X

    def _fit_constant(self, X):
        """Store a dataset made of the smallest unique value of ``X`` repeated.

        The repetition count is ``self._sample_size`` when set, otherwise
        ``len(X)``.
        """
        sample_size = self._sample_size or len(X)
        constant = np.unique(X)[0]
        self._params = {
            'dataset': [constant] * sample_size,
        }

    def _fit(self, X):
        """Store the dataset (resampled when a sample size is set) and build the model."""
        if self._sample_size:
            # Replace the input with values drawn from a KDE fitted on it.
            X = gaussian_kde(X, bw_method=self.bw_method,
                             weights=self.weights).resample(self._sample_size)
        self._params = {
            'dataset': X.tolist()
        }
        self._model = self._get_model()

    def _is_constant(self):
        """Report whether the stored dataset holds a single distinct value."""
        return len(np.unique(self._params['dataset'])) == 1

    def _extract_constant(self):
        """Return the first entry of the stored dataset."""
        return self._params['dataset'][0]

    def _set_params(self, params):
        """Set the parameters of this univariate.

        A constant dataset is handed to ``_set_constant_value`` (not defined
        in this class; presumably inherited from the base model); otherwise
        the KDE model is rebuilt from the dataset.

        Args:
            params (dict):
                Parameters to recreate this instance.
        """
        self._params = params.copy()
        if self._is_constant():
            constant = self._extract_constant()
            self._set_constant_value(constant)
        else:
            self._model = self._get_model()
@@ -0,0 +1,38 @@
1
+ """LogLaplace module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import loglaplace
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class LogLaplace(ScipyModel):
    """Log-Laplace univariate model backed by ``scipy.stats.loglaplace``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.loglaplace.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.SEMI_BOUNDED
    MODEL_CLASS = loglaplace

    def _fit_constant(self, X):
        """Store zero-scale parameters located at the smallest unique value of ``X``."""
        constant = np.unique(X)[0]
        self._params = {'c': 2.0, 'loc': constant, 'scale': 0.0}

    def _fit(self, X):
        """Estimate ``c``, ``loc`` and ``scale`` from ``X`` with ``loglaplace.fit``."""
        shape, location, spread = loglaplace.fit(X)
        self._params = {'c': shape, 'loc': location, 'scale': spread}

    def _is_constant(self):
        """Report whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter, which holds the constant value."""
        return self._params['loc']
@@ -0,0 +1,36 @@
1
+ """Univariate selection function."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import kstest
5
+
6
+ from copulas import get_instance
7
+
8
+
9
def select_univariate(X, candidates):
    """Pick the candidate whose fitted CDF is closest to the data.

    Each candidate is instantiated, fitted on ``X`` and scored with the
    Kolmogorov-Smirnov statistic returned by ``scipy.stats.kstest``; the
    candidate with the lowest statistic wins. Candidates that raise at any
    of those steps are skipped.

    Args:
        X (pandas.DataFrame):
            Data for which the best univariate must be found.
        candidates (list[Univariate]):
            List of Univariate subclasses (or instances of those) to choose from.

    Returns:
        Univariate:
            A fresh instance of the selected candidate, obtained from
            ``get_instance`` (not the instance that was fitted for scoring).
    """
    lowest_ks = np.inf
    selected = None
    for candidate in candidates:
        try:
            fitted = get_instance(candidate)
            fitted.fit(X)
            statistic, _ = kstest(X, fitted.cdf)
            if statistic < lowest_ks:
                lowest_ks, selected = statistic, candidate
        except Exception:
            # Distribution not supported
            pass

    # NOTE(review): if every candidate failed, ``selected`` is still None here
    # and is passed to ``get_instance`` - confirm that case is handled.
    return get_instance(selected)
@@ -0,0 +1,35 @@
1
+ """StudentTUnivariate module."""
2
+
3
+ from scipy.stats import t
4
+
5
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
6
+
7
+
8
class StudentTUnivariate(ScipyModel):
    """Student's t univariate model backed by ``scipy.stats.t``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED

    MODEL_CLASS = t

    def _fit_constant(self, X):
        """Run the regular fit, then overwrite ``scale`` with zero."""
        self._fit(X)
        self._params['scale'] = 0

    def _fit(self, X):
        """Estimate ``df``, ``loc`` and ``scale`` from ``X`` with ``t.fit``."""
        degrees_of_freedom, location, spread = t.fit(X)
        self._params = {'df': degrees_of_freedom, 'loc': location, 'scale': spread}

    def _is_constant(self):
        """Report whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter, which holds the constant value."""
        return self._params['loc']
@@ -0,0 +1,74 @@
1
+ """TruncatedGaussian module."""
2
+
3
+ import warnings
4
+
5
+ import numpy as np
6
+ from scipy.optimize import fmin_slsqp
7
+ from scipy.stats import truncnorm
8
+
9
+ from copulas import EPSILON, store_args, validate_random_state
10
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
11
+
12
+
13
class TruncatedGaussian(ScipyModel):
    """Truncated gaussian univariate model backed by ``scipy.stats.truncnorm``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.truncnorm.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.BOUNDED
    MODEL_CLASS = truncnorm

    @store_args
    def __init__(self, minimum=None, maximum=None, random_state=None):
        self.random_state = validate_random_state(random_state)
        self.min = minimum
        self.max = maximum

    def _fit_constant(self, X):
        """Store degenerate parameters built from the smallest unique value of ``X``."""
        constant = np.unique(X)[0]
        self._params = {'a': constant, 'b': constant, 'loc': constant, 'scale': 0.0}

    def _fit(self, X):
        """Fit ``loc`` and ``scale`` by minimising the truncnorm negative log-likelihood.

        Bounds that were not given at construction are derived from the data
        (widened by ``EPSILON``) and stored on the instance.
        """
        if self.min is None:
            self.min = X.min() - EPSILON

        if self.max is None:
            self.max = X.max() + EPSILON

        def _standardize(loc, scale):
            # Express the absolute bounds in the standardized units truncnorm expects.
            return (self.min - loc) / scale, (self.max - loc) / scale

        def nnlf(params):
            loc, scale = params
            a, b = _standardize(loc, scale)
            return truncnorm.nnlf((a, b, loc, scale), X)

        search_bounds = [
            (self.min, self.max),
            (0.0, (self.max - self.min)**2)
        ]
        starting_point = X.mean(), X.std()
        with warnings.catch_warnings():
            # Only RuntimeWarnings raised during the optimisation are silenced.
            warnings.simplefilter('ignore', category=RuntimeWarning)
            optimal = fmin_slsqp(nnlf, starting_point, iprint=False, bounds=search_bounds)

        loc, scale = optimal
        a, b = _standardize(loc, scale)
        self._params = {'a': a, 'b': b, 'loc': loc, 'scale': scale}

    def _is_constant(self):
        """Report whether the stored ``a`` and ``b`` parameters are equal."""
        params = self._params
        return params['a'] == params['b']

    def _extract_constant(self):
        """Return the stored ``loc`` parameter, which holds the constant value."""
        return self._params['loc']
@@ -0,0 +1,33 @@
1
+ """UniformUnivariate module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import uniform
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class UniformUnivariate(ScipyModel):
    """Uniform univariate model backed by ``scipy.stats.uniform``."""

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.BOUNDED

    MODEL_CLASS = uniform

    def _fit_constant(self, X):
        """Use the minimum of ``X`` as ``loc`` and its range as ``scale``."""
        lowest = np.min(X)
        highest = np.max(X)
        self._params = {'loc': lowest, 'scale': highest - lowest}

    def _fit(self, X):
        """Use the minimum of ``X`` as ``loc`` and its range as ``scale``."""
        lowest = np.min(X)
        highest = np.max(X)
        self._params = {'loc': lowest, 'scale': highest - lowest}

    def _is_constant(self):
        """Report whether the stored ``scale`` parameter equals zero."""
        spread = self._params['scale']
        return spread == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter, which holds the constant value."""
        return self._params['loc']