copulas 0.12.4.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ """GammaUnivariate module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import gamma
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class GammaUnivariate(ScipyModel):
    """Univariate model that wraps ``scipy.stats.gamma``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.SEMI_BOUNDED
    MODEL_CLASS = gamma

    def _fit_constant(self, X):
        """Store degenerate parameters for data holding a single repeated value.

        Args:
            X (numpy.ndarray):
                Data whose first unique value is recorded as ``loc``.
        """
        # A zero ``scale`` is the marker that ``_is_constant`` checks for.
        constant = np.unique(X)[0]
        self._params = dict(a=0.0, loc=constant, scale=0.0)

    def _fit(self, X):
        """Fit ``scipy.stats.gamma`` to ``X`` and keep the estimated parameters.

        Args:
            X (numpy.ndarray):
                Data passed to ``gamma.fit``.
        """
        shape, location, spread = gamma.fit(X)
        self._params = dict(a=shape, loc=location, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        scale = self._params['scale']
        return scale == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
@@ -0,0 +1,27 @@
1
+ """GaussianUnivariate module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import norm
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class GaussianUnivariate(ScipyModel):
    """Gaussian univariate model built on ``scipy.stats.norm``."""

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED

    MODEL_CLASS = norm

    def _fit_constant(self, X):
        """Store degenerate parameters for data holding a single repeated value.

        Args:
            X (numpy.ndarray):
                Data whose first unique value is recorded as ``loc``.
        """
        # A zero ``scale`` is the marker that ``_is_constant`` checks for.
        constant = np.unique(X)[0]
        self._params = dict(loc=constant, scale=0)

    def _fit(self, X):
        """Estimate ``loc`` and ``scale`` as the mean and standard deviation of ``X``.

        Args:
            X (numpy.ndarray):
                Data to summarise.
        """
        center = np.mean(X)
        spread = np.std(X)
        self._params = dict(loc=center, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        scale = self._params['scale']
        return scale == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
@@ -0,0 +1,192 @@
1
+ """GaussianKDE module."""
2
+
3
+ import numpy as np
4
+ from scipy.special import ndtr
5
+ from scipy.stats import gaussian_kde
6
+
7
+ from copulas.optimize import bisect, chandrupatla
8
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
9
+ from copulas.utils import EPSILON, random_state, store_args, validate_random_state
10
+
11
+
12
class GaussianKDE(ScipyModel):
    """Univariate model backed by ``scipy.stats.gaussian_kde``.

    gaussian_kde is slower than statsmodels but allows more flexibility.

    When a ``sample_size`` is provided, ``_fit`` does not keep the input data:
    it resamples ``sample_size`` points from a KDE built on that data and
    stores the resampled points as the model dataset instead.

    Args:
        sample_size (int):
            Number of points to resample when fitting. When falsy, the
            fitted data is stored as is.
        random_state (int or numpy.random.RandomState):
            Passed through ``validate_random_state``.
        bw_method:
            Forwarded to ``gaussian_kde`` as ``bw_method``.
        weights:
            Forwarded to ``gaussian_kde`` as ``weights``.
    """

    PARAMETRIC = ParametricType.NON_PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED
    MODEL_CLASS = gaussian_kde

    @store_args
    def __init__(self, sample_size=None, random_state=None, bw_method=None, weights=None):
        self.random_state = validate_random_state(random_state)
        self._sample_size = sample_size
        self.bw_method = bw_method
        self.weights = weights

    def _get_model(self):
        """Build a ``gaussian_kde`` from the stored dataset.

        As a side effect, ``_sample_size`` is set to the dataset length
        when it was not set (or was falsy) before.
        """
        points = self._params['dataset']
        if not self._sample_size:
            self._sample_size = len(points)

        return gaussian_kde(points, bw_method=self.bw_method, weights=self.weights)

    def _get_bounds(self):
        """Return the dataset range widened by five standard deviations per side."""
        points = self._params['dataset']
        margin = 5 * np.std(points)
        return np.min(points) - margin, np.max(points) + margin

    def probability_density(self, X):
        """Compute the probability density for each point in X.

        Arguments:
            X (numpy.ndarray):
                Values for which the probability density will be computed.

        Returns:
            numpy.ndarray:
                Result of evaluating the underlying ``gaussian_kde`` on X.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        kernel = self._model
        return kernel.evaluate(X)

    @random_state
    def sample(self, n_samples=1):
        """Sample values from this model.

        Argument:
            n_samples (int):
                Number of values to sample

        Returns:
            numpy.ndarray:
                First row of the array produced by ``gaussian_kde.resample``.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        draws = self._model.resample(size=n_samples)
        return draws[0]

    def cumulative_distribution(self, X):
        """Compute the cumulative distribution value for each point in X.

        The result is a weighted sum of Gaussian CDFs centred on every
        dataset point, measured from the lower bound of ``_get_bounds``.

        Arguments:
            X (numpy.ndarray):
                Values for which the cumulative distribution will be computed.

        Returns:
            numpy.ndarray:
                Cumulative distribution values for points in X.

        Raises:
            NotFittedError:
                if the model is not fitted.
        """
        self.check_fit()
        points = np.array(X)
        kernel = self._model
        bandwidth = np.sqrt(kernel.covariance[0, 0])
        floor = self._get_bounds()[0]
        lower_tail = ndtr((floor - kernel.dataset) / bandwidth)[0]
        upper_tail = ndtr((points[:, None] - kernel.dataset) / bandwidth)
        return (upper_tail - lower_tail).dot(kernel.weights)

    def percent_point(self, U, method='chandrupatla'):
        """Compute the inverse cumulative distribution value for each point in U.

        Values within ``EPSILON`` of 0 or 1 map to ``-inf`` and ``inf``.
        Every other value is solved numerically between the bounds
        returned by ``_get_bounds``.

        Arguments:
            U (numpy.ndarray):
                Probabilities to invert. Must be at most one-dimensional,
                with values in [0, 1].
            method (str):
                ``'bisect'`` selects the bisect solver; any other value
                selects ``chandrupatla``.

        Returns:
            numpy.ndarray:
                Inverse cumulative distribution values for points in U.

        Raises:
            NotFittedError:
                if the model is not fitted.
            ValueError:
                if U has more than one dimension or values outside [0, 1].
        """
        self.check_fit()

        if len(U.shape) > 1:
            raise ValueError(f'Expected 1d array, got {(U,)}.')

        if np.any(U > 1.0) or np.any(U < 0.0):
            raise ValueError('Expected values in range [0.0, 1.0].')

        at_one = U >= 1.0 - EPSILON
        at_zero = U <= EPSILON
        interior = ~at_zero & ~at_one

        lower, upper = self._get_bounds()

        def _f(X):
            # Root of this function is the quantile for each interior target.
            return self.cumulative_distribution(X) - U[interior]

        X = np.zeros(U.shape)
        X[at_one] = np.inf
        X[at_zero] = -np.inf
        if interior.any():
            target_shape = U[interior].shape
            lower = np.full(target_shape, lower)
            upper = np.full(target_shape, upper)
            solver = bisect if method == 'bisect' else chandrupatla
            X[interior] = solver(_f, lower, upper)

        return X

    def _fit_constant(self, X):
        """Store a dataset that repeats the first unique value of ``X``.

        The value is repeated ``_sample_size`` times, or ``len(X)`` times
        when no sample size is set.
        """
        repeats = self._sample_size or len(X)
        value = np.unique(X)[0]
        self._params = {'dataset': [value] * repeats}

    def _fit(self, X):
        """Store the dataset (resampled when a sample size is set) and build the model."""
        if self._sample_size:
            source = gaussian_kde(X, bw_method=self.bw_method, weights=self.weights)
            X = source.resample(self._sample_size)

        self._params = {'dataset': X.tolist()}
        self._model = self._get_model()

    def _is_constant(self):
        """Return whether the stored dataset holds exactly one distinct value."""
        distinct = np.unique(self._params['dataset'])
        return len(distinct) == 1

    def _extract_constant(self):
        """Return the first entry of the stored dataset."""
        dataset = self._params['dataset']
        return dataset[0]

    def _set_params(self, params):
        """Set the parameters of this univariate.

        Args:
            params (dict):
                Parameters to recreate this instance.
        """
        self._params = params.copy()
        if not self._is_constant():
            self._model = self._get_model()
            return

        self._set_constant_value(self._extract_constant())
@@ -0,0 +1,38 @@
1
+ """LogLaplace module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import loglaplace
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class LogLaplace(ScipyModel):
    """Univariate model that wraps ``scipy.stats.loglaplace``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.loglaplace.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.SEMI_BOUNDED
    MODEL_CLASS = loglaplace

    def _fit_constant(self, X):
        """Store degenerate parameters for data holding a single repeated value.

        Args:
            X (numpy.ndarray):
                Data whose first unique value is recorded as ``loc``.
        """
        # A zero ``scale`` is the marker that ``_is_constant`` checks for.
        constant = np.unique(X)[0]
        self._params = dict(c=2.0, loc=constant, scale=0.0)

    def _fit(self, X):
        """Fit ``scipy.stats.loglaplace`` to ``X`` and keep the estimated parameters.

        Args:
            X (numpy.ndarray):
                Data passed to ``loglaplace.fit``.
        """
        shape, location, spread = loglaplace.fit(X)
        self._params = dict(c=shape, loc=location, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        scale = self._params['scale']
        return scale == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
@@ -0,0 +1,36 @@
1
+ """Univariate selection function."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import kstest
5
+
6
+ from copulas.utils import get_instance
7
+
8
+
9
def select_univariate(X, candidates):
    """Select the best univariate class for this data.

    Each candidate is instantiated, fitted to ``X`` and scored with the
    Kolmogorov-Smirnov statistic of ``X`` against the fitted ``cdf``. The
    candidate with the lowest statistic wins. Candidates that raise any
    exception while being created, fitted or scored are skipped.

    Args:
        X (pandas.DataFrame):
            Data for which the best univariate must be found.
        candidates (list[Univariate]):
            List of Univariate subclasses (or instances of those) to choose from.

    Returns:
        Univariate:
            New instance created from the selected candidate via ``get_instance``.
    """
    lowest_statistic = np.inf
    winner = None
    for candidate in candidates:
        try:
            fitted = get_instance(candidate)
            fitted.fit(X)
            statistic, _ = kstest(X, fitted.cdf)
            if statistic < lowest_statistic:
                lowest_statistic, winner = statistic, candidate
        except Exception:
            # Distribution not supported
            continue

    return get_instance(winner)
@@ -0,0 +1,31 @@
1
+ """StudentTUnivariate module."""
2
+
3
+ from scipy.stats import t
4
+
5
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
6
+
7
+
8
class StudentTUnivariate(ScipyModel):
    """Univariate model that wraps ``scipy.stats.t``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.UNBOUNDED

    MODEL_CLASS = t

    def _fit_constant(self, X):
        """Run the regular fit and then force the stored ``scale`` to zero.

        Args:
            X (numpy.ndarray):
                Data passed to ``_fit``.
        """
        self._fit(X)
        # A zero ``scale`` is the marker that ``_is_constant`` checks for.
        self._params.update(scale=0)

    def _fit(self, X):
        """Fit ``scipy.stats.t`` to ``X`` and keep the estimated parameters.

        Args:
            X (numpy.ndarray):
                Data passed to ``t.fit``.
        """
        degrees_of_freedom, location, spread = t.fit(X)
        self._params = dict(df=degrees_of_freedom, loc=location, scale=spread)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        scale = self._params['scale']
        return scale == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
@@ -0,0 +1,66 @@
1
+ """TruncatedGaussian module."""
2
+
3
+ import warnings
4
+
5
+ import numpy as np
6
+ from scipy.optimize import fmin_slsqp
7
+ from scipy.stats import truncnorm
8
+
9
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
10
+ from copulas.utils import EPSILON, store_args, validate_random_state
11
+
12
+
13
class TruncatedGaussian(ScipyModel):
    """Univariate model that wraps ``scipy.stats.truncnorm``.

    Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.truncnorm.html

    Args:
        minimum (float):
            Lower truncation point. When ``None`` it is derived from the
            data on the first fit.
        maximum (float):
            Upper truncation point. When ``None`` it is derived from the
            data on the first fit.
        random_state (int or numpy.random.RandomState):
            Passed through ``validate_random_state``.
    """

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.BOUNDED
    MODEL_CLASS = truncnorm

    @store_args
    def __init__(self, minimum=None, maximum=None, random_state=None):
        self.random_state = validate_random_state(random_state)
        self.min = minimum
        self.max = maximum

    def _fit_constant(self, X):
        """Store degenerate parameters built from the first unique value of ``X``."""
        value = np.unique(X)[0]
        # Equal ``a`` and ``b`` are the marker that ``_is_constant`` checks for.
        self._params = dict(a=value, b=value, loc=value, scale=0.0)

    def _fit(self, X):
        """Estimate ``loc`` and ``scale`` by minimising the truncnorm negative log-likelihood.

        Missing truncation points are set from the data range, padded by
        ``EPSILON``, and stored on the instance.

        Args:
            X (numpy.ndarray):
                Data to fit.
        """
        if self.min is None:
            self.min = X.min() - EPSILON

        if self.max is None:
            self.max = X.max() + EPSILON

        def _standardized_bounds(loc, scale):
            # truncnorm expects its bounds expressed in units of ``scale`` around ``loc``.
            return (self.min - loc) / scale, (self.max - loc) / scale

        def nnlf(params):
            loc, scale = params
            a, b = _standardized_bounds(loc, scale)
            return truncnorm.nnlf((a, b, loc, scale), X)

        start = X.mean(), X.std()
        with warnings.catch_warnings():
            # RuntimeWarnings raised while the optimizer runs are silenced.
            warnings.simplefilter('ignore', category=RuntimeWarning)
            search_space = [(self.min, self.max), (0.0, (self.max - self.min) ** 2)]
            optimal = fmin_slsqp(nnlf, start, iprint=False, bounds=search_space)

        loc, scale = optimal
        a, b = _standardized_bounds(loc, scale)

        self._params = dict(a=a, b=b, loc=loc, scale=scale)

    def _is_constant(self):
        """Return whether the stored ``a`` and ``b`` parameters are equal."""
        params = self._params
        return params['a'] == params['b']

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
@@ -0,0 +1,27 @@
1
+ """UniformUnivariate module."""
2
+
3
+ import numpy as np
4
+ from scipy.stats import uniform
5
+
6
+ from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7
+
8
+
9
class UniformUnivariate(ScipyModel):
    """Uniform univariate model built on ``scipy.stats.uniform``."""

    PARAMETRIC = ParametricType.PARAMETRIC
    BOUNDED = BoundedType.BOUNDED

    MODEL_CLASS = uniform

    def _fit_constant(self, X):
        """Store the minimum of ``X`` as ``loc`` and its range as ``scale``.

        Args:
            X (numpy.ndarray):
                Data to summarise.
        """
        lowest = np.min(X)
        self._params = dict(loc=lowest, scale=np.max(X) - lowest)

    def _fit(self, X):
        """Store the minimum of ``X`` as ``loc`` and its range as ``scale``.

        Args:
            X (numpy.ndarray):
                Data to summarise.
        """
        lowest = np.min(X)
        self._params = dict(loc=lowest, scale=np.max(X) - lowest)

    def _is_constant(self):
        """Return whether the stored ``scale`` parameter equals zero."""
        scale = self._params['scale']
        return scale == 0

    def _extract_constant(self):
        """Return the stored ``loc`` parameter."""
        params = self._params
        return params['loc']
copulas/utils.py ADDED
@@ -0,0 +1,248 @@
1
+ """Utils module."""
2
+
3
+ import contextlib
4
+ import importlib
5
+ from copy import deepcopy
6
+ from functools import wraps
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ EPSILON = np.finfo(np.float32).eps
12
+
13
+
14
@contextlib.contextmanager
def set_random_state(random_state, set_model_random_state):
    """Temporarily drive NumPy's global generator from ``random_state``.

    On entry, the global NumPy state is saved and replaced by the state of
    ``random_state``. On exit, even after an error, the advanced state is
    wrapped in a new ``RandomState`` and handed to
    ``set_model_random_state``, and then the saved global state is restored.

    Args:
        random_state (np.random.RandomState):
            Generator whose state is loaded into the global generator.
        set_model_random_state (function):
            Callback that receives the ``RandomState`` holding the advanced state.
    """
    saved_global_state = np.random.get_state()
    np.random.set_state(random_state.get_state())

    try:
        yield
    finally:
        # Capture where the global generator ended up before putting it back.
        advanced = np.random.RandomState()
        advanced.set_state(np.random.get_state())
        set_model_random_state(advanced)
        np.random.set_state(saved_global_state)
34
+
35
+
36
def random_state(function):
    """Run a method under the instance's own random state, when it has one.

    If ``self.random_state`` is ``None`` the method is called directly.
    Otherwise the call happens inside ``set_random_state``, using
    ``self.random_state`` and ``self.set_random_state``.

    Args:
        function (Callable):
            The method to wrap.

    Returns:
        Callable: The wrapped method.
    """

    @wraps(function)
    def wrapper(self, *args, **kwargs):
        if self.random_state is not None:
            with set_random_state(self.random_state, self.set_random_state):
                return function(self, *args, **kwargs)

        return function(self, *args, **kwargs)

    return wrapper
53
+
54
+
55
def validate_random_state(random_state):
    """Validate random state argument.

    Args:
        random_state (int, numpy.integer, numpy.random.RandomState, or None):
            Seed or RandomState for the random generator.

    Returns:
        numpy.random.RandomState or None:
            ``None`` when ``random_state`` is ``None``, the same object when
            it is already a ``RandomState``, otherwise a new ``RandomState``
            seeded with the given integer.

    Raises:
        TypeError:
            If ``random_state`` is none of the accepted types.
    """
    if random_state is None:
        return None

    if isinstance(random_state, np.random.RandomState):
        return random_state

    # NumPy integer scalars (e.g. seeds taken from an array) are not ``int``
    # subclasses, so they are accepted explicitly.
    if isinstance(random_state, (int, np.integer)):
        return np.random.RandomState(seed=random_state)

    raise TypeError(
        f'`random_state` {random_state} expected to be an int '
        'or `np.random.RandomState` object.'
    )
77
+
78
+
79
def get_instance(obj, **kwargs):
    """Create new instance of the ``obj`` argument.

    Args:
        obj (str, type, instance):
            A dotted ``package.Name`` path, a class, or an existing instance.
        **kwargs:
            Keyword arguments for the constructor. For an instance, they
            replace the arguments stored on it; when none are given, the
            instance's ``__args__`` and ``__kwargs__`` attributes (if any)
            are reused.

    Returns:
        A newly constructed object.
    """
    if isinstance(obj, str):
        module_path, attribute = obj.rsplit('.', 1)
        factory = getattr(importlib.import_module(module_path), attribute)
        return factory(**kwargs)

    if isinstance(obj, type):
        return obj(**kwargs)

    if kwargs:
        return obj.__class__(**kwargs)

    stored_args = getattr(obj, '__args__', ())
    stored_kwargs = getattr(obj, '__kwargs__', {})
    return obj.__class__(*stored_args, **stored_kwargs)
100
+
101
+
102
def store_args(__init__):
    """Save ``*args`` and ``**kwargs`` used in the ``__init__`` of a copula.

    Deep copies of the arguments are taken before ``__init__`` runs and are
    attached to the instance as ``__args__`` and ``__kwargs__`` after it
    returns.

    Args:
        __init__(callable): ``__init__`` function to store their arguments.

    Returns:
        callable: Decorated ``__init__`` function.
    """

    @wraps(__init__)
    def new__init__(self, *args, **kwargs):
        # Snapshot first, so mutations made by ``__init__`` are not recorded.
        positional_snapshot = deepcopy(args)
        keyword_snapshot = deepcopy(kwargs)

        __init__(self, *args, **kwargs)

        self.__args__ = positional_snapshot
        self.__kwargs__ = keyword_snapshot

    return new__init__
121
+
122
+
123
+ def get_qualified_name(_object):
124
+ """Return the Fully Qualified Name from an instance or class."""
125
+ module = _object.__module__
126
+ if hasattr(_object, '__name__'):
127
+ _class = _object.__name__
128
+ else:
129
+ _class = _object.__class__.__name__
130
+
131
+ return module + '.' + _class
132
+
133
+
134
+ def vectorize(function):
135
+ """Allow a method that only accepts scalars to accept vectors too.
136
+
137
+ This decorator has two different behaviors depending on the dimensionality of the
138
+ array passed as an argument:
139
+
140
+ **1-d array**
141
+
142
+ It will work under the assumption that the `function` argument is a callable
143
+ with signature::
144
+
145
+ function(self, X, *args, **kwargs)
146
+
147
+ where X is an scalar magnitude.
148
+
149
+ In this case the arguments of the input array will be given one at a time, and
150
+ both the input and output of the decorated function will have shape (n,).
151
+
152
+ **2-d array**
153
+
154
+ It will work under the assumption that the `function` argument is a callable with signature::
155
+
156
+ function(self, X0, ..., Xj, *args, **kwargs)
157
+
158
+ where `Xi` are scalar magnitudes.
159
+
160
+ It will pass the contents of each row unpacked on each call. The input is espected to have
161
+ shape (n, j), the output a shape of (n,)
162
+
163
+ It will return a function that is guaranteed to return a `numpy.array`.
164
+
165
+ Args:
166
+ function(callable): Function that only accept and return scalars.
167
+
168
+ Returns:
169
+ callable: Decorated function that can accept and return :attr:`numpy.array`.
170
+
171
+ """
172
+
173
+ @wraps(function)
174
+ def decorated(self, X, *args, **kwargs):
175
+ if not isinstance(X, np.ndarray):
176
+ return function(self, X, *args, **kwargs)
177
+
178
+ if len(X.shape) == 1:
179
+ X = X.reshape([-1, 1])
180
+
181
+ if len(X.shape) == 2:
182
+ return np.fromiter(
183
+ (function(self, *x, *args, **kwargs) for x in X), np.dtype('float64')
184
+ )
185
+ else:
186
+ raise ValueError('Arrays of dimensionality higher than 2 are not supported.')
187
+
188
+ return decorated
189
+
190
+
191
def scalarize(function):
    """Allow methods that only accepts 1-d vectors to work with scalars.

    Any input that is not a ``numpy.ndarray`` is wrapped in a one-element
    array before the call, and the first element of the result is returned.
    Array inputs and their results pass through unchanged.

    Args:
        function(callable): Function that accepts and returns vectors.

    Returns:
        callable: Decorated function that accepts and returns scalars.
    """

    @wraps(function)
    def decorated(self, X, *args, **kwargs):
        if isinstance(X, np.ndarray):
            return function(self, X, *args, **kwargs)

        wrapped_result = function(self, np.array([X]), *args, **kwargs)
        return wrapped_result[0]

    return decorated
215
+
216
+
217
def check_valid_values(function):
    """Raise an exception if the given values are not supported.

    A ``pandas.DataFrame`` is converted to a NumPy array for the checks;
    the wrapped function still receives the original ``X``.

    Args:
        function(callable): Method whose unique argument is a numpy.array-like object.

    Returns:
        callable: Decorated function

    Raises:
        ValueError: If there are missing or invalid values or if the dataset is empty.
    """

    @wraps(function)
    def decorated(self, X, *args, **kwargs):
        values = X.to_numpy() if isinstance(X, pd.DataFrame) else X

        if len(values) == 0:
            raise ValueError('Your dataset is empty.')

        is_numeric = any(
            np.issubdtype(values.dtype, kind) for kind in (np.floating, np.integer)
        )
        if not is_numeric:
            raise ValueError('There are non-numerical values in your data.')

        if np.isnan(values).any():
            raise ValueError('There are nan values in your data.')

        return function(self, X, *args, **kwargs)

    return decorated