copulas 0.10.1.dev0__py3-none-any.whl → 0.12.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.

@@ -6,9 +6,9 @@ from enum import Enum
  import numpy as np
  import scipy

- from copulas import EPSILON, get_qualified_name
  from copulas.bivariate.base import Bivariate
  from copulas.multivariate.base import Multivariate
+ from copulas.utils import EPSILON, get_qualified_name

  LOGGER = logging.getLogger(__name__)

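Across this release, helpers that previously lived in the top-level copulas namespace (EPSILON, get_qualified_name, and friends) are imported from the new copulas.utils module instead; the module itself is added at the end of this diff. A minimal sketch of the new import path, assuming copulas 0.12.1.dev0 or later is installed and that downstream code follows the same move:

    # Hypothetical downstream usage of the relocated helpers.
    from copulas.utils import EPSILON, get_qualified_name

    print(EPSILON)                   # float32 machine epsilon, used as a numerical floor
    print(get_qualified_name(dict))  # 'builtins.dict'
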
@@ -98,7 +98,7 @@ class Tree(Multivariate):
  """
  # first column is the variable of interest
  tau_y = self.tau_matrix[:, y]
- tau_y[y] = np.NaN
+ tau_y[y] = np.nan

  temp = np.empty([self.n_nodes, 3])
  temp[:, 0] = np.arange(self.n_nodes)
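The np.NaN to np.nan rename is more than style: NumPy 2.0 removed the capitalized alias, so only the lowercase spelling keeps working. A quick check, assuming NumPy >= 2.0 is installed:

    import numpy as np

    x = np.nan          # available on all NumPy versions
    print(np.isnan(x))  # True
    # np.NaN            # AttributeError on NumPy >= 2.0; it was only ever an alias for np.nan
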
@@ -131,7 +131,7 @@ class Tree(Multivariate):
  left_parent, right_parent = edge.parents
  left_u, right_u = Edge.get_conditional_uni(left_parent, right_parent)

- tau[i, j], pvalue = scipy.stats.kendalltau(left_u, right_u)
+ tau[i, j], _pvalue = scipy.stats.kendalltau(left_u, right_u)

  return tau

@@ -212,8 +212,7 @@ class Tree(Multivariate):
  """Produce printable representation of the class."""
  template = 'L:{} R:{} D:{} Copula:{} Theta:{}'
  return '\n'.join([
- template.format(edge.L, edge.R, edge.D, edge.name, edge.theta)
- for edge in self.edges
+ template.format(edge.L, edge.R, edge.D, edge.name, edge.theta) for edge in self.edges
  ])

  def _serialize_previous_tree(self):
@@ -237,11 +236,7 @@ class Tree(Multivariate):
  Parameters of this Tree.
  """
  fitted = self.fitted
- result = {
- 'tree_type': self.tree_type,
- 'type': get_qualified_name(self),
- 'fitted': fitted
- }
+ result = {'tree_type': self.tree_type, 'type': get_qualified_name(self), 'fitted': fitted}

  if not fitted:
  return result
@@ -451,7 +446,7 @@ def get_tree(tree_type):
  Instance of a Tree of the specified type.
  """
  if not isinstance(tree_type, TreeTypes):
- if (isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__):
+ if isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__:
  tree_type = TreeTypes[tree_type.upper()]
  else:
  raise ValueError(f'Invalid tree type {tree_type}')
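As the restyled condition shows, get_tree accepts either a TreeTypes member or a string that is upper-cased and looked up in TreeTypes.__members__, and anything else raises ValueError. A hedged usage sketch (the REGULAR member name is an assumption based on the vine types used elsewhere in the package):

    from copulas.multivariate.tree import TreeTypes, get_tree

    tree = get_tree('regular')          # string form, matched case-insensitively
    same = get_tree(TreeTypes.REGULAR)  # passing the enum member directly
    # get_tree('banana')                # would raise ValueError: Invalid tree type banana
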
@@ -657,7 +652,7 @@ class Edge(object):
  'theta': self.theta,
  'tau': self.tau,
  'U': U,
- 'likelihood': self.likelihood
+ 'likelihood': self.likelihood,
  }

  @classmethod
@@ -674,8 +669,11 @@ class Edge(object):
  Instance of the edge defined on the parameters.
  """
  instance = cls(
- edge_dict['index'], edge_dict['L'], edge_dict['R'],
- edge_dict['name'], edge_dict['theta']
+ edge_dict['index'],
+ edge_dict['L'],
+ edge_dict['R'],
+ edge_dict['name'],
+ edge_dict['theta'],
  )
  instance.U = np.array(edge_dict['U'])
  parents = edge_dict['parents']
@@ -7,13 +7,18 @@ import warnings
  import numpy as np
  import pandas as pd

- from copulas import (
- EPSILON, check_valid_values, get_qualified_name, random_state, store_args,
- validate_random_state)
  from copulas.bivariate.base import Bivariate, CopulaTypes
  from copulas.multivariate.base import Multivariate
  from copulas.multivariate.tree import Tree, get_tree
  from copulas.univariate.gaussian_kde import GaussianKDE
+ from copulas.utils import (
+ EPSILON,
+ check_valid_values,
+ get_qualified_name,
+ random_state,
+ store_args,
+ validate_random_state,
+ )

  LOGGER = logging.getLogger(__name__)

@@ -71,8 +76,7 @@ class VineCopula(Multivariate):
  def __init__(self, vine_type, random_state=None):
  if sys.version_info > (3, 8):
  warnings.warn(
- 'Vines have not been fully tested on Python >= 3.8 and might '
- 'produce wrong results.'
+ 'Vines have not been fully tested on Python >= 3.8 and might produce wrong results.'
  )

  self.random_state = validate_random_state(random_state)
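The constructor shown above normalizes the seed through the shared validate_random_state helper. A hedged construction example (the 'regular' vine_type value is an assumption based on the tree types used in this package):

    from copulas.multivariate import VineCopula

    vine = VineCopula('regular', random_state=42)  # int seed becomes an np.random.RandomState
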
@@ -103,7 +107,7 @@ class VineCopula(Multivariate):
  result = {
  'type': get_qualified_name(self),
  'vine_type': self.vine_type,
- 'fitted': self.fitted
+ 'fitted': self.fitted,
  }

  if not self.fitted:
@@ -118,7 +122,7 @@ class VineCopula(Multivariate):
  'tau_mat': self.tau_mat.tolist(),
  'u_matrix': self.u_matrix.tolist(),
  'unis': [distribution.to_dict() for distribution in self.unis],
- 'columns': self.columns
+ 'columns': self.columns,
  })
  return result

@@ -293,8 +297,9 @@ class VineCopula(Multivariate):
  # get index of edge to retrieve
  for edge in current_tree:
  if i == 0:
- if (edge.L == current and edge.R == visited[0]) or\
- (edge.R == current and edge.L == visited[0]):
+ if (edge.L == current and edge.R == visited[0]) or (
+ edge.R == current and edge.L == visited[0]
+ ):
  current_ind = edge.index
  break
  else:
@@ -127,7 +127,7 @@ def chandrupatla(f, xmin, xmax, eps_m=None, eps_a=None, maxiter=50):
  # to determine which method we should use next
  xi = (a - b) / (c - b)
  phi = (fa - fb) / (fc - fb)
- iqi = np.logical_and(phi**2 < xi, (1 - phi)**2 < 1 - xi)
+ iqi = np.logical_and(phi**2 < xi, (1 - phi) ** 2 < 1 - xi)

  if not shape:
  # scalar case
@@ -143,8 +143,9 @@ def chandrupatla(f, xmin, xmax, eps_m=None, eps_a=None, maxiter=50):
  # array case
  t = np.full(shape, 0.5)
  a2, b2, c2, fa2, fb2, fc2 = a[iqi], b[iqi], c[iqi], fa[iqi], fb[iqi], fc[iqi]
- t[iqi] = fa2 / (fb2 - fa2) * fc2 / (fb2 - fc2) + (c2 - a2) / \
- (b2 - a2) * fa2 / (fc2 - fa2) * fb2 / (fc2 - fb2)
+ t[iqi] = fa2 / (fb2 - fa2) * fc2 / (fb2 - fc2) + (c2 - a2) / (b2 - a2) * fa2 / (
+ fc2 - fa2
+ ) * fb2 / (fc2 - fb2)

  # limit to the range (tlim, 1-tlim)
  t = np.minimum(1 - tlim, np.maximum(tlim, t))
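For readers of the reformatted assignment: the value written into t[iqi] is the usual inverse quadratic interpolation step. Dropping the 2 suffixes that mark the masked sub-arrays, it reads

    t = fa / (fb - fa) * fc / (fb - fc) + (c - a) / (b - a) * fa / (fc - fa) * fb / (fc - fb)

while the entries not selected by iqi keep the bisection default of 0.5; the new parenthesization only changes formatting, not the computed value.
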
@@ -21,5 +21,5 @@ __all__ = (
  'ParametricType',
  'BoundedType',
  'UniformUnivariate',
- 'LogLaplace'
+ 'LogLaplace',
  )
@@ -6,10 +6,15 @@ from enum import Enum

  import numpy as np

- from copulas import (
- NotFittedError, get_instance, get_qualified_name, random_state, store_args,
- validate_random_state)
+ from copulas.errors import NotFittedError
  from copulas.univariate.selection import select_univariate
+ from copulas.utils import (
+ get_instance,
+ get_qualified_name,
+ random_state,
+ store_args,
+ validate_random_state,
+ )


  class ParametricType(Enum):
@@ -84,8 +89,14 @@ class Univariate(object):
  return candidates

  @store_args
- def __init__(self, candidates=None, parametric=None, bounded=None, random_state=None,
- selection_sample_size=None):
+ def __init__(
+ self,
+ candidates=None,
+ parametric=None,
+ bounded=None,
+ random_state=None,
+ selection_sample_size=None,
+ ):
  self.candidates = candidates or self._select_candidates(parametric, bounded)
  self.random_state = validate_random_state(random_state)
  self.selection_sample_size = selection_sample_size
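The reformatted signature keeps the same keyword arguments. A hedged sketch of how the parametric/bounded filters are meant to be used (the specific enum member names are assumptions):

    from copulas.univariate import BoundedType, ParametricType, Univariate

    # restrict the candidate search to parametric, bounded distributions
    model = Univariate(parametric=ParametricType.PARAMETRIC, bounded=BoundedType.BOUNDED)
    # model.fit(data) would then select the best candidate and delegate to it
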
@@ -28,12 +28,7 @@ class BetaUnivariate(ScipyModel):
  loc = np.min(X)
  scale = np.max(X) - loc
  a, b, loc, scale = beta.fit(X, loc=loc, scale=scale)
- self._params = {
- 'loc': loc,
- 'scale': scale,
- 'a': a,
- 'b': b
- }
+ self._params = {'loc': loc, 'scale': scale, 'a': a, 'b': b}

  def _is_constant(self):
  return self._params['scale'] == 0
@@ -15,16 +15,10 @@ class GaussianUnivariate(ScipyModel):
  MODEL_CLASS = norm

  def _fit_constant(self, X):
- self._params = {
- 'loc': np.unique(X)[0],
- 'scale': 0
- }
+ self._params = {'loc': np.unique(X)[0], 'scale': 0}

  def _fit(self, X):
- self._params = {
- 'loc': np.mean(X),
- 'scale': np.std(X)
- }
+ self._params = {'loc': np.mean(X), 'scale': np.std(X)}

  def _is_constant(self):
  return self._params['scale'] == 0
@@ -4,9 +4,9 @@ import numpy as np
  from scipy.special import ndtr
  from scipy.stats import gaussian_kde

- from copulas import EPSILON, random_state, store_args, validate_random_state
  from copulas.optimize import bisect, chandrupatla
  from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
+ from copulas.utils import EPSILON, random_state, store_args, validate_random_state


  class GaussianKDE(ScipyModel):
@@ -129,7 +129,7 @@ class GaussianKDE(ScipyModel):
  self.check_fit()

  if len(U.shape) > 1:
- raise ValueError(f'Expected 1d array, got {(U, )}.')
+ raise ValueError(f'Expected 1d array, got {(U,)}.')

  if np.any(U > 1.0) or np.any(U < 0.0):
  raise ValueError('Expected values in range [0.0, 1.0].')
@@ -165,11 +165,10 @@ class GaussianKDE(ScipyModel):

  def _fit(self, X):
  if self._sample_size:
- X = gaussian_kde(X, bw_method=self.bw_method,
- weights=self.weights).resample(self._sample_size)
- self._params = {
- 'dataset': X.tolist()
- }
+ X = gaussian_kde(X, bw_method=self.bw_method, weights=self.weights).resample(
+ self._sample_size
+ )
+ self._params = {'dataset': X.tolist()}
  self._model = self._get_model()

  def _is_constant(self):
@@ -3,7 +3,7 @@
  import numpy as np
  from scipy.stats import kstest

- from copulas import get_instance
+ from copulas.utils import get_instance


  def select_univariate(X, candidates):
@@ -22,11 +22,7 @@ class StudentTUnivariate(ScipyModel):

  def _fit(self, X):
  dataframe, loc, scale = t.fit(X)
- self._params = {
- 'df': dataframe,
- 'loc': loc,
- 'scale': scale
- }
+ self._params = {'df': dataframe, 'loc': loc, 'scale': scale}

  def _is_constant(self):
  return self._params['scale'] == 0
@@ -6,8 +6,8 @@ import numpy as np
  from scipy.optimize import fmin_slsqp
  from scipy.stats import truncnorm

- from copulas import EPSILON, store_args, validate_random_state
  from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
+ from copulas.utils import EPSILON, store_args, validate_random_state


  class TruncatedGaussian(ScipyModel):
@@ -28,12 +28,7 @@ class TruncatedGaussian(ScipyModel):

  def _fit_constant(self, X):
  constant = np.unique(X)[0]
- self._params = {
- 'a': constant,
- 'b': constant,
- 'loc': constant,
- 'scale': 0.0
- }
+ self._params = {'a': constant, 'b': constant, 'loc': constant, 'scale': 0.0}

  def _fit(self, X):
  if self.min is None:
@@ -51,21 +46,18 @@ class TruncatedGaussian(ScipyModel):
  initial_params = X.mean(), X.std()
  with warnings.catch_warnings():
  warnings.simplefilter('ignore', category=RuntimeWarning)
- optimal = fmin_slsqp(nnlf, initial_params, iprint=False, bounds=[
- (self.min, self.max),
- (0.0, (self.max - self.min)**2)
- ])
+ optimal = fmin_slsqp(
+ nnlf,
+ initial_params,
+ iprint=False,
+ bounds=[(self.min, self.max), (0.0, (self.max - self.min) ** 2)],
+ )

  loc, scale = optimal
  a = (self.min - loc) / scale
  b = (self.max - loc) / scale

- self._params = {
- 'a': a,
- 'b': b,
- 'loc': loc,
- 'scale': scale
- }
+ self._params = {'a': a, 'b': b, 'loc': loc, 'scale': scale}

  def _is_constant(self):
  return self._params['a'] == self._params['b']
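Context for the a/b computation retained by this hunk: scipy.stats.truncnorm takes its clip points in standard-score units, so the fitted bounds are rescaled as a = (min - loc) / scale and b = (max - loc) / scale. A quick check of that equivalence, assuming scipy is installed:

    from scipy.stats import truncnorm

    loc, scale, lo, hi = 0.0, 2.0, -1.0, 5.0
    a, b = (lo - loc) / scale, (hi - loc) / scale
    print(truncnorm(a, b, loc=loc, scale=scale).support())  # (-1.0, 5.0), back on the data scale
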
@@ -15,16 +15,10 @@ class UniformUnivariate(ScipyModel):
  MODEL_CLASS = uniform

  def _fit_constant(self, X):
- self._params = {
- 'loc': np.min(X),
- 'scale': np.max(X) - np.min(X)
- }
+ self._params = {'loc': np.min(X), 'scale': np.max(X) - np.min(X)}

  def _fit(self, X):
- self._params = {
- 'loc': np.min(X),
- 'scale': np.max(X) - np.min(X)
- }
+ self._params = {'loc': np.min(X), 'scale': np.max(X) - np.min(X)}

  def _is_constant(self):
  return self._params['scale'] == 0
copulas/utils.py ADDED
@@ -0,0 +1,248 @@
+ """Utils module."""
+
+ import contextlib
+ import importlib
+ from copy import deepcopy
+ from functools import wraps
+
+ import numpy as np
+ import pandas as pd
+
+ EPSILON = np.finfo(np.float32).eps
+
+
+ @contextlib.contextmanager
+ def set_random_state(random_state, set_model_random_state):
+     """Context manager for managing the random state.
+
+     Args:
+         random_state (int or np.random.RandomState):
+             The random seed or RandomState.
+         set_model_random_state (function):
+             Function to set the random state on the model.
+     """
+     original_state = np.random.get_state()
+     np.random.set_state(random_state.get_state())
+
+     try:
+         yield
+     finally:
+         current_random_state = np.random.RandomState()
+         current_random_state.set_state(np.random.get_state())
+         set_model_random_state(current_random_state)
+         np.random.set_state(original_state)
+
+
+ def random_state(function):
+     """Set the random state before calling the function.
+
+     Args:
+         function (Callable):
+             The function to wrap around.
+     """
+
+     @wraps(function)
+     def wrapper(self, *args, **kwargs):
+         if self.random_state is None:
+             return function(self, *args, **kwargs)
+         else:
+             with set_random_state(self.random_state, self.set_random_state):
+                 return function(self, *args, **kwargs)
+
+     return wrapper
+
+
+ def validate_random_state(random_state):
+     """Validate random state argument.
+
+     Args:
+         random_state (int, numpy.random.RandomState, tuple, or None):
+             Seed or RandomState for the random generator.
+
+     Output:
+         numpy.random.RandomState
+     """
+     if random_state is None:
+         return None
+
+     if isinstance(random_state, int):
+         return np.random.RandomState(seed=random_state)
+     elif isinstance(random_state, np.random.RandomState):
+         return random_state
+     else:
+         raise TypeError(
+             f'`random_state` {random_state} expected to be an int '
+             'or `np.random.RandomState` object.'
+         )
+
+
+ def get_instance(obj, **kwargs):
+     """Create new instance of the ``obj`` argument.
+
+     Args:
+         obj (str, type, instance):
+     """
+     instance = None
+     if isinstance(obj, str):
+         package, name = obj.rsplit('.', 1)
+         instance = getattr(importlib.import_module(package), name)(**kwargs)
+     elif isinstance(obj, type):
+         instance = obj(**kwargs)
+     else:
+         if kwargs:
+             instance = obj.__class__(**kwargs)
+         else:
+             args = getattr(obj, '__args__', ())
+             kwargs = getattr(obj, '__kwargs__', {})
+             instance = obj.__class__(*args, **kwargs)
+
+     return instance
+
+
+ def store_args(__init__):
+     """Save ``*args`` and ``**kwargs`` used in the ``__init__`` of a copula.
+
+     Args:
+         __init__(callable): ``__init__`` function to store their arguments.
+
+     Returns:
+         callable: Decorated ``__init__`` function.
+     """
+
+     @wraps(__init__)
+     def new__init__(self, *args, **kwargs):
+         args_copy = deepcopy(args)
+         kwargs_copy = deepcopy(kwargs)
+         __init__(self, *args, **kwargs)
+         self.__args__ = args_copy
+         self.__kwargs__ = kwargs_copy
+
+     return new__init__
+
+
+ def get_qualified_name(_object):
+     """Return the Fully Qualified Name from an instance or class."""
+     module = _object.__module__
+     if hasattr(_object, '__name__'):
+         _class = _object.__name__
+     else:
+         _class = _object.__class__.__name__
+
+     return module + '.' + _class
+
+
+ def vectorize(function):
+     """Allow a method that only accepts scalars to accept vectors too.
+
+     This decorator has two different behaviors depending on the dimensionality of the
+     array passed as an argument:
+
+     **1-d array**
+
+     It will work under the assumption that the `function` argument is a callable
+     with signature::
+
+         function(self, X, *args, **kwargs)
+
+     where X is an scalar magnitude.
+
+     In this case the arguments of the input array will be given one at a time, and
+     both the input and output of the decorated function will have shape (n,).
+
+     **2-d array**
+
+     It will work under the assumption that the `function` argument is a callable with signature::
+
+         function(self, X0, ..., Xj, *args, **kwargs)
+
+     where `Xi` are scalar magnitudes.
+
+     It will pass the contents of each row unpacked on each call. The input is espected to have
+     shape (n, j), the output a shape of (n,)
+
+     It will return a function that is guaranteed to return a `numpy.array`.
+
+     Args:
+         function(callable): Function that only accept and return scalars.
+
+     Returns:
+         callable: Decorated function that can accept and return :attr:`numpy.array`.
+
+     """
+
+     @wraps(function)
+     def decorated(self, X, *args, **kwargs):
+         if not isinstance(X, np.ndarray):
+             return function(self, X, *args, **kwargs)
+
+         if len(X.shape) == 1:
+             X = X.reshape([-1, 1])
+
+         if len(X.shape) == 2:
+             return np.fromiter(
+                 (function(self, *x, *args, **kwargs) for x in X), np.dtype('float64')
+             )
+         else:
+             raise ValueError('Arrays of dimensionality higher than 2 are not supported.')
+
+     return decorated
+
+
+ def scalarize(function):
+     """Allow methods that only accepts 1-d vectors to work with scalars.
+
+     Args:
+         function(callable): Function that accepts and returns vectors.
+
+     Returns:
+         callable: Decorated function that accepts and returns scalars.
+     """
+
+     @wraps(function)
+     def decorated(self, X, *args, **kwargs):
+         scalar = not isinstance(X, np.ndarray)
+
+         if scalar:
+             X = np.array([X])
+
+         result = function(self, X, *args, **kwargs)
+         if scalar:
+             result = result[0]
+
+         return result
+
+     return decorated
+
+
+ def check_valid_values(function):
+     """Raise an exception if the given values are not supported.
+
+     Args:
+         function(callable): Method whose unique argument is a numpy.array-like object.
+
+     Returns:
+         callable: Decorated function
+
+     Raises:
+         ValueError: If there are missing or invalid values or if the dataset is empty.
+     """
+
+     @wraps(function)
+     def decorated(self, X, *args, **kwargs):
+         if isinstance(X, pd.DataFrame):
+             W = X.to_numpy()
+         else:
+             W = X
+
+         if not len(W):
+             raise ValueError('Your dataset is empty.')
+
+         if not (np.issubdtype(W.dtype, np.floating) or np.issubdtype(W.dtype, np.integer)):
+             raise ValueError('There are non-numerical values in your data.')
+
+         if np.isnan(W).any().any():
+             raise ValueError('There are nan values in your data.')
+
+         return function(self, X, *args, **kwargs)
+
+     return decorated
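Taken together, the new module centralizes the helpers that the rest of this diff now imports from copulas.utils. A small usage sketch that exercises only the code added above (assuming copulas 0.12.1.dev0 or later is installed):

    import numpy as np
    from copulas.utils import get_qualified_name, validate_random_state, vectorize

    print(get_qualified_name(np.ndarray))   # 'numpy.ndarray'
    rng = validate_random_state(42)         # int seed -> np.random.RandomState
    print(validate_random_state(None))      # None passes through unchanged


    class Squarer:
        @vectorize
        def square(self, x):
            # written for scalars; the decorator maps it over 1-d and 2-d arrays
            return x**2


    print(Squarer().square(3))                           # 9 (scalars bypass the vectorization)
    print(Squarer().square(np.array([1.0, 2.0, 3.0])))   # [1. 4. 9.]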