copulas 0.11.1.dev0__py3-none-any.whl → 0.12.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of copulas might be problematic. Click here for more details.

copulas/__init__.py CHANGED
@@ -1,266 +1,15 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Top-level package for Copulas."""
4
2
 
5
3
  __author__ = 'DataCebo, Inc.'
6
4
  __email__ = 'info@sdv.dev'
7
- __version__ = '0.11.1.dev0'
5
+ __version__ = '0.12.0.dev0'
8
6
 
9
- import contextlib
10
- import importlib
11
7
  import sys
12
8
  import warnings
13
9
  from copy import deepcopy
14
10
  from importlib.metadata import entry_points
15
11
  from operator import attrgetter
16
-
17
- import numpy as np
18
- import pandas as pd
19
-
20
- EPSILON = np.finfo(np.float32).eps
21
-
22
-
23
- class NotFittedError(Exception):
24
- """NotFittedError class."""
25
-
26
-
27
- @contextlib.contextmanager
28
- def set_random_state(random_state, set_model_random_state):
29
- """Context manager for managing the random state.
30
-
31
- Args:
32
- random_state (int or np.random.RandomState):
33
- The random seed or RandomState.
34
- set_model_random_state (function):
35
- Function to set the random state on the model.
36
- """
37
- original_state = np.random.get_state()
38
-
39
- np.random.set_state(random_state.get_state())
40
-
41
- try:
42
- yield
43
- finally:
44
- current_random_state = np.random.RandomState()
45
- current_random_state.set_state(np.random.get_state())
46
- set_model_random_state(current_random_state)
47
- np.random.set_state(original_state)
48
-
49
-
50
- def random_state(function):
51
- """Set the random state before calling the function.
52
-
53
- Args:
54
- function (Callable):
55
- The function to wrap around.
56
- """
57
-
58
- def wrapper(self, *args, **kwargs):
59
- if self.random_state is None:
60
- return function(self, *args, **kwargs)
61
-
62
- else:
63
- with set_random_state(self.random_state, self.set_random_state):
64
- return function(self, *args, **kwargs)
65
-
66
- return wrapper
67
-
68
-
69
- def validate_random_state(random_state):
70
- """Validate random state argument.
71
-
72
- Args:
73
- random_state (int, numpy.random.RandomState, tuple, or None):
74
- Seed or RandomState for the random generator.
75
-
76
- Output:
77
- numpy.random.RandomState
78
- """
79
- if random_state is None:
80
- return None
81
-
82
- if isinstance(random_state, int):
83
- return np.random.RandomState(seed=random_state)
84
- elif isinstance(random_state, np.random.RandomState):
85
- return random_state
86
- else:
87
- raise TypeError(
88
- f'`random_state` {random_state} expected to be an int '
89
- 'or `np.random.RandomState` object.'
90
- )
91
-
92
-
93
- def get_instance(obj, **kwargs):
94
- """Create new instance of the ``obj`` argument.
95
-
96
- Args:
97
- obj (str, type, instance):
98
- """
99
- instance = None
100
- if isinstance(obj, str):
101
- package, name = obj.rsplit('.', 1)
102
- instance = getattr(importlib.import_module(package), name)(**kwargs)
103
- elif isinstance(obj, type):
104
- instance = obj(**kwargs)
105
- else:
106
- if kwargs:
107
- instance = obj.__class__(**kwargs)
108
- else:
109
- args = getattr(obj, '__args__', ())
110
- kwargs = getattr(obj, '__kwargs__', {})
111
- instance = obj.__class__(*args, **kwargs)
112
-
113
- return instance
114
-
115
-
116
- def store_args(__init__):
117
- """Save ``*args`` and ``**kwargs`` used in the ``__init__`` of a copula.
118
-
119
- Args:
120
- __init__(callable): ``__init__`` function to store their arguments.
121
-
122
- Returns:
123
- callable: Decorated ``__init__`` function.
124
- """
125
-
126
- def new__init__(self, *args, **kwargs):
127
- args_copy = deepcopy(args)
128
- kwargs_copy = deepcopy(kwargs)
129
- __init__(self, *args, **kwargs)
130
- self.__args__ = args_copy
131
- self.__kwargs__ = kwargs_copy
132
-
133
- return new__init__
134
-
135
-
136
- def get_qualified_name(_object):
137
- """Return the Fully Qualified Name from an instance or class."""
138
- module = _object.__module__
139
- if hasattr(_object, '__name__'):
140
- _class = _object.__name__
141
-
142
- else:
143
- _class = _object.__class__.__name__
144
-
145
- return module + '.' + _class
146
-
147
-
148
- def vectorize(function):
149
- """Allow a method that only accepts scalars to accept vectors too.
150
-
151
- This decorator has two different behaviors depending on the dimensionality of the
152
- array passed as an argument:
153
-
154
- **1-d array**
155
-
156
- It will work under the assumption that the `function` argument is a callable
157
- with signature::
158
-
159
- function(self, X, *args, **kwargs)
160
-
161
- where X is an scalar magnitude.
162
-
163
- In this case the arguments of the input array will be given one at a time, and
164
- both the input and output of the decorated function will have shape (n,).
165
-
166
- **2-d array**
167
-
168
- It will work under the assumption that the `function` argument is a callable with signature::
169
-
170
- function(self, X0, ..., Xj, *args, **kwargs)
171
-
172
- where `Xi` are scalar magnitudes.
173
-
174
- It will pass the contents of each row unpacked on each call. The input is espected to have
175
- shape (n, j), the output a shape of (n,)
176
-
177
- It will return a function that is guaranteed to return a `numpy.array`.
178
-
179
- Args:
180
- function(callable): Function that only accept and return scalars.
181
-
182
- Returns:
183
- callable: Decorated function that can accept and return :attr:`numpy.array`.
184
-
185
- """
186
-
187
- def decorated(self, X, *args, **kwargs):
188
- if not isinstance(X, np.ndarray):
189
- return function(self, X, *args, **kwargs)
190
-
191
- if len(X.shape) == 1:
192
- X = X.reshape([-1, 1])
193
-
194
- if len(X.shape) == 2:
195
- return np.fromiter(
196
- (function(self, *x, *args, **kwargs) for x in X), np.dtype('float64')
197
- )
198
-
199
- else:
200
- raise ValueError('Arrays of dimensionality higher than 2 are not supported.')
201
-
202
- decorated.__doc__ = function.__doc__
203
- return decorated
204
-
205
-
206
- def scalarize(function):
207
- """Allow methods that only accepts 1-d vectors to work with scalars.
208
-
209
- Args:
210
- function(callable): Function that accepts and returns vectors.
211
-
212
- Returns:
213
- callable: Decorated function that accepts and returns scalars.
214
- """
215
-
216
- def decorated(self, X, *args, **kwargs):
217
- scalar = not isinstance(X, np.ndarray)
218
-
219
- if scalar:
220
- X = np.array([X])
221
-
222
- result = function(self, X, *args, **kwargs)
223
- if scalar:
224
- result = result[0]
225
-
226
- return result
227
-
228
- decorated.__doc__ = function.__doc__
229
- return decorated
230
-
231
-
232
- def check_valid_values(function):
233
- """Raise an exception if the given values are not supported.
234
-
235
- Args:
236
- function(callable): Method whose unique argument is a numpy.array-like object.
237
-
238
- Returns:
239
- callable: Decorated function
240
-
241
- Raises:
242
- ValueError: If there are missing or invalid values or if the dataset is empty.
243
- """
244
-
245
- def decorated(self, X, *args, **kwargs):
246
- if isinstance(X, pd.DataFrame):
247
- W = X.to_numpy()
248
-
249
- else:
250
- W = X
251
-
252
- if not len(W):
253
- raise ValueError('Your dataset is empty.')
254
-
255
- if not (np.issubdtype(W.dtype, np.floating) or np.issubdtype(W.dtype, np.integer)):
256
- raise ValueError('There are non-numerical values in your data.')
257
-
258
- if np.isnan(W).any().any():
259
- raise ValueError('There are nan values in your data.')
260
-
261
- return function(self, X, *args, **kwargs)
262
-
263
- return decorated
12
+ from types import ModuleType
264
13
 
265
14
 
266
15
  def _get_addon_target(addon_path_name):
@@ -319,8 +68,8 @@ def _find_addons():
319
68
  for entry_point in eps:
320
69
  try:
321
70
  addon = entry_point.load()
322
- except Exception: # pylint: disable=broad-exception-caught
323
- msg = f'Failed to load "{entry_point.name}" from "{entry_point.value}".'
71
+ except Exception as e: # pylint: disable=broad-exception-caught
72
+ msg = f'Failed to load "{entry_point.name}" from "{entry_point.value}" with error:\n{e}'
324
73
  warnings.warn(msg)
325
74
  continue
326
75
 
@@ -331,6 +80,11 @@ def _find_addons():
331
80
  warnings.warn(msg)
332
81
  continue
333
82
 
83
+ if isinstance(addon, ModuleType):
84
+ addon_module_name = f'{addon_target.__name__}.{addon_name}'
85
+ if addon_module_name not in sys.modules:
86
+ sys.modules[addon_module_name] = addon
87
+
334
88
  setattr(addon_target, addon_name, addon)
335
89
 
336
90
 
@@ -3,7 +3,7 @@
3
3
  import numpy as np
4
4
  import pandas as pd
5
5
 
6
- from copulas import EPSILON
6
+ from copulas.utils import EPSILON
7
7
  from copulas.bivariate.base import Bivariate, CopulaTypes
8
8
  from copulas.bivariate.clayton import Clayton
9
9
  from copulas.bivariate.frank import Frank
copulas/bivariate/base.py CHANGED
@@ -8,8 +8,9 @@ import numpy as np
8
8
  from scipy import stats
9
9
  from scipy.optimize import brentq
10
10
 
11
- from copulas import EPSILON, NotFittedError, random_state, validate_random_state
12
11
  from copulas.bivariate.utils import split_matrix
12
+ from copulas.errors import NotFittedError
13
+ from copulas.utils import EPSILON, random_state, validate_random_state
13
14
 
14
15
 
15
16
  class CopulaTypes(Enum):
@@ -6,9 +6,9 @@ import numpy as np
6
6
  import scipy.integrate as integrate
7
7
  from scipy.optimize import least_squares
8
8
 
9
- from copulas import EPSILON
10
9
  from copulas.bivariate.base import Bivariate, CopulaTypes
11
10
  from copulas.bivariate.utils import split_matrix
11
+ from copulas.utils import EPSILON
12
12
 
13
13
  MIN_FLOAT_LOG = np.log(sys.float_info.min)
14
14
  MAX_FLOAT_LOG = np.log(sys.float_info.max)
copulas/datasets.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
4
4
  import pandas as pd
5
5
  from scipy import stats
6
6
 
7
- from copulas import set_random_state, validate_random_state
7
+ from copulas.utils import set_random_state, validate_random_state
8
8
 
9
9
 
10
10
  def _dummy_fn(state):
copulas/errors.py ADDED
@@ -0,0 +1,5 @@
1
+ """Copulas Exceptions."""
2
+
3
+
4
+ class NotFittedError(Exception):
5
+ """NotFittedError class."""
@@ -4,7 +4,8 @@ import pickle
4
4
 
5
5
  import numpy as np
6
6
 
7
- from copulas import NotFittedError, get_instance, validate_random_state
7
+ from copulas.errors import NotFittedError
8
+ from copulas.utils import get_instance, validate_random_state
8
9
 
9
10
 
10
11
  class Multivariate(object):
@@ -7,7 +7,9 @@ import numpy as np
7
7
  import pandas as pd
8
8
  from scipy import stats
9
9
 
10
- from copulas import (
10
+ from copulas.multivariate.base import Multivariate
11
+ from copulas.univariate import GaussianUnivariate, Univariate
12
+ from copulas.utils import (
11
13
  EPSILON,
12
14
  check_valid_values,
13
15
  get_instance,
@@ -16,8 +18,6 @@ from copulas import (
16
18
  store_args,
17
19
  validate_random_state,
18
20
  )
19
- from copulas.multivariate.base import Multivariate
20
- from copulas.univariate import GaussianUnivariate, Univariate
21
21
 
22
22
  LOGGER = logging.getLogger(__name__)
23
23
  DEFAULT_DISTRIBUTION = Univariate
@@ -70,26 +70,6 @@ class GaussianMultivariate(Multivariate):
70
70
 
71
71
  return stats.norm.ppf(np.column_stack(U))
72
72
 
73
- def _get_correlation(self, X):
74
- """Compute correlation matrix with transformed data.
75
-
76
- Args:
77
- X (numpy.ndarray):
78
- Data for which the correlation needs to be computed.
79
-
80
- Returns:
81
- numpy.ndarray:
82
- computed correlation matrix.
83
- """
84
- result = self._transform_to_normal(X)
85
- correlation = pd.DataFrame(data=result).corr().to_numpy()
86
- correlation = np.nan_to_num(correlation, nan=0.0)
87
- # If singular, add some noise to the diagonal
88
- if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
89
- correlation = correlation + np.identity(correlation.shape[0]) * EPSILON
90
-
91
- return pd.DataFrame(correlation, index=self.columns, columns=self.columns)
92
-
93
73
  @check_valid_values
94
74
  def fit(self, X):
95
75
  """Compute the distribution for each variable and then its correlation matrix.
@@ -100,42 +80,88 @@ class GaussianMultivariate(Multivariate):
100
80
  """
101
81
  LOGGER.info('Fitting %s', self)
102
82
 
83
+ # Validate the input data
84
+ X = self._validate_input(X)
85
+ columns, univariates = self._fit_columns(X)
86
+
87
+ self.columns = columns
88
+ self.univariates = univariates
89
+
90
+ LOGGER.debug('Computing correlation.')
91
+ self.correlation = self._get_correlation(X)
92
+ self.fitted = True
93
+ LOGGER.debug('GaussianMultivariate fitted successfully')
94
+
95
+ def _validate_input(self, X):
96
+ """Validate the input data."""
103
97
  if not isinstance(X, pd.DataFrame):
104
98
  X = pd.DataFrame(X)
105
99
 
100
+ return X
101
+
102
+ def _fit_columns(self, X):
103
+ """Fit each column to its distribution."""
106
104
  columns = []
107
105
  univariates = []
108
106
  for column_name, column in X.items():
109
- if isinstance(self.distribution, dict):
110
- distribution = self.distribution.get(column_name, DEFAULT_DISTRIBUTION)
111
- else:
112
- distribution = self.distribution
113
-
107
+ distribution = self._get_distribution_for_column(column_name)
114
108
  LOGGER.debug('Fitting column %s to %s', column_name, distribution)
115
109
 
116
- univariate = get_instance(distribution)
117
- try:
118
- univariate.fit(column)
119
- except BaseException:
120
- log_message = (
121
- f'Unable to fit to a {distribution} distribution for column {column_name}. '
122
- 'Using a Gaussian distribution instead.'
123
- )
124
- LOGGER.info(log_message)
125
- univariate = GaussianUnivariate()
126
- univariate.fit(column)
127
-
110
+ univariate = self._fit_column(column, distribution, column_name)
128
111
  columns.append(column_name)
129
112
  univariates.append(univariate)
130
113
 
131
- self.columns = columns
132
- self.univariates = univariates
114
+ return columns, univariates
115
+
116
+ def _get_distribution_for_column(self, column_name):
117
+ """Retrieve the distribution for a given column name."""
118
+ if isinstance(self.distribution, dict):
119
+ return self.distribution.get(column_name, DEFAULT_DISTRIBUTION)
120
+
121
+ return self.distribution
122
+
123
+ def _fit_column(self, column, distribution, column_name):
124
+ """Fit a single column to its distribution with exception handling."""
125
+ univariate = get_instance(distribution)
126
+ try:
127
+ univariate.fit(column)
128
+ except Exception as error:
129
+ univariate = self._fit_with_fallback_distribution(
130
+ column, distribution, column_name, error
131
+ )
132
+
133
+ return univariate
134
+
135
+ def _fit_with_fallback_distribution(self, column, distribution, column_name, error):
136
+ """Fall back to fitting a Gaussian distribution and log the error."""
137
+ log_message = (
138
+ f'Unable to fit to a {distribution} distribution for column {column_name}. '
139
+ 'Using a Gaussian distribution instead.'
140
+ )
141
+ LOGGER.info(log_message)
142
+ univariate = GaussianUnivariate()
143
+ univariate.fit(column)
144
+ return univariate
133
145
 
134
- LOGGER.debug('Computing correlation')
135
- self.correlation = self._get_correlation(X)
136
- self.fitted = True
146
+ def _get_correlation(self, X):
147
+ """Compute correlation matrix with transformed data.
137
148
 
138
- LOGGER.debug('GaussianMultivariate fitted successfully')
149
+ Args:
150
+ X (numpy.ndarray):
151
+ Data for which the correlation needs to be computed.
152
+
153
+ Returns:
154
+ numpy.ndarray:
155
+ computed correlation matrix.
156
+ """
157
+ result = self._transform_to_normal(X)
158
+ correlation = pd.DataFrame(data=result).corr().to_numpy()
159
+ correlation = np.nan_to_num(correlation, nan=0.0)
160
+ # If singular, add some noise to the diagonal
161
+ if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
162
+ correlation = correlation + np.identity(correlation.shape[0]) * EPSILON
163
+
164
+ return pd.DataFrame(correlation, index=self.columns, columns=self.columns)
139
165
 
140
166
  def probability_density(self, X):
141
167
  """Compute the probability density for each point in X.
@@ -6,9 +6,9 @@ from enum import Enum
6
6
  import numpy as np
7
7
  import scipy
8
8
 
9
- from copulas import EPSILON, get_qualified_name
10
9
  from copulas.bivariate.base import Bivariate
11
10
  from copulas.multivariate.base import Multivariate
11
+ from copulas.utils import EPSILON, get_qualified_name
12
12
 
13
13
  LOGGER = logging.getLogger(__name__)
14
14
 
@@ -7,7 +7,11 @@ import warnings
7
7
  import numpy as np
8
8
  import pandas as pd
9
9
 
10
- from copulas import (
10
+ from copulas.bivariate.base import Bivariate, CopulaTypes
11
+ from copulas.multivariate.base import Multivariate
12
+ from copulas.multivariate.tree import Tree, get_tree
13
+ from copulas.univariate.gaussian_kde import GaussianKDE
14
+ from copulas.utils import (
11
15
  EPSILON,
12
16
  check_valid_values,
13
17
  get_qualified_name,
@@ -15,10 +19,6 @@ from copulas import (
15
19
  store_args,
16
20
  validate_random_state,
17
21
  )
18
- from copulas.bivariate.base import Bivariate, CopulaTypes
19
- from copulas.multivariate.base import Multivariate
20
- from copulas.multivariate.tree import Tree, get_tree
21
- from copulas.univariate.gaussian_kde import GaussianKDE
22
22
 
23
23
  LOGGER = logging.getLogger(__name__)
24
24
 
@@ -76,8 +76,7 @@ class VineCopula(Multivariate):
76
76
  def __init__(self, vine_type, random_state=None):
77
77
  if sys.version_info > (3, 8):
78
78
  warnings.warn(
79
- 'Vines have not been fully tested on Python >= 3.8 and might '
80
- 'produce wrong results.'
79
+ 'Vines have not been fully tested on Python >= 3.8 and might produce wrong results.'
81
80
  )
82
81
 
83
82
  self.random_state = validate_random_state(random_state)
@@ -6,15 +6,15 @@ from enum import Enum
6
6
 
7
7
  import numpy as np
8
8
 
9
- from copulas import (
10
- NotFittedError,
9
+ from copulas.errors import NotFittedError
10
+ from copulas.univariate.selection import select_univariate
11
+ from copulas.utils import (
11
12
  get_instance,
12
13
  get_qualified_name,
13
14
  random_state,
14
15
  store_args,
15
16
  validate_random_state,
16
17
  )
17
- from copulas.univariate.selection import select_univariate
18
18
 
19
19
 
20
20
  class ParametricType(Enum):
@@ -4,9 +4,9 @@ import numpy as np
4
4
  from scipy.special import ndtr
5
5
  from scipy.stats import gaussian_kde
6
6
 
7
- from copulas import EPSILON, random_state, store_args, validate_random_state
8
7
  from copulas.optimize import bisect, chandrupatla
9
8
  from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
9
+ from copulas.utils import EPSILON, random_state, store_args, validate_random_state
10
10
 
11
11
 
12
12
  class GaussianKDE(ScipyModel):
@@ -3,7 +3,7 @@
3
3
  import numpy as np
4
4
  from scipy.stats import kstest
5
5
 
6
- from copulas import get_instance
6
+ from copulas.utils import get_instance
7
7
 
8
8
 
9
9
  def select_univariate(X, candidates):
@@ -6,8 +6,8 @@ import numpy as np
6
6
  from scipy.optimize import fmin_slsqp
7
7
  from scipy.stats import truncnorm
8
8
 
9
- from copulas import EPSILON, store_args, validate_random_state
10
9
  from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
10
+ from copulas.utils import EPSILON, store_args, validate_random_state
11
11
 
12
12
 
13
13
  class TruncatedGaussian(ScipyModel):
copulas/utils.py ADDED
@@ -0,0 +1,248 @@
1
+ """Utils module."""
2
+
3
+ import contextlib
4
+ import importlib
5
+ from copy import deepcopy
6
+ from functools import wraps
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ EPSILON = np.finfo(np.float32).eps
12
+
13
+
14
+ @contextlib.contextmanager
15
+ def set_random_state(random_state, set_model_random_state):
16
+ """Context manager for managing the random state.
17
+
18
+ Args:
19
+ random_state (int or np.random.RandomState):
20
+ The random seed or RandomState.
21
+ set_model_random_state (function):
22
+ Function to set the random state on the model.
23
+ """
24
+ original_state = np.random.get_state()
25
+ np.random.set_state(random_state.get_state())
26
+
27
+ try:
28
+ yield
29
+ finally:
30
+ current_random_state = np.random.RandomState()
31
+ current_random_state.set_state(np.random.get_state())
32
+ set_model_random_state(current_random_state)
33
+ np.random.set_state(original_state)
34
+
35
+
36
+ def random_state(function):
37
+ """Set the random state before calling the function.
38
+
39
+ Args:
40
+ function (Callable):
41
+ The function to wrap around.
42
+ """
43
+
44
+ @wraps(function)
45
+ def wrapper(self, *args, **kwargs):
46
+ if self.random_state is None:
47
+ return function(self, *args, **kwargs)
48
+ else:
49
+ with set_random_state(self.random_state, self.set_random_state):
50
+ return function(self, *args, **kwargs)
51
+
52
+ return wrapper
53
+
54
+
55
+ def validate_random_state(random_state):
56
+ """Validate random state argument.
57
+
58
+ Args:
59
+ random_state (int, numpy.random.RandomState, tuple, or None):
60
+ Seed or RandomState for the random generator.
61
+
62
+ Output:
63
+ numpy.random.RandomState
64
+ """
65
+ if random_state is None:
66
+ return None
67
+
68
+ if isinstance(random_state, int):
69
+ return np.random.RandomState(seed=random_state)
70
+ elif isinstance(random_state, np.random.RandomState):
71
+ return random_state
72
+ else:
73
+ raise TypeError(
74
+ f'`random_state` {random_state} expected to be an int '
75
+ 'or `np.random.RandomState` object.'
76
+ )
77
+
78
+
79
+ def get_instance(obj, **kwargs):
80
+ """Create new instance of the ``obj`` argument.
81
+
82
+ Args:
83
+ obj (str, type, instance):
84
+ """
85
+ instance = None
86
+ if isinstance(obj, str):
87
+ package, name = obj.rsplit('.', 1)
88
+ instance = getattr(importlib.import_module(package), name)(**kwargs)
89
+ elif isinstance(obj, type):
90
+ instance = obj(**kwargs)
91
+ else:
92
+ if kwargs:
93
+ instance = obj.__class__(**kwargs)
94
+ else:
95
+ args = getattr(obj, '__args__', ())
96
+ kwargs = getattr(obj, '__kwargs__', {})
97
+ instance = obj.__class__(*args, **kwargs)
98
+
99
+ return instance
100
+
101
+
102
+ def store_args(__init__):
103
+ """Save ``*args`` and ``**kwargs`` used in the ``__init__`` of a copula.
104
+
105
+ Args:
106
+ __init__(callable): ``__init__`` function to store their arguments.
107
+
108
+ Returns:
109
+ callable: Decorated ``__init__`` function.
110
+ """
111
+
112
+ @wraps(__init__)
113
+ def new__init__(self, *args, **kwargs):
114
+ args_copy = deepcopy(args)
115
+ kwargs_copy = deepcopy(kwargs)
116
+ __init__(self, *args, **kwargs)
117
+ self.__args__ = args_copy
118
+ self.__kwargs__ = kwargs_copy
119
+
120
+ return new__init__
121
+
122
+
123
+ def get_qualified_name(_object):
124
+ """Return the Fully Qualified Name from an instance or class."""
125
+ module = _object.__module__
126
+ if hasattr(_object, '__name__'):
127
+ _class = _object.__name__
128
+ else:
129
+ _class = _object.__class__.__name__
130
+
131
+ return module + '.' + _class
132
+
133
+
134
+ def vectorize(function):
135
+ """Allow a method that only accepts scalars to accept vectors too.
136
+
137
+ This decorator has two different behaviors depending on the dimensionality of the
138
+ array passed as an argument:
139
+
140
+ **1-d array**
141
+
142
+ It will work under the assumption that the `function` argument is a callable
143
+ with signature::
144
+
145
+ function(self, X, *args, **kwargs)
146
+
147
+ where X is an scalar magnitude.
148
+
149
+ In this case the arguments of the input array will be given one at a time, and
150
+ both the input and output of the decorated function will have shape (n,).
151
+
152
+ **2-d array**
153
+
154
+ It will work under the assumption that the `function` argument is a callable with signature::
155
+
156
+ function(self, X0, ..., Xj, *args, **kwargs)
157
+
158
+ where `Xi` are scalar magnitudes.
159
+
160
+ It will pass the contents of each row unpacked on each call. The input is espected to have
161
+ shape (n, j), the output a shape of (n,)
162
+
163
+ It will return a function that is guaranteed to return a `numpy.array`.
164
+
165
+ Args:
166
+ function(callable): Function that only accept and return scalars.
167
+
168
+ Returns:
169
+ callable: Decorated function that can accept and return :attr:`numpy.array`.
170
+
171
+ """
172
+
173
+ @wraps(function)
174
+ def decorated(self, X, *args, **kwargs):
175
+ if not isinstance(X, np.ndarray):
176
+ return function(self, X, *args, **kwargs)
177
+
178
+ if len(X.shape) == 1:
179
+ X = X.reshape([-1, 1])
180
+
181
+ if len(X.shape) == 2:
182
+ return np.fromiter(
183
+ (function(self, *x, *args, **kwargs) for x in X), np.dtype('float64')
184
+ )
185
+ else:
186
+ raise ValueError('Arrays of dimensionality higher than 2 are not supported.')
187
+
188
+ return decorated
189
+
190
+
191
+ def scalarize(function):
192
+ """Allow methods that only accepts 1-d vectors to work with scalars.
193
+
194
+ Args:
195
+ function(callable): Function that accepts and returns vectors.
196
+
197
+ Returns:
198
+ callable: Decorated function that accepts and returns scalars.
199
+ """
200
+
201
+ @wraps(function)
202
+ def decorated(self, X, *args, **kwargs):
203
+ scalar = not isinstance(X, np.ndarray)
204
+
205
+ if scalar:
206
+ X = np.array([X])
207
+
208
+ result = function(self, X, *args, **kwargs)
209
+ if scalar:
210
+ result = result[0]
211
+
212
+ return result
213
+
214
+ return decorated
215
+
216
+
217
+ def check_valid_values(function):
218
+ """Raise an exception if the given values are not supported.
219
+
220
+ Args:
221
+ function(callable): Method whose unique argument is a numpy.array-like object.
222
+
223
+ Returns:
224
+ callable: Decorated function
225
+
226
+ Raises:
227
+ ValueError: If there are missing or invalid values or if the dataset is empty.
228
+ """
229
+
230
+ @wraps(function)
231
+ def decorated(self, X, *args, **kwargs):
232
+ if isinstance(X, pd.DataFrame):
233
+ W = X.to_numpy()
234
+ else:
235
+ W = X
236
+
237
+ if not len(W):
238
+ raise ValueError('Your dataset is empty.')
239
+
240
+ if not (np.issubdtype(W.dtype, np.floating) or np.issubdtype(W.dtype, np.integer)):
241
+ raise ValueError('There are non-numerical values in your data.')
242
+
243
+ if np.isnan(W).any().any():
244
+ raise ValueError('There are nan values in your data.')
245
+
246
+ return function(self, X, *args, **kwargs)
247
+
248
+ return decorated
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: copulas
3
- Version: 0.11.1.dev0
3
+ Version: 0.12.0.dev0
4
4
  Summary: Create tabular synthetic data using copulas-based modeling.
5
5
  Author-email: "DataCebo, Inc." <info@sdv.dev>
6
6
  License: BSL-1.1
@@ -0,0 +1,34 @@
1
+ copulas/__init__.py,sha256=rQ6-lhzQ6cMWMHwnjy5oNLyOHLe5FBmtxNKO0mmiybw,2929
2
+ copulas/datasets.py,sha256=uyiiEV2E_vztiIEIFR71yzMYXu_ZAlnP04Mhcv-me_0,6769
3
+ copulas/errors.py,sha256=YDb1uhFOsht_9ynu4MYyZJgdUQWoI_WoWzG1JfGdvzU,93
4
+ copulas/utils.py,sha256=2wWVVx-FMOhjPDt58ZgfFHQzVdLsMo44IPh7ccaIALE,6789
5
+ copulas/visualization.py,sha256=PJIa9ZiT8Px8owsZKVxy0LiTszDY3ymMDkvcgYTBzBk,9688
6
+ copulas/bivariate/__init__.py,sha256=CPt6D4Tae_hlqQ_cbhzMKf6m_xEQJms5ctfqt4ouZkQ,5097
7
+ copulas/bivariate/base.py,sha256=BGWxu7cbFxQCbBNR0vbSWQVAALxSTehE_uPk9Zxe9ic,13937
8
+ copulas/bivariate/clayton.py,sha256=5Hr90vrEMl2FdCcvxqeiWAt23ZQT41GKVQ0iHpmCmvA,4553
9
+ copulas/bivariate/frank.py,sha256=eBcPSpeOwe5NWCli9RawLB98Xc_6t6AamqbQHDiz9dk,4648
10
+ copulas/bivariate/gumbel.py,sha256=OFKf0FM3w9EQuu2gCIBGZ_DYZIIc2pQl7XoCpgq5ToA,4216
11
+ copulas/bivariate/independence.py,sha256=S0mERdcveH9Hw_N8Dpn4_xR_pGT9B2FEn_Yvi6CLfIE,2069
12
+ copulas/bivariate/utils.py,sha256=iNTwVL-vlE6gWGDQUIdGO4bmhP3Bws9CyBCi8Y3ZezE,347
13
+ copulas/multivariate/__init__.py,sha256=KZT1L1PfdZnqR6942jDOyibg7tjmF_HDNaHRGOg0AGg,340
14
+ copulas/multivariate/base.py,sha256=GavPayD6gghpTzwV1NR8Fu97zY2fhtQaJyLMZHlfkcQ,5632
15
+ copulas/multivariate/gaussian.py,sha256=b__9UkgmzduGMJye0qmncGMx1jonNfsbn6wSgP2xh2c,12176
16
+ copulas/multivariate/tree.py,sha256=8MLRBeM9Xc_kirPHwJ_C8Vci2AxsvXjrHjFws3GdLYE,22005
17
+ copulas/multivariate/vine.py,sha256=DXEaKJ14mWkpUFAapV6cPjZs_7qQRyFfUqDZ5EX5rjs,12919
18
+ copulas/optimize/__init__.py,sha256=x3KLFTF3CoO3-7vCxAK8PRAkVKpVHhcoJW0oDwGkPvg,4941
19
+ copulas/univariate/__init__.py,sha256=5j1pTKG1hVEn9wmAumLnVghR7eKI_Wv5sceXTr-aOUY,826
20
+ copulas/univariate/base.py,sha256=Gz4dry15fI8yxB6tGLFkP8KSILizQCVeWDz2lS5_TYQ,20060
21
+ copulas/univariate/beta.py,sha256=l_aTwzDfmZmDwMdBZL6vye8SoOTGWNy9akNFN0DMhOU,958
22
+ copulas/univariate/gamma.py,sha256=az8-3sMbp1-K16SxtW6qJ4fRKxoXg0XSyKNxH_x86tM,906
23
+ copulas/univariate/gaussian.py,sha256=ZRxwg-YNr8QHDGA0locVBYLKEi0dyOzoMZthRh3P0SA,660
24
+ copulas/univariate/gaussian_kde.py,sha256=vF7RY9KmOrnGlBUWBu34iubsJ9iLNAnJRgZTcbo7aIs,6077
25
+ copulas/univariate/log_laplace.py,sha256=1njkNjVc3-p3ZP6lkOA3fhc6wmr_70BQtm7pQuCAzTk,921
26
+ copulas/univariate/selection.py,sha256=nETHnLB89lTf5CPSEX28Xu2cNySQU7yhW2gZRu4Otic,950
27
+ copulas/univariate/student_t.py,sha256=r4_sHdEX4C74byC5_i60e8f2DT-J8RqGyjzeCtZkwbM,777
28
+ copulas/univariate/truncated_gaussian.py,sha256=ugr3Lm-rzadRIwsi1FTYpziNoNiP7t2iBQNGbfrFrKs,1999
29
+ copulas/univariate/uniform.py,sha256=BkGaEZkitKpDAEkMscvLVLJ4U-j6gZuZqnZiBtCVr8Y,686
30
+ copulas-0.12.0.dev0.dist-info/LICENSE,sha256=cORU2kpIo9Qyy7Kv2ZpYDIIcksrjqlNEL9c9Ic1ayo0,4822
31
+ copulas-0.12.0.dev0.dist-info/METADATA,sha256=tbfrsBVQM0zzt70CidYMr7a9g22cGmLsU5Bsv9YzC84,9061
32
+ copulas-0.12.0.dev0.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
33
+ copulas-0.12.0.dev0.dist-info/top_level.txt,sha256=xNXWuWoZ-U3Gb734WqQxkF5RIeGDVU3IstjD-RnWsk8,8
34
+ copulas-0.12.0.dev0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (73.0.1)
2
+ Generator: setuptools (75.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,32 +0,0 @@
1
- copulas/__init__.py,sha256=qasYMEQtxt4ET3QYVh7-UgXfUkS4g1pj0FqQxMzjZjk,9444
2
- copulas/datasets.py,sha256=XGhnTbN_eOPi1s5_MNJmRoPpWemJn0LKpsvglArO4KA,6763
3
- copulas/visualization.py,sha256=PJIa9ZiT8Px8owsZKVxy0LiTszDY3ymMDkvcgYTBzBk,9688
4
- copulas/bivariate/__init__.py,sha256=3Oakie5lI9QPZBpllbi6_tTXwlgQ47_iR5uCQVbfQHQ,5091
5
- copulas/bivariate/base.py,sha256=WMUr3Z1CLqmJaAmUdeKub_8ETs9GSgTyriTXhlZfyus,13905
6
- copulas/bivariate/clayton.py,sha256=5Hr90vrEMl2FdCcvxqeiWAt23ZQT41GKVQ0iHpmCmvA,4553
7
- copulas/bivariate/frank.py,sha256=fXOxsIeDnx_nMwAWDvM_QBlUvxTyrwOX4Ho5F6GC-T0,4642
8
- copulas/bivariate/gumbel.py,sha256=OFKf0FM3w9EQuu2gCIBGZ_DYZIIc2pQl7XoCpgq5ToA,4216
9
- copulas/bivariate/independence.py,sha256=S0mERdcveH9Hw_N8Dpn4_xR_pGT9B2FEn_Yvi6CLfIE,2069
10
- copulas/bivariate/utils.py,sha256=iNTwVL-vlE6gWGDQUIdGO4bmhP3Bws9CyBCi8Y3ZezE,347
11
- copulas/multivariate/__init__.py,sha256=KZT1L1PfdZnqR6942jDOyibg7tjmF_HDNaHRGOg0AGg,340
12
- copulas/multivariate/base.py,sha256=I_ECmhSvv15ug0oYW6zk_I-_kdnSttoy2w3YFzdH0sQ,5600
13
- copulas/multivariate/gaussian.py,sha256=q8hEtrjpXbsB4lATiH7VKVbxKJzPYlt0IzGDs0uNjzk,11184
14
- copulas/multivariate/tree.py,sha256=goXkRrgxkPfnvIRRAVhjVeuGoIeIT1cO8c-qV5XNdx8,21999
15
- copulas/multivariate/vine.py,sha256=WDk7HRoEOFl-qsmacZZqNxIjVB6Db-eItpeIG1Kyggk,12932
16
- copulas/optimize/__init__.py,sha256=x3KLFTF3CoO3-7vCxAK8PRAkVKpVHhcoJW0oDwGkPvg,4941
17
- copulas/univariate/__init__.py,sha256=5j1pTKG1hVEn9wmAumLnVghR7eKI_Wv5sceXTr-aOUY,826
18
- copulas/univariate/base.py,sha256=RgX_dR1fJM8kcl6SojU-k4QQINvThuraIiPqlRT0wQM,20032
19
- copulas/univariate/beta.py,sha256=l_aTwzDfmZmDwMdBZL6vye8SoOTGWNy9akNFN0DMhOU,958
20
- copulas/univariate/gamma.py,sha256=az8-3sMbp1-K16SxtW6qJ4fRKxoXg0XSyKNxH_x86tM,906
21
- copulas/univariate/gaussian.py,sha256=ZRxwg-YNr8QHDGA0locVBYLKEi0dyOzoMZthRh3P0SA,660
22
- copulas/univariate/gaussian_kde.py,sha256=vIb6jTRWLSMCb7QHQJLsdxwMHmWWiypze3KceoKtEwE,6071
23
- copulas/univariate/log_laplace.py,sha256=1njkNjVc3-p3ZP6lkOA3fhc6wmr_70BQtm7pQuCAzTk,921
24
- copulas/univariate/selection.py,sha256=uC-l8osnbx50Gqx4-WLfKTLco0ncb41TDEbdt1hp_j8,944
25
- copulas/univariate/student_t.py,sha256=r4_sHdEX4C74byC5_i60e8f2DT-J8RqGyjzeCtZkwbM,777
26
- copulas/univariate/truncated_gaussian.py,sha256=QhvMpkz-Qbf-R1ivnOi8fVdGXzmwVKlequPszUdmNEE,1993
27
- copulas/univariate/uniform.py,sha256=BkGaEZkitKpDAEkMscvLVLJ4U-j6gZuZqnZiBtCVr8Y,686
28
- copulas-0.11.1.dev0.dist-info/LICENSE,sha256=cORU2kpIo9Qyy7Kv2ZpYDIIcksrjqlNEL9c9Ic1ayo0,4822
29
- copulas-0.11.1.dev0.dist-info/METADATA,sha256=6I1shmFNiiatHEvgYYVqyarMDLTJ04QUHKB-0afFoWg,9061
30
- copulas-0.11.1.dev0.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
31
- copulas-0.11.1.dev0.dist-info/top_level.txt,sha256=xNXWuWoZ-U3Gb734WqQxkF5RIeGDVU3IstjD-RnWsk8,8
32
- copulas-0.11.1.dev0.dist-info/RECORD,,