copulas 0.10.1__tar.gz → 0.12.1.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of copulas might be problematic. Click here for more details.

Files changed (42)
  1. {copulas-0.10.1 → copulas-0.12.1.dev0}/PKG-INFO +32 -40
  2. copulas-0.12.1.dev0/copulas/__init__.py +91 -0
  3. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/__init__.py +3 -3
  4. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/base.py +8 -9
  5. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/clayton.py +3 -2
  6. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/frank.py +2 -1
  7. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/datasets.py +3 -10
  8. copulas-0.12.1.dev0/copulas/errors.py +5 -0
  9. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/multivariate/__init__.py +1 -7
  10. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/multivariate/base.py +2 -1
  11. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/multivariate/gaussian.py +79 -48
  12. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/multivariate/tree.py +12 -14
  13. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/multivariate/vine.py +14 -9
  14. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/optimize/__init__.py +4 -3
  15. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/__init__.py +1 -1
  16. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/base.py +16 -5
  17. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/beta.py +1 -6
  18. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/gaussian.py +2 -8
  19. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/gaussian_kde.py +6 -7
  20. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/selection.py +1 -1
  21. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/student_t.py +1 -5
  22. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/truncated_gaussian.py +9 -17
  23. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/uniform.py +2 -8
  24. copulas-0.10.1/copulas/__init__.py → copulas-0.12.1.dev0/copulas/utils.py +10 -94
  25. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/visualization.py +15 -20
  26. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas.egg-info/PKG-INFO +32 -40
  27. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas.egg-info/SOURCES.txt +2 -1
  28. copulas-0.12.1.dev0/copulas.egg-info/requires.txt +79 -0
  29. {copulas-0.10.1 → copulas-0.12.1.dev0}/pyproject.toml +96 -45
  30. copulas-0.12.1.dev0/setup.cfg +4 -0
  31. {copulas-0.10.1 → copulas-0.12.1.dev0}/tests/test_tasks.py +6 -3
  32. copulas-0.10.1/copulas.egg-info/requires.txt +0 -75
  33. copulas-0.10.1/setup.cfg +0 -23
  34. {copulas-0.10.1 → copulas-0.12.1.dev0}/LICENSE +0 -0
  35. {copulas-0.10.1 → copulas-0.12.1.dev0}/README.md +0 -0
  36. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/gumbel.py +0 -0
  37. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/independence.py +0 -0
  38. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/bivariate/utils.py +0 -0
  39. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/gamma.py +0 -0
  40. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas/univariate/log_laplace.py +0 -0
  41. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas.egg-info/dependency_links.txt +0 -0
  42. {copulas-0.10.1 → copulas-0.12.1.dev0}/copulas.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: copulas
3
- Version: 0.10.1
3
+ Version: 0.12.1.dev0
4
4
  Summary: Create tabular synthetic data using copulas-based modeling.
5
5
  Author-email: "DataCebo, Inc." <info@sdv.dev>
6
6
  License: BSL-1.1
@@ -19,27 +19,36 @@ Classifier: Programming Language :: Python :: 3.8
19
19
  Classifier: Programming Language :: Python :: 3.9
20
20
  Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
22
24
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
- Requires-Python: <3.12,>=3.8
25
+ Requires-Python: <3.14,>=3.8
24
26
  Description-Content-Type: text/markdown
25
27
  License-File: LICENSE
26
- Requires-Dist: numpy<2,>=1.20.0; python_version < "3.10"
27
- Requires-Dist: numpy<2,>=1.23.3; python_version >= "3.10"
28
- Requires-Dist: pandas>=1.1.3; python_version < "3.10"
29
- Requires-Dist: pandas>=1.3.4; python_version >= "3.10" and python_version < "3.11"
30
- Requires-Dist: pandas>=1.5.0; python_version >= "3.11"
31
- Requires-Dist: plotly<6,>=5.10.0
32
- Requires-Dist: scipy<2,>=1.5.4; python_version < "3.10"
33
- Requires-Dist: scipy<2,>=1.9.2; python_version >= "3.10"
28
+ Requires-Dist: numpy>=1.21.0; python_version < "3.10"
29
+ Requires-Dist: numpy>=1.23.3; python_version >= "3.10" and python_version < "3.12"
30
+ Requires-Dist: numpy>=1.26.0; python_version >= "3.12" and python_version < "3.13"
31
+ Requires-Dist: numpy>=2.1.0; python_version >= "3.13"
32
+ Requires-Dist: pandas>=1.4.0; python_version < "3.11"
33
+ Requires-Dist: pandas>=1.5.0; python_version >= "3.11" and python_version < "3.12"
34
+ Requires-Dist: pandas>=2.1.1; python_version >= "3.12" and python_version < "3.13"
35
+ Requires-Dist: pandas>=2.2.3; python_version >= "3.13"
36
+ Requires-Dist: plotly>=5.10.0; python_version < "3.13"
37
+ Requires-Dist: plotly>=5.12.0; python_version >= "3.13"
38
+ Requires-Dist: scipy>=1.7.3; python_version < "3.10"
39
+ Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
40
+ Requires-Dist: scipy>=1.12.0; python_version >= "3.12" and python_version < "3.13"
41
+ Requires-Dist: scipy>=1.14.1; python_version >= "3.13"
34
42
  Provides-Extra: tutorials
35
43
  Requires-Dist: markupsafe<=2.0.1; extra == "tutorials"
36
- Requires-Dist: scikit-learn<1.2,>=0.24; extra == "tutorials"
44
+ Requires-Dist: scikit-learn>=0.24; python_version < "3.12" and extra == "tutorials"
45
+ Requires-Dist: scikit-learn>=1.3.1; python_version >= "3.12" and extra == "tutorials"
37
46
  Requires-Dist: jupyter<2,>=1.0.0; extra == "tutorials"
38
47
  Provides-Extra: test
39
48
  Requires-Dist: copulas[tutorials]; extra == "test"
40
49
  Requires-Dist: pytest<7,>=6.2.5; extra == "test"
41
50
  Requires-Dist: pytest-cov<3,>=2.6.0; extra == "test"
42
- Requires-Dist: pytest-rerunfailures<10,>=9.0.0; extra == "test"
51
+ Requires-Dist: pytest-rerunfailures<15,>=10.3; extra == "test"
43
52
  Requires-Dist: rundoc<0.5,>=0.4.3; extra == "test"
44
53
  Requires-Dist: tomli<3,>=2.0.0; extra == "test"
45
54
  Provides-Extra: dev
@@ -47,42 +56,25 @@ Requires-Dist: copulas[test,tutorials]; extra == "dev"
47
56
  Requires-Dist: pip>=9.0.1; extra == "dev"
48
57
  Requires-Dist: build<2,>=1.0.0; extra == "dev"
49
58
  Requires-Dist: bump-my-version<1,>=0.18.3; extra == "dev"
50
- Requires-Dist: watchdog<0.11,>=0.8.3; extra == "dev"
59
+ Requires-Dist: watchdog<5,>=1.0.1; extra == "dev"
51
60
  Requires-Dist: m2r<0.3,>=0.2.0; extra == "dev"
52
61
  Requires-Dist: nbsphinx<0.7,>=0.5.0; extra == "dev"
53
62
  Requires-Dist: Sphinx<3,>=1.7.1; extra == "dev"
54
63
  Requires-Dist: sphinx_rtd_theme<0.5,>=0.2.4; extra == "dev"
55
- Requires-Dist: Jinja2<3,>=2; extra == "dev"
56
- Requires-Dist: flake8<4,>=3.7.7; extra == "dev"
57
- Requires-Dist: isort<5,>=4.3.4; extra == "dev"
58
- Requires-Dist: flake8-debugger<4.1,>=4.0.0; extra == "dev"
59
- Requires-Dist: flake8-mock<0.4,>=0.3; extra == "dev"
60
- Requires-Dist: flake8-mutable<1.3,>=1.2.0; extra == "dev"
61
- Requires-Dist: flake8-fixme<1.2,>=1.1.1; extra == "dev"
62
- Requires-Dist: pep8-naming<0.13,>=0.12.1; extra == "dev"
63
- Requires-Dist: dlint<0.12,>=0.11.0; extra == "dev"
64
- Requires-Dist: flake8-docstrings<2,>=1.5.0; extra == "dev"
65
- Requires-Dist: pydocstyle<6.2,>=6.1.1; extra == "dev"
66
- Requires-Dist: flake8-pytest-style<2,>=1.5.0; extra == "dev"
67
- Requires-Dist: flake8-comprehensions<3.7,>=3.6.1; extra == "dev"
68
- Requires-Dist: flake8-print<4.1,>=4.0.0; extra == "dev"
69
- Requires-Dist: flake8-expression-complexity<0.1,>=0.0.9; extra == "dev"
70
- Requires-Dist: flake8-multiline-containers<0.1,>=0.0.18; extra == "dev"
71
- Requires-Dist: pandas-vet<0.3,>=0.2.2; extra == "dev"
72
- Requires-Dist: flake8-builtins<1.6,>=1.5.3; extra == "dev"
73
- Requires-Dist: flake8-eradicate<1.2,>=1.1.0; extra == "dev"
74
- Requires-Dist: flake8-quotes<4,>=3.3.0; extra == "dev"
75
- Requires-Dist: flake8-variables-names<0.1,>=0.0.4; extra == "dev"
76
- Requires-Dist: flake8-sfs<0.1,>=0.0.3; extra == "dev"
77
- Requires-Dist: flake8-absolute-import<2,>=1.0; extra == "dev"
78
- Requires-Dist: autoflake<2,>=1.1; extra == "dev"
79
- Requires-Dist: autopep8<1.6,>=1.4.3; extra == "dev"
64
+ Requires-Dist: sphinxcontrib_applehelp<1.0.8; extra == "dev"
65
+ Requires-Dist: sphinxcontrib-devhelp<1.0.6; extra == "dev"
66
+ Requires-Dist: sphinxcontrib-htmlhelp<2.0.5; extra == "dev"
67
+ Requires-Dist: sphinxcontrib_serializinghtml<1.1.10; extra == "dev"
68
+ Requires-Dist: sphinxcontrib_qthelp<1.0.7; extra == "dev"
69
+ Requires-Dist: alabaster<0.7.13; extra == "dev"
70
+ Requires-Dist: Jinja2<3,>=2; python_version < "3.12" and extra == "dev"
71
+ Requires-Dist: Jinja2<4,>=2; python_version >= "3.12" and extra == "dev"
72
+ Requires-Dist: ruff<1,>=0.3.2; extra == "dev"
80
73
  Requires-Dist: twine<4,>=1.10.0; extra == "dev"
81
74
  Requires-Dist: wheel>=0.30.0; extra == "dev"
82
75
  Requires-Dist: coverage<6,>=4.5.1; extra == "dev"
83
76
  Requires-Dist: tox<4,>=2.9.1; extra == "dev"
84
77
  Requires-Dist: invoke; extra == "dev"
85
- Requires-Dist: doc8<0.9,>=0.8.0; extra == "dev"
86
78
  Requires-Dist: urllib3<1.26,>=1.20; extra == "dev"
87
79
  Requires-Dist: tabulate<0.9,>=0.8.3; extra == "dev"
88
80
  Requires-Dist: boto3<1.10,>=1.7.47; extra == "dev"
@@ -0,0 +1,91 @@
1
+ """Top-level package for Copulas."""
2
+
3
+ __author__ = 'DataCebo, Inc.'
4
+ __email__ = 'info@sdv.dev'
5
+ __version__ = '0.12.1.dev0'
6
+
7
+ import sys
8
+ import warnings
9
+ from copy import deepcopy
10
+ from importlib.metadata import entry_points
11
+ from operator import attrgetter
12
+ from types import ModuleType
13
+
14
+
15
+ def _get_addon_target(addon_path_name):
16
+ """Find the target object for the add-on.
17
+
18
+ Args:
19
+ addon_path_name (str):
20
+ The add-on's name. The add-on's name should be the full path of valid Python
21
+ identifiers (i.e. importable.module:object.attr).
22
+
23
+ Returns:
24
+ tuple:
25
+ * object:
26
+ The base module or object the add-on should be added to.
27
+ * str:
28
+ The name the add-on should be added to under the module or object.
29
+ """
30
+ module_path, _, object_path = addon_path_name.partition(':')
31
+ module_path = module_path.split('.')
32
+
33
+ if module_path[0] != __name__:
34
+ msg = f"expected base module to be '{__name__}', found '{module_path[0]}'"
35
+ raise AttributeError(msg)
36
+
37
+ target_base = sys.modules[__name__]
38
+ for submodule in module_path[1:-1]:
39
+ target_base = getattr(target_base, submodule)
40
+
41
+ addon_name = module_path[-1]
42
+ if object_path:
43
+ if len(module_path) > 1 and not hasattr(target_base, module_path[-1]):
44
+ msg = f"cannot add '{object_path}' to unknown submodule '{'.'.join(module_path)}'"
45
+ raise AttributeError(msg)
46
+
47
+ if len(module_path) > 1:
48
+ target_base = getattr(target_base, module_path[-1])
49
+
50
+ split_object = object_path.split('.')
51
+ addon_name = split_object[-1]
52
+
53
+ if len(split_object) > 1:
54
+ target_base = attrgetter('.'.join(split_object[:-1]))(target_base)
55
+
56
+ return target_base, addon_name
57
+
58
+
59
+ def _find_addons():
60
+ """Find and load all copulas add-ons."""
61
+ group = 'copulas_modules'
62
+ try:
63
+ eps = entry_points(group=group)
64
+ except TypeError:
65
+ # Load-time selection requires Python >= 3.10 or importlib_metadata >= 3.6
66
+ eps = entry_points().get(group, [])
67
+
68
+ for entry_point in eps:
69
+ try:
70
+ addon = entry_point.load()
71
+ except Exception as e: # pylint: disable=broad-exception-caught
72
+ msg = f'Failed to load "{entry_point.name}" from "{entry_point.value}" with error:\n{e}'
73
+ warnings.warn(msg)
74
+ continue
75
+
76
+ try:
77
+ addon_target, addon_name = _get_addon_target(entry_point.name)
78
+ except AttributeError as error:
79
+ msg = f"Failed to set '{entry_point.name}': {error}."
80
+ warnings.warn(msg)
81
+ continue
82
+
83
+ if isinstance(addon, ModuleType):
84
+ addon_module_name = f'{addon_target.__name__}.{addon_name}'
85
+ if addon_module_name not in sys.modules:
86
+ sys.modules[addon_module_name] = addon
87
+
88
+ setattr(addon_target, addon_name, addon)
89
+
90
+
91
+ _find_addons()
@@ -3,7 +3,7 @@
3
3
  import numpy as np
4
4
  import pandas as pd
5
5
 
6
- from copulas import EPSILON
6
+ from copulas.utils import EPSILON
7
7
  from copulas.bivariate.base import Bivariate, CopulaTypes
8
8
  from copulas.bivariate.clayton import Clayton
9
9
  from copulas.bivariate.frank import Frank
@@ -47,7 +47,6 @@ def _compute_empirical(X):
47
47
  right = sum(np.logical_and(U >= base[k], V >= base[k])) / N
48
48
 
49
49
  if left > 0:
50
-
51
50
  z_left.append(base[k])
52
51
  L.append(left / base[k] ** 2)
53
52
 
@@ -151,7 +150,8 @@ def select_copula(X):
151
150
 
152
151
  left_tail, empirical_left_aut, right_tail, empirical_right_aut = _compute_empirical(X)
153
152
  candidate_left_auts, candidate_right_auts = _compute_candidates(
154
- copula_candidates, left_tail, right_tail)
153
+ copula_candidates, left_tail, right_tail
154
+ )
155
155
 
156
156
  empirical_aut = np.concatenate((empirical_left_aut, empirical_right_aut))
157
157
  candidate_auts = [
@@ -8,8 +8,9 @@ import numpy as np
8
8
  from scipy import stats
9
9
  from scipy.optimize import brentq
10
10
 
11
- from copulas import EPSILON, NotFittedError, random_state, validate_random_state
12
11
  from copulas.bivariate.utils import split_matrix
12
+ from copulas.errors import NotFittedError
13
+ from copulas.utils import EPSILON, random_state, validate_random_state
13
14
 
14
15
 
15
16
  class CopulaTypes(Enum):
@@ -96,7 +97,7 @@ class Bivariate(object):
96
97
  return super(Bivariate, cls).__new__(cls)
97
98
 
98
99
  if not isinstance(copula_type, CopulaTypes):
99
- if (isinstance(copula_type, str) and copula_type.upper() in CopulaTypes.__members__):
100
+ if isinstance(copula_type, str) and copula_type.upper() in CopulaTypes.__members__:
100
101
  copula_type = CopulaTypes[copula_type.upper()]
101
102
  else:
102
103
  raise ValueError(f'Invalid copula type {copula_type}')
@@ -192,11 +193,7 @@ class Bivariate(object):
192
193
  dict: Parameters of the copula.
193
194
 
194
195
  """
195
- return {
196
- 'copula_type': self.copula_type.name,
197
- 'theta': self.theta,
198
- 'tau': self.tau
199
- }
196
+ return {'copula_type': self.copula_type.name, 'theta': self.theta, 'tau': self.tau}
200
197
 
201
198
  @classmethod
202
199
  def from_dict(cls, copula_dict):
@@ -297,6 +294,7 @@ class Bivariate(object):
297
294
  self.check_fit()
298
295
  result = []
299
296
  for _y, _v in zip(y, V):
297
+
300
298
  def f(u):
301
299
  return self.partial_derivative_scalar(u, _v) - _y
302
300
 
@@ -330,7 +328,7 @@ class Bivariate(object):
330
328
  np.ndarray
331
329
 
332
330
  """
333
- delta = (-2 * (X[:, 1] > 0.5) + 1)
331
+ delta = -2 * (X[:, 1] > 0.5) + 1
334
332
  delta = 0.0001 * delta
335
333
  X_prime = X.copy()
336
334
  X_prime[:, 1] += delta
@@ -411,10 +409,11 @@ class Bivariate(object):
411
409
 
412
410
  """
413
411
  from copulas.bivariate import select_copula # noqa
412
+
414
413
  warnings.warn(
415
414
  '`Bivariate.select_copula` has been deprecated and will be removed in a later '
416
415
  'release. Please use `copulas.bivariate.select_copula` instead',
417
- DeprecationWarning
416
+ DeprecationWarning,
418
417
  )
419
418
  return select_copula(X)
420
419
 
@@ -84,9 +84,10 @@ class Clayton(Bivariate):
84
84
  cdfs = [
85
85
  np.power(
86
86
  np.power(U[i], -self.theta) + np.power(V[i], -self.theta) - 1,
87
- -1.0 / self.theta
87
+ -1.0 / self.theta,
88
88
  )
89
- if (U[i] > 0 and V[i] > 0) else 0
89
+ if (U[i] > 0 and V[i] > 0)
90
+ else 0
90
91
  for i in range(len(U))
91
92
  ]
92
93
 
@@ -6,9 +6,9 @@ import numpy as np
6
6
  import scipy.integrate as integrate
7
7
  from scipy.optimize import least_squares
8
8
 
9
- from copulas import EPSILON
10
9
  from copulas.bivariate.base import Bivariate, CopulaTypes
11
10
  from copulas.bivariate.utils import split_matrix
11
+ from copulas.utils import EPSILON
12
12
 
13
13
  MIN_FLOAT_LOG = np.log(sys.float_info.min)
14
14
  MAX_FLOAT_LOG = np.log(sys.float_info.max)
@@ -162,6 +162,7 @@ class Frank(Bivariate):
162
162
 
163
163
  def _tau_to_theta(self, alpha):
164
164
  """Relationship between tau and theta as a solvable equation."""
165
+
165
166
  def debye(t):
166
167
  return t / (np.exp(t) - 1)
167
168
 
@@ -4,7 +4,7 @@ import numpy as np
4
4
  import pandas as pd
5
5
  from scipy import stats
6
6
 
7
- from copulas import set_random_state, validate_random_state
7
+ from copulas.utils import set_random_state, validate_random_state
8
8
 
9
9
 
10
10
  def _dummy_fn(state):
@@ -33,10 +33,7 @@ def sample_bivariate_age_income(size=1000, seed=42):
33
33
  income += np.random.normal(loc=np.log(age) / 100, scale=10, size=size)
34
34
  income[np.random.randint(0, 10, size=size) == 0] /= 1000
35
35
 
36
- return pd.DataFrame({
37
- 'age': age,
38
- 'income': income
39
- })
36
+ return pd.DataFrame({'age': age, 'income': income})
40
37
 
41
38
 
42
39
  def sample_trivariate_xyz(size=1000, seed=42):
@@ -61,11 +58,7 @@ def sample_trivariate_xyz(size=1000, seed=42):
61
58
  with set_random_state(validate_random_state(seed), _dummy_fn):
62
59
  x = stats.beta.rvs(a=0.1, b=0.1, size=size)
63
60
  y = stats.beta.rvs(a=0.1, b=0.5, size=size)
64
- return pd.DataFrame({
65
- 'x': x,
66
- 'y': y,
67
- 'z': np.random.normal(size=size) + y * 10
68
- })
61
+ return pd.DataFrame({'x': x, 'y': y, 'z': np.random.normal(size=size) + y * 10})
69
62
 
70
63
 
71
64
  def sample_univariate_bernoulli(size=1000, seed=42):
@@ -0,0 +1,5 @@
1
+ """Copulas Exceptions."""
2
+
3
+
4
+ class NotFittedError(Exception):
5
+ """NotFittedError class."""
@@ -5,10 +5,4 @@ from copulas.multivariate.gaussian import GaussianMultivariate
5
5
  from copulas.multivariate.tree import Tree, TreeTypes
6
6
  from copulas.multivariate.vine import VineCopula
7
7
 
8
- __all__ = (
9
- 'Multivariate',
10
- 'GaussianMultivariate',
11
- 'VineCopula',
12
- 'Tree',
13
- 'TreeTypes'
14
- )
8
+ __all__ = ('Multivariate', 'GaussianMultivariate', 'VineCopula', 'Tree', 'TreeTypes')
@@ -4,7 +4,8 @@ import pickle
4
4
 
5
5
  import numpy as np
6
6
 
7
- from copulas import NotFittedError, get_instance, validate_random_state
7
+ from copulas.errors import NotFittedError
8
+ from copulas.utils import get_instance, validate_random_state
8
9
 
9
10
 
10
11
  class Multivariate(object):
@@ -7,11 +7,17 @@ import numpy as np
7
7
  import pandas as pd
8
8
  from scipy import stats
9
9
 
10
- from copulas import (
11
- EPSILON, check_valid_values, get_instance, get_qualified_name, random_state, store_args,
12
- validate_random_state)
13
10
  from copulas.multivariate.base import Multivariate
14
11
  from copulas.univariate import GaussianUnivariate, Univariate
12
+ from copulas.utils import (
13
+ EPSILON,
14
+ check_valid_values,
15
+ get_instance,
16
+ get_qualified_name,
17
+ random_state,
18
+ store_args,
19
+ validate_random_state,
20
+ )
15
21
 
16
22
  LOGGER = logging.getLogger(__name__)
17
23
  DEFAULT_DISTRIBUTION = Univariate
@@ -64,26 +70,6 @@ class GaussianMultivariate(Multivariate):
64
70
 
65
71
  return stats.norm.ppf(np.column_stack(U))
66
72
 
67
- def _get_correlation(self, X):
68
- """Compute correlation matrix with transformed data.
69
-
70
- Args:
71
- X (numpy.ndarray):
72
- Data for which the correlation needs to be computed.
73
-
74
- Returns:
75
- numpy.ndarray:
76
- computed correlation matrix.
77
- """
78
- result = self._transform_to_normal(X)
79
- correlation = pd.DataFrame(data=result).corr().to_numpy()
80
- correlation = np.nan_to_num(correlation, nan=0.0)
81
- # If singular, add some noise to the diagonal
82
- if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
83
- correlation = correlation + np.identity(correlation.shape[0]) * EPSILON
84
-
85
- return pd.DataFrame(correlation, index=self.columns, columns=self.columns)
86
-
87
73
  @check_valid_values
88
74
  def fit(self, X):
89
75
  """Compute the distribution for each variable and then its correlation matrix.
@@ -94,42 +80,88 @@ class GaussianMultivariate(Multivariate):
94
80
  """
95
81
  LOGGER.info('Fitting %s', self)
96
82
 
83
+ # Validate the input data
84
+ X = self._validate_input(X)
85
+ columns, univariates = self._fit_columns(X)
86
+
87
+ self.columns = columns
88
+ self.univariates = univariates
89
+
90
+ LOGGER.debug('Computing correlation.')
91
+ self.correlation = self._get_correlation(X)
92
+ self.fitted = True
93
+ LOGGER.debug('GaussianMultivariate fitted successfully')
94
+
95
+ def _validate_input(self, X):
96
+ """Validate the input data."""
97
97
  if not isinstance(X, pd.DataFrame):
98
98
  X = pd.DataFrame(X)
99
99
 
100
+ return X
101
+
102
+ def _fit_columns(self, X):
103
+ """Fit each column to its distribution."""
100
104
  columns = []
101
105
  univariates = []
102
106
  for column_name, column in X.items():
103
- if isinstance(self.distribution, dict):
104
- distribution = self.distribution.get(column_name, DEFAULT_DISTRIBUTION)
105
- else:
106
- distribution = self.distribution
107
-
107
+ distribution = self._get_distribution_for_column(column_name)
108
108
  LOGGER.debug('Fitting column %s to %s', column_name, distribution)
109
109
 
110
- univariate = get_instance(distribution)
111
- try:
112
- univariate.fit(column)
113
- except BaseException:
114
- log_message = (
115
- f'Unable to fit to a {distribution} distribution for column {column_name}. '
116
- 'Using a Gaussian distribution instead.'
117
- )
118
- LOGGER.info(log_message)
119
- univariate = GaussianUnivariate()
120
- univariate.fit(column)
121
-
110
+ univariate = self._fit_column(column, distribution, column_name)
122
111
  columns.append(column_name)
123
112
  univariates.append(univariate)
124
113
 
125
- self.columns = columns
126
- self.univariates = univariates
114
+ return columns, univariates
115
+
116
+ def _get_distribution_for_column(self, column_name):
117
+ """Retrieve the distribution for a given column name."""
118
+ if isinstance(self.distribution, dict):
119
+ return self.distribution.get(column_name, DEFAULT_DISTRIBUTION)
120
+
121
+ return self.distribution
122
+
123
+ def _fit_column(self, column, distribution, column_name):
124
+ """Fit a single column to its distribution with exception handling."""
125
+ univariate = get_instance(distribution)
126
+ try:
127
+ univariate.fit(column)
128
+ except Exception as error:
129
+ univariate = self._fit_with_fallback_distribution(
130
+ column, distribution, column_name, error
131
+ )
132
+
133
+ return univariate
134
+
135
+ def _fit_with_fallback_distribution(self, column, distribution, column_name, error):
136
+ """Fall back to fitting a Gaussian distribution and log the error."""
137
+ log_message = (
138
+ f'Unable to fit to a {distribution} distribution for column {column_name}. '
139
+ 'Using a Gaussian distribution instead.'
140
+ )
141
+ LOGGER.info(log_message)
142
+ univariate = GaussianUnivariate()
143
+ univariate.fit(column)
144
+ return univariate
127
145
 
128
- LOGGER.debug('Computing correlation')
129
- self.correlation = self._get_correlation(X)
130
- self.fitted = True
146
+ def _get_correlation(self, X):
147
+ """Compute correlation matrix with transformed data.
131
148
 
132
- LOGGER.debug('GaussianMultivariate fitted successfully')
149
+ Args:
150
+ X (numpy.ndarray):
151
+ Data for which the correlation needs to be computed.
152
+
153
+ Returns:
154
+ numpy.ndarray:
155
+ computed correlation matrix.
156
+ """
157
+ result = self._transform_to_normal(X)
158
+ correlation = pd.DataFrame(data=result).corr().to_numpy()
159
+ correlation = np.nan_to_num(correlation, nan=0.0)
160
+ # If singular, add some noise to the diagonal
161
+ if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
162
+ correlation = correlation + np.identity(correlation.shape[0]) * EPSILON
163
+
164
+ return pd.DataFrame(correlation, index=self.columns, columns=self.columns)
133
165
 
134
166
  def probability_density(self, X):
135
167
  """Compute the probability density for each point in X.
@@ -149,8 +181,7 @@ class GaussianMultivariate(Multivariate):
149
181
  self.check_fit()
150
182
  transformed = self._transform_to_normal(X)
151
183
 
152
- return stats.multivariate_normal.pdf(
153
- transformed, cov=self.correlation, allow_singular=True)
184
+ return stats.multivariate_normal.pdf(transformed, cov=self.correlation, allow_singular=True)
154
185
 
155
186
  def cumulative_distribution(self, X):
156
187
  """Compute the cumulative distribution value for each point in X.
@@ -6,9 +6,9 @@ from enum import Enum
6
6
  import numpy as np
7
7
  import scipy
8
8
 
9
- from copulas import EPSILON, get_qualified_name
10
9
  from copulas.bivariate.base import Bivariate
11
10
  from copulas.multivariate.base import Multivariate
11
+ from copulas.utils import EPSILON, get_qualified_name
12
12
 
13
13
  LOGGER = logging.getLogger(__name__)
14
14
 
@@ -98,7 +98,7 @@ class Tree(Multivariate):
98
98
  """
99
99
  # first column is the variable of interest
100
100
  tau_y = self.tau_matrix[:, y]
101
- tau_y[y] = np.NaN
101
+ tau_y[y] = np.nan
102
102
 
103
103
  temp = np.empty([self.n_nodes, 3])
104
104
  temp[:, 0] = np.arange(self.n_nodes)
@@ -131,7 +131,7 @@ class Tree(Multivariate):
131
131
  left_parent, right_parent = edge.parents
132
132
  left_u, right_u = Edge.get_conditional_uni(left_parent, right_parent)
133
133
 
134
- tau[i, j], pvalue = scipy.stats.kendalltau(left_u, right_u)
134
+ tau[i, j], _pvalue = scipy.stats.kendalltau(left_u, right_u)
135
135
 
136
136
  return tau
137
137
 
@@ -212,8 +212,7 @@ class Tree(Multivariate):
212
212
  """Produce printable representation of the class."""
213
213
  template = 'L:{} R:{} D:{} Copula:{} Theta:{}'
214
214
  return '\n'.join([
215
- template.format(edge.L, edge.R, edge.D, edge.name, edge.theta)
216
- for edge in self.edges
215
+ template.format(edge.L, edge.R, edge.D, edge.name, edge.theta) for edge in self.edges
217
216
  ])
218
217
 
219
218
  def _serialize_previous_tree(self):
@@ -237,11 +236,7 @@ class Tree(Multivariate):
237
236
  Parameters of this Tree.
238
237
  """
239
238
  fitted = self.fitted
240
- result = {
241
- 'tree_type': self.tree_type,
242
- 'type': get_qualified_name(self),
243
- 'fitted': fitted
244
- }
239
+ result = {'tree_type': self.tree_type, 'type': get_qualified_name(self), 'fitted': fitted}
245
240
 
246
241
  if not fitted:
247
242
  return result
@@ -451,7 +446,7 @@ def get_tree(tree_type):
451
446
  Instance of a Tree of the specified type.
452
447
  """
453
448
  if not isinstance(tree_type, TreeTypes):
454
- if (isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__):
449
+ if isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__:
455
450
  tree_type = TreeTypes[tree_type.upper()]
456
451
  else:
457
452
  raise ValueError(f'Invalid tree type {tree_type}')
@@ -657,7 +652,7 @@ class Edge(object):
657
652
  'theta': self.theta,
658
653
  'tau': self.tau,
659
654
  'U': U,
660
- 'likelihood': self.likelihood
655
+ 'likelihood': self.likelihood,
661
656
  }
662
657
 
663
658
  @classmethod
@@ -674,8 +669,11 @@ class Edge(object):
674
669
  Instance of the edge defined on the parameters.
675
670
  """
676
671
  instance = cls(
677
- edge_dict['index'], edge_dict['L'], edge_dict['R'],
678
- edge_dict['name'], edge_dict['theta']
672
+ edge_dict['index'],
673
+ edge_dict['L'],
674
+ edge_dict['R'],
675
+ edge_dict['name'],
676
+ edge_dict['theta'],
679
677
  )
680
678
  instance.U = np.array(edge_dict['U'])
681
679
  parents = edge_dict['parents']