copulas 0.10.1__tar.gz → 0.12.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of copulas might be problematic. Click here for more details.
- {copulas-0.10.1 → copulas-0.12.1}/PKG-INFO +32 -40
- copulas-0.12.1/copulas/__init__.py +91 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/__init__.py +3 -3
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/base.py +8 -9
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/clayton.py +3 -2
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/frank.py +2 -1
- {copulas-0.10.1 → copulas-0.12.1}/copulas/datasets.py +3 -10
- copulas-0.12.1/copulas/errors.py +5 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/multivariate/__init__.py +1 -7
- {copulas-0.10.1 → copulas-0.12.1}/copulas/multivariate/base.py +2 -1
- {copulas-0.10.1 → copulas-0.12.1}/copulas/multivariate/gaussian.py +79 -48
- {copulas-0.10.1 → copulas-0.12.1}/copulas/multivariate/tree.py +12 -14
- {copulas-0.10.1 → copulas-0.12.1}/copulas/multivariate/vine.py +14 -9
- {copulas-0.10.1 → copulas-0.12.1}/copulas/optimize/__init__.py +4 -3
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/__init__.py +1 -1
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/base.py +16 -5
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/beta.py +1 -6
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/gaussian.py +2 -8
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/gaussian_kde.py +6 -7
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/selection.py +1 -1
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/student_t.py +1 -5
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/truncated_gaussian.py +9 -17
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/uniform.py +2 -8
- copulas-0.10.1/copulas/__init__.py → copulas-0.12.1/copulas/utils.py +10 -94
- {copulas-0.10.1 → copulas-0.12.1}/copulas/visualization.py +15 -20
- {copulas-0.10.1 → copulas-0.12.1}/copulas.egg-info/PKG-INFO +32 -40
- {copulas-0.10.1 → copulas-0.12.1}/copulas.egg-info/SOURCES.txt +2 -1
- copulas-0.12.1/copulas.egg-info/requires.txt +79 -0
- {copulas-0.10.1 → copulas-0.12.1}/pyproject.toml +96 -45
- copulas-0.12.1/setup.cfg +4 -0
- {copulas-0.10.1 → copulas-0.12.1}/tests/test_tasks.py +6 -3
- copulas-0.10.1/copulas.egg-info/requires.txt +0 -75
- copulas-0.10.1/setup.cfg +0 -23
- {copulas-0.10.1 → copulas-0.12.1}/LICENSE +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/README.md +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/gumbel.py +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/independence.py +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/bivariate/utils.py +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/gamma.py +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas/univariate/log_laplace.py +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas.egg-info/dependency_links.txt +0 -0
- {copulas-0.10.1 → copulas-0.12.1}/copulas.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: copulas
|
|
3
|
-
Version: 0.10.1
|
|
3
|
+
Version: 0.12.1
|
|
4
4
|
Summary: Create tabular synthetic data using copulas-based modeling.
|
|
5
5
|
Author-email: "DataCebo, Inc." <info@sdv.dev>
|
|
6
6
|
License: BSL-1.1
|
|
@@ -19,27 +19,36 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.9
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.10
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
24
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
-
Requires-Python: <3.12,>=3.8
|
|
25
|
+
Requires-Python: <3.14,>=3.8
|
|
24
26
|
Description-Content-Type: text/markdown
|
|
25
27
|
License-File: LICENSE
|
|
26
|
-
Requires-Dist: numpy
|
|
27
|
-
Requires-Dist: numpy
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist:
|
|
30
|
-
Requires-Dist: pandas>=1.
|
|
31
|
-
Requires-Dist:
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
Requires-Dist:
|
|
28
|
+
Requires-Dist: numpy>=1.21.0; python_version < "3.10"
|
|
29
|
+
Requires-Dist: numpy>=1.23.3; python_version >= "3.10" and python_version < "3.12"
|
|
30
|
+
Requires-Dist: numpy>=1.26.0; python_version >= "3.12" and python_version < "3.13"
|
|
31
|
+
Requires-Dist: numpy>=2.1.0; python_version >= "3.13"
|
|
32
|
+
Requires-Dist: pandas>=1.4.0; python_version < "3.11"
|
|
33
|
+
Requires-Dist: pandas>=1.5.0; python_version >= "3.11" and python_version < "3.12"
|
|
34
|
+
Requires-Dist: pandas>=2.1.1; python_version >= "3.12" and python_version < "3.13"
|
|
35
|
+
Requires-Dist: pandas>=2.2.3; python_version >= "3.13"
|
|
36
|
+
Requires-Dist: plotly>=5.10.0; python_version < "3.13"
|
|
37
|
+
Requires-Dist: plotly>=5.12.0; python_version >= "3.13"
|
|
38
|
+
Requires-Dist: scipy>=1.7.3; python_version < "3.10"
|
|
39
|
+
Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
|
|
40
|
+
Requires-Dist: scipy>=1.12.0; python_version >= "3.12" and python_version < "3.13"
|
|
41
|
+
Requires-Dist: scipy>=1.14.1; python_version >= "3.13"
|
|
34
42
|
Provides-Extra: tutorials
|
|
35
43
|
Requires-Dist: markupsafe<=2.0.1; extra == "tutorials"
|
|
36
|
-
Requires-Dist: scikit-learn
|
|
44
|
+
Requires-Dist: scikit-learn>=0.24; python_version < "3.12" and extra == "tutorials"
|
|
45
|
+
Requires-Dist: scikit-learn>=1.3.1; python_version >= "3.12" and extra == "tutorials"
|
|
37
46
|
Requires-Dist: jupyter<2,>=1.0.0; extra == "tutorials"
|
|
38
47
|
Provides-Extra: test
|
|
39
48
|
Requires-Dist: copulas[tutorials]; extra == "test"
|
|
40
49
|
Requires-Dist: pytest<7,>=6.2.5; extra == "test"
|
|
41
50
|
Requires-Dist: pytest-cov<3,>=2.6.0; extra == "test"
|
|
42
|
-
Requires-Dist: pytest-rerunfailures<10
|
|
51
|
+
Requires-Dist: pytest-rerunfailures<15,>=10.3; extra == "test"
|
|
43
52
|
Requires-Dist: rundoc<0.5,>=0.4.3; extra == "test"
|
|
44
53
|
Requires-Dist: tomli<3,>=2.0.0; extra == "test"
|
|
45
54
|
Provides-Extra: dev
|
|
@@ -47,42 +56,25 @@ Requires-Dist: copulas[test,tutorials]; extra == "dev"
|
|
|
47
56
|
Requires-Dist: pip>=9.0.1; extra == "dev"
|
|
48
57
|
Requires-Dist: build<2,>=1.0.0; extra == "dev"
|
|
49
58
|
Requires-Dist: bump-my-version<1,>=0.18.3; extra == "dev"
|
|
50
|
-
Requires-Dist: watchdog<
|
|
59
|
+
Requires-Dist: watchdog<5,>=1.0.1; extra == "dev"
|
|
51
60
|
Requires-Dist: m2r<0.3,>=0.2.0; extra == "dev"
|
|
52
61
|
Requires-Dist: nbsphinx<0.7,>=0.5.0; extra == "dev"
|
|
53
62
|
Requires-Dist: Sphinx<3,>=1.7.1; extra == "dev"
|
|
54
63
|
Requires-Dist: sphinx_rtd_theme<0.5,>=0.2.4; extra == "dev"
|
|
55
|
-
Requires-Dist:
|
|
56
|
-
Requires-Dist:
|
|
57
|
-
Requires-Dist:
|
|
58
|
-
Requires-Dist:
|
|
59
|
-
Requires-Dist:
|
|
60
|
-
Requires-Dist:
|
|
61
|
-
Requires-Dist:
|
|
62
|
-
Requires-Dist:
|
|
63
|
-
Requires-Dist:
|
|
64
|
-
Requires-Dist: flake8-docstrings<2,>=1.5.0; extra == "dev"
|
|
65
|
-
Requires-Dist: pydocstyle<6.2,>=6.1.1; extra == "dev"
|
|
66
|
-
Requires-Dist: flake8-pytest-style<2,>=1.5.0; extra == "dev"
|
|
67
|
-
Requires-Dist: flake8-comprehensions<3.7,>=3.6.1; extra == "dev"
|
|
68
|
-
Requires-Dist: flake8-print<4.1,>=4.0.0; extra == "dev"
|
|
69
|
-
Requires-Dist: flake8-expression-complexity<0.1,>=0.0.9; extra == "dev"
|
|
70
|
-
Requires-Dist: flake8-multiline-containers<0.1,>=0.0.18; extra == "dev"
|
|
71
|
-
Requires-Dist: pandas-vet<0.3,>=0.2.2; extra == "dev"
|
|
72
|
-
Requires-Dist: flake8-builtins<1.6,>=1.5.3; extra == "dev"
|
|
73
|
-
Requires-Dist: flake8-eradicate<1.2,>=1.1.0; extra == "dev"
|
|
74
|
-
Requires-Dist: flake8-quotes<4,>=3.3.0; extra == "dev"
|
|
75
|
-
Requires-Dist: flake8-variables-names<0.1,>=0.0.4; extra == "dev"
|
|
76
|
-
Requires-Dist: flake8-sfs<0.1,>=0.0.3; extra == "dev"
|
|
77
|
-
Requires-Dist: flake8-absolute-import<2,>=1.0; extra == "dev"
|
|
78
|
-
Requires-Dist: autoflake<2,>=1.1; extra == "dev"
|
|
79
|
-
Requires-Dist: autopep8<1.6,>=1.4.3; extra == "dev"
|
|
64
|
+
Requires-Dist: sphinxcontrib_applehelp<1.0.8; extra == "dev"
|
|
65
|
+
Requires-Dist: sphinxcontrib-devhelp<1.0.6; extra == "dev"
|
|
66
|
+
Requires-Dist: sphinxcontrib-htmlhelp<2.0.5; extra == "dev"
|
|
67
|
+
Requires-Dist: sphinxcontrib_serializinghtml<1.1.10; extra == "dev"
|
|
68
|
+
Requires-Dist: sphinxcontrib_qthelp<1.0.7; extra == "dev"
|
|
69
|
+
Requires-Dist: alabaster<0.7.13; extra == "dev"
|
|
70
|
+
Requires-Dist: Jinja2<3,>=2; python_version < "3.12" and extra == "dev"
|
|
71
|
+
Requires-Dist: Jinja2<4,>=2; python_version >= "3.12" and extra == "dev"
|
|
72
|
+
Requires-Dist: ruff<1,>=0.3.2; extra == "dev"
|
|
80
73
|
Requires-Dist: twine<4,>=1.10.0; extra == "dev"
|
|
81
74
|
Requires-Dist: wheel>=0.30.0; extra == "dev"
|
|
82
75
|
Requires-Dist: coverage<6,>=4.5.1; extra == "dev"
|
|
83
76
|
Requires-Dist: tox<4,>=2.9.1; extra == "dev"
|
|
84
77
|
Requires-Dist: invoke; extra == "dev"
|
|
85
|
-
Requires-Dist: doc8<0.9,>=0.8.0; extra == "dev"
|
|
86
78
|
Requires-Dist: urllib3<1.26,>=1.20; extra == "dev"
|
|
87
79
|
Requires-Dist: tabulate<0.9,>=0.8.3; extra == "dev"
|
|
88
80
|
Requires-Dist: boto3<1.10,>=1.7.47; extra == "dev"
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Top-level package for Copulas."""
|
|
2
|
+
|
|
3
|
+
__author__ = 'DataCebo, Inc.'
|
|
4
|
+
__email__ = 'info@sdv.dev'
|
|
5
|
+
__version__ = '0.12.1'
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import warnings
|
|
9
|
+
from copy import deepcopy
|
|
10
|
+
from importlib.metadata import entry_points
|
|
11
|
+
from operator import attrgetter
|
|
12
|
+
from types import ModuleType
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_addon_target(addon_path_name):
|
|
16
|
+
"""Find the target object for the add-on.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
addon_path_name (str):
|
|
20
|
+
The add-on's name. The add-on's name should be the full path of valid Python
|
|
21
|
+
identifiers (i.e. importable.module:object.attr).
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
tuple:
|
|
25
|
+
* object:
|
|
26
|
+
The base module or object the add-on should be added to.
|
|
27
|
+
* str:
|
|
28
|
+
The name the add-on should be added to under the module or object.
|
|
29
|
+
"""
|
|
30
|
+
module_path, _, object_path = addon_path_name.partition(':')
|
|
31
|
+
module_path = module_path.split('.')
|
|
32
|
+
|
|
33
|
+
if module_path[0] != __name__:
|
|
34
|
+
msg = f"expected base module to be '{__name__}', found '{module_path[0]}'"
|
|
35
|
+
raise AttributeError(msg)
|
|
36
|
+
|
|
37
|
+
target_base = sys.modules[__name__]
|
|
38
|
+
for submodule in module_path[1:-1]:
|
|
39
|
+
target_base = getattr(target_base, submodule)
|
|
40
|
+
|
|
41
|
+
addon_name = module_path[-1]
|
|
42
|
+
if object_path:
|
|
43
|
+
if len(module_path) > 1 and not hasattr(target_base, module_path[-1]):
|
|
44
|
+
msg = f"cannot add '{object_path}' to unknown submodule '{'.'.join(module_path)}'"
|
|
45
|
+
raise AttributeError(msg)
|
|
46
|
+
|
|
47
|
+
if len(module_path) > 1:
|
|
48
|
+
target_base = getattr(target_base, module_path[-1])
|
|
49
|
+
|
|
50
|
+
split_object = object_path.split('.')
|
|
51
|
+
addon_name = split_object[-1]
|
|
52
|
+
|
|
53
|
+
if len(split_object) > 1:
|
|
54
|
+
target_base = attrgetter('.'.join(split_object[:-1]))(target_base)
|
|
55
|
+
|
|
56
|
+
return target_base, addon_name
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _find_addons():
|
|
60
|
+
"""Find and load all copulas add-ons."""
|
|
61
|
+
group = 'copulas_modules'
|
|
62
|
+
try:
|
|
63
|
+
eps = entry_points(group=group)
|
|
64
|
+
except TypeError:
|
|
65
|
+
# Load-time selection requires Python >= 3.10 or importlib_metadata >= 3.6
|
|
66
|
+
eps = entry_points().get(group, [])
|
|
67
|
+
|
|
68
|
+
for entry_point in eps:
|
|
69
|
+
try:
|
|
70
|
+
addon = entry_point.load()
|
|
71
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
72
|
+
msg = f'Failed to load "{entry_point.name}" from "{entry_point.value}" with error:\n{e}'
|
|
73
|
+
warnings.warn(msg)
|
|
74
|
+
continue
|
|
75
|
+
|
|
76
|
+
try:
|
|
77
|
+
addon_target, addon_name = _get_addon_target(entry_point.name)
|
|
78
|
+
except AttributeError as error:
|
|
79
|
+
msg = f"Failed to set '{entry_point.name}': {error}."
|
|
80
|
+
warnings.warn(msg)
|
|
81
|
+
continue
|
|
82
|
+
|
|
83
|
+
if isinstance(addon, ModuleType):
|
|
84
|
+
addon_module_name = f'{addon_target.__name__}.{addon_name}'
|
|
85
|
+
if addon_module_name not in sys.modules:
|
|
86
|
+
sys.modules[addon_module_name] = addon
|
|
87
|
+
|
|
88
|
+
setattr(addon_target, addon_name, addon)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
_find_addons()
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from copulas import EPSILON
|
|
6
|
+
from copulas.utils import EPSILON
|
|
7
7
|
from copulas.bivariate.base import Bivariate, CopulaTypes
|
|
8
8
|
from copulas.bivariate.clayton import Clayton
|
|
9
9
|
from copulas.bivariate.frank import Frank
|
|
@@ -47,7 +47,6 @@ def _compute_empirical(X):
|
|
|
47
47
|
right = sum(np.logical_and(U >= base[k], V >= base[k])) / N
|
|
48
48
|
|
|
49
49
|
if left > 0:
|
|
50
|
-
|
|
51
50
|
z_left.append(base[k])
|
|
52
51
|
L.append(left / base[k] ** 2)
|
|
53
52
|
|
|
@@ -151,7 +150,8 @@ def select_copula(X):
|
|
|
151
150
|
|
|
152
151
|
left_tail, empirical_left_aut, right_tail, empirical_right_aut = _compute_empirical(X)
|
|
153
152
|
candidate_left_auts, candidate_right_auts = _compute_candidates(
|
|
154
|
-
copula_candidates, left_tail, right_tail
|
|
153
|
+
copula_candidates, left_tail, right_tail
|
|
154
|
+
)
|
|
155
155
|
|
|
156
156
|
empirical_aut = np.concatenate((empirical_left_aut, empirical_right_aut))
|
|
157
157
|
candidate_auts = [
|
|
@@ -8,8 +8,9 @@ import numpy as np
|
|
|
8
8
|
from scipy import stats
|
|
9
9
|
from scipy.optimize import brentq
|
|
10
10
|
|
|
11
|
-
from copulas import EPSILON, NotFittedError, random_state, validate_random_state
|
|
12
11
|
from copulas.bivariate.utils import split_matrix
|
|
12
|
+
from copulas.errors import NotFittedError
|
|
13
|
+
from copulas.utils import EPSILON, random_state, validate_random_state
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class CopulaTypes(Enum):
|
|
@@ -96,7 +97,7 @@ class Bivariate(object):
|
|
|
96
97
|
return super(Bivariate, cls).__new__(cls)
|
|
97
98
|
|
|
98
99
|
if not isinstance(copula_type, CopulaTypes):
|
|
99
|
-
if
|
|
100
|
+
if isinstance(copula_type, str) and copula_type.upper() in CopulaTypes.__members__:
|
|
100
101
|
copula_type = CopulaTypes[copula_type.upper()]
|
|
101
102
|
else:
|
|
102
103
|
raise ValueError(f'Invalid copula type {copula_type}')
|
|
@@ -192,11 +193,7 @@ class Bivariate(object):
|
|
|
192
193
|
dict: Parameters of the copula.
|
|
193
194
|
|
|
194
195
|
"""
|
|
195
|
-
return {
|
|
196
|
-
'copula_type': self.copula_type.name,
|
|
197
|
-
'theta': self.theta,
|
|
198
|
-
'tau': self.tau
|
|
199
|
-
}
|
|
196
|
+
return {'copula_type': self.copula_type.name, 'theta': self.theta, 'tau': self.tau}
|
|
200
197
|
|
|
201
198
|
@classmethod
|
|
202
199
|
def from_dict(cls, copula_dict):
|
|
@@ -297,6 +294,7 @@ class Bivariate(object):
|
|
|
297
294
|
self.check_fit()
|
|
298
295
|
result = []
|
|
299
296
|
for _y, _v in zip(y, V):
|
|
297
|
+
|
|
300
298
|
def f(u):
|
|
301
299
|
return self.partial_derivative_scalar(u, _v) - _y
|
|
302
300
|
|
|
@@ -330,7 +328,7 @@ class Bivariate(object):
|
|
|
330
328
|
np.ndarray
|
|
331
329
|
|
|
332
330
|
"""
|
|
333
|
-
delta =
|
|
331
|
+
delta = -2 * (X[:, 1] > 0.5) + 1
|
|
334
332
|
delta = 0.0001 * delta
|
|
335
333
|
X_prime = X.copy()
|
|
336
334
|
X_prime[:, 1] += delta
|
|
@@ -411,10 +409,11 @@ class Bivariate(object):
|
|
|
411
409
|
|
|
412
410
|
"""
|
|
413
411
|
from copulas.bivariate import select_copula # noqa
|
|
412
|
+
|
|
414
413
|
warnings.warn(
|
|
415
414
|
'`Bivariate.select_copula` has been deprecated and will be removed in a later '
|
|
416
415
|
'release. Please use `copulas.bivariate.select_copula` instead',
|
|
417
|
-
DeprecationWarning
|
|
416
|
+
DeprecationWarning,
|
|
418
417
|
)
|
|
419
418
|
return select_copula(X)
|
|
420
419
|
|
|
@@ -84,9 +84,10 @@ class Clayton(Bivariate):
|
|
|
84
84
|
cdfs = [
|
|
85
85
|
np.power(
|
|
86
86
|
np.power(U[i], -self.theta) + np.power(V[i], -self.theta) - 1,
|
|
87
|
-
-1.0 / self.theta
|
|
87
|
+
-1.0 / self.theta,
|
|
88
88
|
)
|
|
89
|
-
if (U[i] > 0 and V[i] > 0)
|
|
89
|
+
if (U[i] > 0 and V[i] > 0)
|
|
90
|
+
else 0
|
|
90
91
|
for i in range(len(U))
|
|
91
92
|
]
|
|
92
93
|
|
|
@@ -6,9 +6,9 @@ import numpy as np
|
|
|
6
6
|
import scipy.integrate as integrate
|
|
7
7
|
from scipy.optimize import least_squares
|
|
8
8
|
|
|
9
|
-
from copulas import EPSILON
|
|
10
9
|
from copulas.bivariate.base import Bivariate, CopulaTypes
|
|
11
10
|
from copulas.bivariate.utils import split_matrix
|
|
11
|
+
from copulas.utils import EPSILON
|
|
12
12
|
|
|
13
13
|
MIN_FLOAT_LOG = np.log(sys.float_info.min)
|
|
14
14
|
MAX_FLOAT_LOG = np.log(sys.float_info.max)
|
|
@@ -162,6 +162,7 @@ class Frank(Bivariate):
|
|
|
162
162
|
|
|
163
163
|
def _tau_to_theta(self, alpha):
|
|
164
164
|
"""Relationship between tau and theta as a solvable equation."""
|
|
165
|
+
|
|
165
166
|
def debye(t):
|
|
166
167
|
return t / (np.exp(t) - 1)
|
|
167
168
|
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from scipy import stats
|
|
6
6
|
|
|
7
|
-
from copulas import set_random_state, validate_random_state
|
|
7
|
+
from copulas.utils import set_random_state, validate_random_state
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _dummy_fn(state):
|
|
@@ -33,10 +33,7 @@ def sample_bivariate_age_income(size=1000, seed=42):
|
|
|
33
33
|
income += np.random.normal(loc=np.log(age) / 100, scale=10, size=size)
|
|
34
34
|
income[np.random.randint(0, 10, size=size) == 0] /= 1000
|
|
35
35
|
|
|
36
|
-
return pd.DataFrame({
|
|
37
|
-
'age': age,
|
|
38
|
-
'income': income
|
|
39
|
-
})
|
|
36
|
+
return pd.DataFrame({'age': age, 'income': income})
|
|
40
37
|
|
|
41
38
|
|
|
42
39
|
def sample_trivariate_xyz(size=1000, seed=42):
|
|
@@ -61,11 +58,7 @@ def sample_trivariate_xyz(size=1000, seed=42):
|
|
|
61
58
|
with set_random_state(validate_random_state(seed), _dummy_fn):
|
|
62
59
|
x = stats.beta.rvs(a=0.1, b=0.1, size=size)
|
|
63
60
|
y = stats.beta.rvs(a=0.1, b=0.5, size=size)
|
|
64
|
-
return pd.DataFrame({
|
|
65
|
-
'x': x,
|
|
66
|
-
'y': y,
|
|
67
|
-
'z': np.random.normal(size=size) + y * 10
|
|
68
|
-
})
|
|
61
|
+
return pd.DataFrame({'x': x, 'y': y, 'z': np.random.normal(size=size) + y * 10})
|
|
69
62
|
|
|
70
63
|
|
|
71
64
|
def sample_univariate_bernoulli(size=1000, seed=42):
|
|
@@ -5,10 +5,4 @@ from copulas.multivariate.gaussian import GaussianMultivariate
|
|
|
5
5
|
from copulas.multivariate.tree import Tree, TreeTypes
|
|
6
6
|
from copulas.multivariate.vine import VineCopula
|
|
7
7
|
|
|
8
|
-
__all__ = (
|
|
9
|
-
'Multivariate',
|
|
10
|
-
'GaussianMultivariate',
|
|
11
|
-
'VineCopula',
|
|
12
|
-
'Tree',
|
|
13
|
-
'TreeTypes'
|
|
14
|
-
)
|
|
8
|
+
__all__ = ('Multivariate', 'GaussianMultivariate', 'VineCopula', 'Tree', 'TreeTypes')
|
|
@@ -7,11 +7,17 @@ import numpy as np
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from scipy import stats
|
|
9
9
|
|
|
10
|
-
from copulas import (
|
|
11
|
-
EPSILON, check_valid_values, get_instance, get_qualified_name, random_state, store_args,
|
|
12
|
-
validate_random_state)
|
|
13
10
|
from copulas.multivariate.base import Multivariate
|
|
14
11
|
from copulas.univariate import GaussianUnivariate, Univariate
|
|
12
|
+
from copulas.utils import (
|
|
13
|
+
EPSILON,
|
|
14
|
+
check_valid_values,
|
|
15
|
+
get_instance,
|
|
16
|
+
get_qualified_name,
|
|
17
|
+
random_state,
|
|
18
|
+
store_args,
|
|
19
|
+
validate_random_state,
|
|
20
|
+
)
|
|
15
21
|
|
|
16
22
|
LOGGER = logging.getLogger(__name__)
|
|
17
23
|
DEFAULT_DISTRIBUTION = Univariate
|
|
@@ -64,26 +70,6 @@ class GaussianMultivariate(Multivariate):
|
|
|
64
70
|
|
|
65
71
|
return stats.norm.ppf(np.column_stack(U))
|
|
66
72
|
|
|
67
|
-
def _get_correlation(self, X):
|
|
68
|
-
"""Compute correlation matrix with transformed data.
|
|
69
|
-
|
|
70
|
-
Args:
|
|
71
|
-
X (numpy.ndarray):
|
|
72
|
-
Data for which the correlation needs to be computed.
|
|
73
|
-
|
|
74
|
-
Returns:
|
|
75
|
-
numpy.ndarray:
|
|
76
|
-
computed correlation matrix.
|
|
77
|
-
"""
|
|
78
|
-
result = self._transform_to_normal(X)
|
|
79
|
-
correlation = pd.DataFrame(data=result).corr().to_numpy()
|
|
80
|
-
correlation = np.nan_to_num(correlation, nan=0.0)
|
|
81
|
-
# If singular, add some noise to the diagonal
|
|
82
|
-
if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
|
|
83
|
-
correlation = correlation + np.identity(correlation.shape[0]) * EPSILON
|
|
84
|
-
|
|
85
|
-
return pd.DataFrame(correlation, index=self.columns, columns=self.columns)
|
|
86
|
-
|
|
87
73
|
@check_valid_values
|
|
88
74
|
def fit(self, X):
|
|
89
75
|
"""Compute the distribution for each variable and then its correlation matrix.
|
|
@@ -94,42 +80,88 @@ class GaussianMultivariate(Multivariate):
|
|
|
94
80
|
"""
|
|
95
81
|
LOGGER.info('Fitting %s', self)
|
|
96
82
|
|
|
83
|
+
# Validate the input data
|
|
84
|
+
X = self._validate_input(X)
|
|
85
|
+
columns, univariates = self._fit_columns(X)
|
|
86
|
+
|
|
87
|
+
self.columns = columns
|
|
88
|
+
self.univariates = univariates
|
|
89
|
+
|
|
90
|
+
LOGGER.debug('Computing correlation.')
|
|
91
|
+
self.correlation = self._get_correlation(X)
|
|
92
|
+
self.fitted = True
|
|
93
|
+
LOGGER.debug('GaussianMultivariate fitted successfully')
|
|
94
|
+
|
|
95
|
+
def _validate_input(self, X):
|
|
96
|
+
"""Validate the input data."""
|
|
97
97
|
if not isinstance(X, pd.DataFrame):
|
|
98
98
|
X = pd.DataFrame(X)
|
|
99
99
|
|
|
100
|
+
return X
|
|
101
|
+
|
|
102
|
+
def _fit_columns(self, X):
|
|
103
|
+
"""Fit each column to its distribution."""
|
|
100
104
|
columns = []
|
|
101
105
|
univariates = []
|
|
102
106
|
for column_name, column in X.items():
|
|
103
|
-
|
|
104
|
-
distribution = self.distribution.get(column_name, DEFAULT_DISTRIBUTION)
|
|
105
|
-
else:
|
|
106
|
-
distribution = self.distribution
|
|
107
|
-
|
|
107
|
+
distribution = self._get_distribution_for_column(column_name)
|
|
108
108
|
LOGGER.debug('Fitting column %s to %s', column_name, distribution)
|
|
109
109
|
|
|
110
|
-
univariate = get_instance(distribution)
|
|
111
|
-
try:
|
|
112
|
-
univariate.fit(column)
|
|
113
|
-
except BaseException:
|
|
114
|
-
log_message = (
|
|
115
|
-
f'Unable to fit to a {distribution} distribution for column {column_name}. '
|
|
116
|
-
'Using a Gaussian distribution instead.'
|
|
117
|
-
)
|
|
118
|
-
LOGGER.info(log_message)
|
|
119
|
-
univariate = GaussianUnivariate()
|
|
120
|
-
univariate.fit(column)
|
|
121
|
-
|
|
110
|
+
univariate = self._fit_column(column, distribution, column_name)
|
|
122
111
|
columns.append(column_name)
|
|
123
112
|
univariates.append(univariate)
|
|
124
113
|
|
|
125
|
-
|
|
126
|
-
|
|
114
|
+
return columns, univariates
|
|
115
|
+
|
|
116
|
+
def _get_distribution_for_column(self, column_name):
|
|
117
|
+
"""Retrieve the distribution for a given column name."""
|
|
118
|
+
if isinstance(self.distribution, dict):
|
|
119
|
+
return self.distribution.get(column_name, DEFAULT_DISTRIBUTION)
|
|
120
|
+
|
|
121
|
+
return self.distribution
|
|
122
|
+
|
|
123
|
+
def _fit_column(self, column, distribution, column_name):
|
|
124
|
+
"""Fit a single column to its distribution with exception handling."""
|
|
125
|
+
univariate = get_instance(distribution)
|
|
126
|
+
try:
|
|
127
|
+
univariate.fit(column)
|
|
128
|
+
except Exception as error:
|
|
129
|
+
univariate = self._fit_with_fallback_distribution(
|
|
130
|
+
column, distribution, column_name, error
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
return univariate
|
|
134
|
+
|
|
135
|
+
def _fit_with_fallback_distribution(self, column, distribution, column_name, error):
|
|
136
|
+
"""Fall back to fitting a Gaussian distribution and log the error."""
|
|
137
|
+
log_message = (
|
|
138
|
+
f'Unable to fit to a {distribution} distribution for column {column_name}. '
|
|
139
|
+
'Using a Gaussian distribution instead.'
|
|
140
|
+
)
|
|
141
|
+
LOGGER.info(log_message)
|
|
142
|
+
univariate = GaussianUnivariate()
|
|
143
|
+
univariate.fit(column)
|
|
144
|
+
return univariate
|
|
127
145
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
self.fitted = True
|
|
146
|
+
def _get_correlation(self, X):
|
|
147
|
+
"""Compute correlation matrix with transformed data.
|
|
131
148
|
|
|
132
|
-
|
|
149
|
+
Args:
|
|
150
|
+
X (numpy.ndarray):
|
|
151
|
+
Data for which the correlation needs to be computed.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
numpy.ndarray:
|
|
155
|
+
computed correlation matrix.
|
|
156
|
+
"""
|
|
157
|
+
result = self._transform_to_normal(X)
|
|
158
|
+
correlation = pd.DataFrame(data=result).corr().to_numpy()
|
|
159
|
+
correlation = np.nan_to_num(correlation, nan=0.0)
|
|
160
|
+
# If singular, add some noise to the diagonal
|
|
161
|
+
if np.linalg.cond(correlation) > 1.0 / sys.float_info.epsilon:
|
|
162
|
+
correlation = correlation + np.identity(correlation.shape[0]) * EPSILON
|
|
163
|
+
|
|
164
|
+
return pd.DataFrame(correlation, index=self.columns, columns=self.columns)
|
|
133
165
|
|
|
134
166
|
def probability_density(self, X):
|
|
135
167
|
"""Compute the probability density for each point in X.
|
|
@@ -149,8 +181,7 @@ class GaussianMultivariate(Multivariate):
|
|
|
149
181
|
self.check_fit()
|
|
150
182
|
transformed = self._transform_to_normal(X)
|
|
151
183
|
|
|
152
|
-
return stats.multivariate_normal.pdf(
|
|
153
|
-
transformed, cov=self.correlation, allow_singular=True)
|
|
184
|
+
return stats.multivariate_normal.pdf(transformed, cov=self.correlation, allow_singular=True)
|
|
154
185
|
|
|
155
186
|
def cumulative_distribution(self, X):
|
|
156
187
|
"""Compute the cumulative distribution value for each point in X.
|
|
@@ -6,9 +6,9 @@ from enum import Enum
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import scipy
|
|
8
8
|
|
|
9
|
-
from copulas import EPSILON, get_qualified_name
|
|
10
9
|
from copulas.bivariate.base import Bivariate
|
|
11
10
|
from copulas.multivariate.base import Multivariate
|
|
11
|
+
from copulas.utils import EPSILON, get_qualified_name
|
|
12
12
|
|
|
13
13
|
LOGGER = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -98,7 +98,7 @@ class Tree(Multivariate):
|
|
|
98
98
|
"""
|
|
99
99
|
# first column is the variable of interest
|
|
100
100
|
tau_y = self.tau_matrix[:, y]
|
|
101
|
-
tau_y[y] = np.NaN
|
|
101
|
+
tau_y[y] = np.nan
|
|
102
102
|
|
|
103
103
|
temp = np.empty([self.n_nodes, 3])
|
|
104
104
|
temp[:, 0] = np.arange(self.n_nodes)
|
|
@@ -131,7 +131,7 @@ class Tree(Multivariate):
|
|
|
131
131
|
left_parent, right_parent = edge.parents
|
|
132
132
|
left_u, right_u = Edge.get_conditional_uni(left_parent, right_parent)
|
|
133
133
|
|
|
134
|
-
tau[i, j],
|
|
134
|
+
tau[i, j], _pvalue = scipy.stats.kendalltau(left_u, right_u)
|
|
135
135
|
|
|
136
136
|
return tau
|
|
137
137
|
|
|
@@ -212,8 +212,7 @@ class Tree(Multivariate):
|
|
|
212
212
|
"""Produce printable representation of the class."""
|
|
213
213
|
template = 'L:{} R:{} D:{} Copula:{} Theta:{}'
|
|
214
214
|
return '\n'.join([
|
|
215
|
-
template.format(edge.L, edge.R, edge.D, edge.name, edge.theta)
|
|
216
|
-
for edge in self.edges
|
|
215
|
+
template.format(edge.L, edge.R, edge.D, edge.name, edge.theta) for edge in self.edges
|
|
217
216
|
])
|
|
218
217
|
|
|
219
218
|
def _serialize_previous_tree(self):
|
|
@@ -237,11 +236,7 @@ class Tree(Multivariate):
|
|
|
237
236
|
Parameters of this Tree.
|
|
238
237
|
"""
|
|
239
238
|
fitted = self.fitted
|
|
240
|
-
result = {
|
|
241
|
-
'tree_type': self.tree_type,
|
|
242
|
-
'type': get_qualified_name(self),
|
|
243
|
-
'fitted': fitted
|
|
244
|
-
}
|
|
239
|
+
result = {'tree_type': self.tree_type, 'type': get_qualified_name(self), 'fitted': fitted}
|
|
245
240
|
|
|
246
241
|
if not fitted:
|
|
247
242
|
return result
|
|
@@ -451,7 +446,7 @@ def get_tree(tree_type):
|
|
|
451
446
|
Instance of a Tree of the specified type.
|
|
452
447
|
"""
|
|
453
448
|
if not isinstance(tree_type, TreeTypes):
|
|
454
|
-
if
|
|
449
|
+
if isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__:
|
|
455
450
|
tree_type = TreeTypes[tree_type.upper()]
|
|
456
451
|
else:
|
|
457
452
|
raise ValueError(f'Invalid tree type {tree_type}')
|
|
@@ -657,7 +652,7 @@ class Edge(object):
|
|
|
657
652
|
'theta': self.theta,
|
|
658
653
|
'tau': self.tau,
|
|
659
654
|
'U': U,
|
|
660
|
-
'likelihood': self.likelihood
|
|
655
|
+
'likelihood': self.likelihood,
|
|
661
656
|
}
|
|
662
657
|
|
|
663
658
|
@classmethod
|
|
@@ -674,8 +669,11 @@ class Edge(object):
|
|
|
674
669
|
Instance of the edge defined on the parameters.
|
|
675
670
|
"""
|
|
676
671
|
instance = cls(
|
|
677
|
-
edge_dict['index'],
|
|
678
|
-
edge_dict['
|
|
672
|
+
edge_dict['index'],
|
|
673
|
+
edge_dict['L'],
|
|
674
|
+
edge_dict['R'],
|
|
675
|
+
edge_dict['name'],
|
|
676
|
+
edge_dict['theta'],
|
|
679
677
|
)
|
|
680
678
|
instance.U = np.array(edge_dict['U'])
|
|
681
679
|
parents = edge_dict['parents']
|