copulas 0.10.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,169 @@
+ """Frank module."""
+
+ import sys
+
+ import numpy as np
+ import scipy.integrate as integrate
+ from scipy.optimize import least_squares
+
+ from copulas import EPSILON
+ from copulas.bivariate.base import Bivariate, CopulaTypes
+ from copulas.bivariate.utils import split_matrix
+
+ MIN_FLOAT_LOG = np.log(sys.float_info.min)
+ MAX_FLOAT_LOG = np.log(sys.float_info.max)
+
+
+ class Frank(Bivariate):
+     """Class for Frank copula model."""
+
+     copula_type = CopulaTypes.FRANK
+     theta_interval = [-float('inf'), float('inf')]
+     invalid_thetas = [0]
+
+     def generator(self, t):
+         """Return the generator function."""
+         a = (np.exp(-self.theta * t) - 1) / (np.exp(-self.theta) - 1)
+         return -np.log(a)
+
+     def _g(self, z):
+         r"""Assist in solving the Frank copula.
+
+         This function encapsulates :math:`g(z) = e^{-\theta z} - 1`, which is used in the
+         Frank copula.
+
+         Args:
+             z: np.ndarray
+
+         Returns:
+             np.ndarray
+
+         """
+         return np.exp(-self.theta * z) - 1
+
+     def probability_density(self, X):
+         r"""Compute probability density function for given copula family.
+
+         The probability density function (PDF) for the Frank family of copulas
+         corresponds to the formula:
+
+         .. math:: c(U,V) = \frac{\partial^2 C(u,v)}{\partial v \partial u} =
+             \frac{-\theta g(1)(1 + g(u + v))}{(g(u) g(v) + g(1)) ^ 2}
+
+         where the function :math:`g` is defined by:
+
+         .. math:: g(x) = e^{-\theta x} - 1
+
+         Args:
+             X: `np.ndarray`
+
+         Returns:
+             np.array: probability density
+
+         """
+         self.check_fit()
+
+         U, V = split_matrix(X)
+
+         if self.theta == 0:
+             return U * V
+
+         else:
+             num = (-self.theta * self._g(1)) * (1 + self._g(U + V))
+             aux = self._g(U) * self._g(V) + self._g(1)
+             den = np.power(aux, 2)
+             return num / den
+
+     def cumulative_distribution(self, X):
+         r"""Compute the cumulative distribution function for the Frank copula.
+
+         The cumulative distribution function (CDF) for the Frank family of copulas
+         corresponds to the formula:
+
+         .. math:: C(u,v) = -\frac{\ln({\frac{1 + g(u) g(v)}{g(1)}})}{\theta}
+
+         Args:
+             X: `np.ndarray`
+
+         Returns:
+             np.array: cumulative distribution
+
+         """
+         self.check_fit()
+
+         U, V = split_matrix(X)
+
+         num = (np.exp(-self.theta * U) - 1) * (np.exp(-self.theta * V) - 1)
+         den = np.exp(-self.theta) - 1
+
+         return -1.0 / self.theta * np.log(1 + num / den)
+
+     def percent_point(self, y, V):
+         """Compute the inverse of conditional cumulative distribution :math:`C(u|v)^{-1}`.
+
+         Args:
+             y: `np.ndarray` value of :math:`C(u|v)`.
+             V: `np.ndarray` given value of v.
+         """
+         self.check_fit()
+
+         if self.theta == 0:
+             return V
+
+         else:
+             return super().percent_point(y, V)
+
+     def partial_derivative(self, X):
+         r"""Compute partial derivative of cumulative distribution.
+
+         The partial derivative of the copula (CDF) is the conditional CDF.
+
+         .. math:: F(v|u) = \frac{\partial}{\partial u}C(u,v) =
+             \frac{g(u)g(v) + g(v)}{g(u)g(v) + g(1)}
+
+         Args:
+             X (np.ndarray)
+
+         Returns:
+             np.ndarray
+
+         """
+         self.check_fit()
+
+         U, V = split_matrix(X)
+
+         if self.theta == 0:
+             return V
+
+         else:
+             num = self._g(U) * self._g(V) + self._g(U)
+             den = self._g(U) * self._g(V) + self._g(1)
+             return num / den
+
+     def compute_theta(self):
+         r"""Compute theta parameter using Kendall's tau.
+
+         For the Frank copula, the relationship between tau and theta is defined by:
+
+         .. math:: \tau = 1 - \frac{4}{\theta} + \frac{4}{\theta^2}\int_0^\theta \!
+             \frac{t}{e^t -1} \mathrm{d}t.
+
+         In order to solve it, we can simplify it as
+
+         .. math:: 0 = 1 + \frac{4}{\theta}(D_1(\theta) - 1) - \tau
+
+         where :math:`D_1` is the first-order Debye function, defined as:
+
+         .. math:: D_1(x) = \frac{1}{x}\int_0^x\frac{t}{e^t -1} \mathrm{d}t.
+
+         """
+         result = least_squares(self._tau_to_theta, 1, bounds=(MIN_FLOAT_LOG, MAX_FLOAT_LOG))
+         return result.x[0]
+
+     def _tau_to_theta(self, alpha):
+         """Relationship between tau and theta as a solvable equation."""
+         def debye(t):
+             return t / (np.exp(t) - 1)
+
+         debye_value = integrate.quad(debye, EPSILON, alpha)[0] / alpha
+         return 4 * (debye_value - 1) / alpha + 1 - self.tau
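
For reference, the tau-to-theta relation used by compute_theta and _tau_to_theta above can be reproduced outside the class. The following is a standalone sketch, not part of the package: it uses scipy.optimize.brentq instead of the module's least_squares call, an arbitrary example value tau = 0.5, and illustrative helper names:

    import numpy as np
    import scipy.integrate as integrate
    from scipy.optimize import brentq


    def debye_1(theta):
        """First-order Debye function D1(theta) = (1 / theta) * int_0^theta t / (e^t - 1) dt."""
        return integrate.quad(lambda t: t / np.expm1(t), 1e-9, theta)[0] / theta


    def frank_tau_residual(theta, tau):
        """Residual of the equation 0 = 1 + (4 / theta) * (D1(theta) - 1) - tau."""
        return 4 * (debye_1(theta) - 1) / theta + 1 - tau


    tau = 0.5  # example Kendall's tau
    theta = brentq(frank_tau_residual, 1e-6, 100, args=(tau,))
    print(theta)  # approximately 5.74 for tau = 0.5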
@@ -0,0 +1,144 @@
+ """Gumbel module."""
+
+ import numpy as np
+
+ from copulas.bivariate.base import Bivariate, CopulaTypes
+ from copulas.bivariate.utils import split_matrix
+
+
+ class Gumbel(Bivariate):
+     """Class for Gumbel copula model."""
+
+     copula_type = CopulaTypes.GUMBEL
+     theta_interval = [1, float('inf')]
+     invalid_thetas = []
+
+     def generator(self, t):
+         """Return the generator function."""
+         return np.power(-np.log(t), self.theta)
+
+     def probability_density(self, X):
+         r"""Compute probability density function for given copula family.
+
+         The probability density function (PDF) for the Gumbel family of copulas
+         corresponds to the formula:
+
+         .. math::
+
+             \begin{align}
+                 c(U,V)
+                     &= \frac{\partial^2 C(u,v)}{\partial v \partial u}
+                     &= \frac{C(u,v)}{uv} \frac{((-\ln u)^{\theta} # noqa: JS101
+                        + (-\ln v)^{\theta})^{\frac{2} # noqa: JS101
+                        {\theta} - 2 }}{(\ln u \ln v)^{1 - \theta}} # noqa: JS101
+                        ( 1 + (\theta-1) \big((-\ln u)^\theta
+                        + (-\ln v)^\theta\big)^{-1/\theta})
+             \end{align}
+
+         Args:
+             X (numpy.ndarray)
+
+         Returns:
+             numpy.ndarray
+
+         """
+         self.check_fit()
+
+         U, V = split_matrix(X)
+
+         if self.theta == 1:
+             return U * V
+
+         else:
+             a = np.power(U * V, -1)
+             tmp = np.power(-np.log(U), self.theta) + np.power(-np.log(V), self.theta)
+             b = np.power(tmp, -2 + 2.0 / self.theta)
+             c = np.power(np.log(U) * np.log(V), self.theta - 1)
+             d = 1 + (self.theta - 1) * np.power(tmp, -1.0 / self.theta)
+             return self.cumulative_distribution(X) * a * b * c * d
+
+     def cumulative_distribution(self, X):
+         r"""Compute the cumulative distribution function for the Gumbel copula.
+
+         The cumulative distribution function (CDF) for the Gumbel family of copulas
+         corresponds to the formula:
+
+         .. math:: C(u,v) = e^{-((-\ln u)^{\theta} + (-\ln v)^{\theta})^{\frac{1}{\theta}}}
+
+         Args:
+             X (np.ndarray)
+
+         Returns:
+             np.ndarray: cumulative probability for the given datapoints, cdf(X).
+
+         """
+         self.check_fit()
+
+         U, V = split_matrix(X)
+
+         if self.theta == 1:
+             return U * V
+
+         else:
+             h = np.power(-np.log(U), self.theta) + np.power(-np.log(V), self.theta)
+             h = -np.power(h, 1.0 / self.theta)
+             cdfs = np.exp(h)
+             return cdfs
+
+     def percent_point(self, y, V):
+         """Compute the inverse of conditional cumulative distribution :math:`C(u|v)^{-1}`.
+
+         Args:
+             y (np.ndarray): value of :math:`C(u|v)`.
+             V (np.ndarray): given value of v.
+
+         """
+         self.check_fit()
+
+         if self.theta == 1:
+             return y
+
+         else:
+             return super().percent_point(y, V)
+
+     def partial_derivative(self, X):
+         r"""Compute partial derivative of cumulative distribution.
+
+         The partial derivative of the copula (CDF) is the conditional CDF.
+
+         .. math:: F(v|u) = \frac{\partial C(u,v)}{\partial u} =
+             C(u,v)\frac{((-\ln u)^{\theta} + (-\ln v)^{\theta})^{\frac{1}{\theta} - 1}}
+             {\theta(- \ln u)^{1 -\theta}}
+
+         Args:
+             X (np.ndarray)
+
+         Returns:
+             numpy.ndarray
+
+         """
+         self.check_fit()
+
+         U, V = split_matrix(X)
+
+         if self.theta == 1:
+             return V
+
+         else:
+             t1 = np.power(-np.log(U), self.theta)
+             t2 = np.power(-np.log(V), self.theta)
+             p1 = self.cumulative_distribution(X)
+             p2 = np.power(t1 + t2, -1 + 1.0 / self.theta)
+             p3 = np.power(-np.log(V), self.theta - 1)
+             return p1 * p2 * p3 / V
+
+     def compute_theta(self):
+         r"""Compute theta parameter using Kendall's tau.
+
+         For the Gumbel copula, :math:`\tau` is defined as
+         :math:`\tau = \frac{\theta - 1}{\theta}`, which we solve as
+         :math:`\theta = \frac{1}{1 - \tau}`.
+         """
+         if self.tau == 1:
+             raise ValueError("Tau value can't be 1")
+
+         return 1 / (1 - self.tau)
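
Since compute_theta for the Gumbel copula is a closed-form inversion of Kendall's tau, it can be reproduced directly from data. The sketch below is standalone and illustrative only (the toy data, seed and variable names are not part of the package):

    import numpy as np
    from scipy.stats import kendalltau

    rng = np.random.default_rng(0)
    u = rng.uniform(size=500)
    v = np.clip(u + rng.normal(scale=0.1, size=500), 0, 1)  # positively dependent toy data

    tau, _ = kendalltau(u, v)   # sample Kendall's tau
    theta = 1 / (1 - tau)       # Gumbel parameter, valid for tau in [0, 1)
    print(tau, theta)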
@@ -0,0 +1,81 @@
+ """Independence module."""
+
+ import numpy as np
+
+ from copulas.bivariate.base import Bivariate, CopulaTypes
+ from copulas.bivariate.utils import split_matrix
+
+
+ class Independence(Bivariate):
+     """This class represents the copula for two independent variables."""
+
+     copula_type = CopulaTypes.INDEPENDENCE
+
+     def fit(self, X):
+         """Fit the copula to the given data.
+
+         Args:
+             X (numpy.array): Probabilities in a matrix shaped (n, 2)
+
+         Returns:
+             None
+
+         """
+
+     def generator(self, t):
+         """Compute the generator function for the copula.
+
+         The generator function is a function f(t), such that an Archimedean copula can be
+         defined as
+
+         C(u1, ..., uN) = f(f^-1(u1), ..., f^-1(uN)).
+
+         Args:
+             t (numpy.array)
+
+         Returns:
+             np.array
+
+         """
+         return np.log(t)
+
+     def probability_density(self, X):
+         """Compute the probability density for the independence copula."""
+         return np.all((0.0 <= X) & (X <= 1.0), axis=1).astype(float)
+
+     def cumulative_distribution(self, X):
+         """Compute the cumulative distribution of the independence bivariate copula, which is the product of its inputs.
+
+         Args:
+             X (numpy.array): Matrix of shape (n, 2), whose values are in [0, 1]
+
+         Returns:
+             numpy.array: Cumulative distribution values of the given input.
+
+         """
+         U, V = split_matrix(X)
+         return U * V
+
+     def partial_derivative(self, X):
+         """Compute the conditional probability of one event conditioned on the other.
+
+         In the case of the independence copula, since C(u,v) = u*v, we have that
+         F(u|v) = dC/du = v.
+
+         Args:
+             X (numpy.array)
+
+         """
+         _, V = split_matrix(X)
+         return V
+
+     def percent_point(self, y, V):
+         """Compute the inverse of conditional cumulative distribution :math:`F(u|v)^{-1}`.
+
+         Args:
+             y: `np.ndarray` value of :math:`F(u|v)`.
+             V: `np.ndarray` given value of v.
+
+         """
+         self.check_fit()
+         return y
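
As a quick sanity check of the independence copula above: the CDF is the product of its inputs, the PDF is an indicator on the unit square, and the conditional CDF ignores the first column. A plain-numpy sketch mirroring those methods (the sample array is illustrative only):

    import numpy as np

    X = np.array([[0.2, 0.7],
                  [0.5, 0.5],
                  [0.9, 0.1]])
    U, V = X[:, 0], X[:, 1]  # same split as split_matrix(X)

    print(U * V)                                                  # cumulative_distribution: C(u, v) = u * v
    print(np.all((0.0 <= X) & (X <= 1.0), axis=1).astype(float))  # probability_density: 1.0 inside the unit square
    print(V)                                                      # partial_derivative: the conditional CDF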
@@ -0,0 +1,19 @@
+ """Utilities for bivariate copulas."""
+
+ import numpy as np
+
+
+ def split_matrix(X):
+     """Split an (n, 2) numpy.array into two vectors.
+
+     Args:
+         X (numpy.array): Matrix of shape (n, 2)
+
+     Returns:
+         tuple[numpy.array]: Both of shape (n,)
+
+     """
+     if len(X):
+         return X[:, 0], X[:, 1]
+
+     return np.array([]), np.array([])
copulas/datasets.py ADDED
@@ -0,0 +1,221 @@
+ """Sample datasets for the Copulas library."""
+
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+
+ from copulas import set_random_state, validate_random_state
+
+
+ def _dummy_fn(state):
+     pass
+
+
+ def sample_bivariate_age_income(size=1000, seed=42):
+     """Sample from a bivariate toy dataset.
+
+     This dataset contains two columns, corresponding to simulated age and
+     income, which are positively correlated and contain outliers.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.DataFrame:
+             DataFrame with two columns, ``age`` and ``income``.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         age = stats.beta.rvs(a=2.0, b=6.0, loc=18, scale=100, size=size)
+         income = np.log(age) * 100
+         income += np.random.normal(loc=np.log(age) / 100, scale=10, size=size)
+         income[np.random.randint(0, 10, size=size) == 0] /= 1000
+
+     return pd.DataFrame({
+         'age': age,
+         'income': income
+     })
+
+
+ def sample_trivariate_xyz(size=1000, seed=42):
+     """Sample from a three-dimensional toy dataset.
+
+     The output is a DataFrame containing three columns:
+
+     * ``x``: Beta distribution with a=0.1 and b=0.1
+     * ``y``: Beta distribution with a=0.1 and b=0.5
+     * ``z``: Normal distribution + 10 times ``y``
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.DataFrame:
+             DataFrame with three columns, ``x``, ``y`` and ``z``.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         x = stats.beta.rvs(a=0.1, b=0.1, size=size)
+         y = stats.beta.rvs(a=0.1, b=0.5, size=size)
+         return pd.DataFrame({
+             'x': x,
+             'y': y,
+             'z': np.random.normal(size=size) + y * 10
+         })
+
+
+ def sample_univariate_bernoulli(size=1000, seed=42):
+     """Sample from a Bernoulli distribution with p=0.3.
+
+     The distribution is built by sampling a uniform random value and then setting
+     1 or 0 depending on whether the value is below or above 0.3.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         return pd.Series(np.random.random(size=size) < 0.3).astype(float)
+
+
+ def sample_univariate_bimodal(size=1000, seed=42):
+     """Sample from a bimodal distribution which mixes two Gaussians at 0.0 and 10.0 with stdev=1.
+
+     The distribution is built by sampling a standard normal and a normal with mean ``10``
+     and then selecting one or the other based on a Bernoulli distribution.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         bernoulli = sample_univariate_bernoulli(size, seed)
+         mode1 = np.random.normal(size=size) * bernoulli
+         mode2 = np.random.normal(size=size, loc=10) * (1.0 - bernoulli)
+
+     return pd.Series(mode1 + mode2)
+
+
+ def sample_univariate_uniform(size=1000, seed=42):
+     """Sample from a uniform distribution in [-1.0, 3.0].
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         return pd.Series(4.0 * np.random.random(size=size) - 1.0)
+
+
+ def sample_univariate_normal(size=1000, seed=42):
+     """Sample from a normal distribution with mean 1 and stdev 1.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         return pd.Series(np.random.normal(size=size, loc=1.0))
+
+
+ def sample_univariate_degenerate(size=1000, seed=42):
+     """Sample from a degenerate distribution that only takes one random value.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         return pd.Series(np.full(size, np.random.random()))
+
+
+ def sample_univariate_exponential(size=1000, seed=42):
+     """Sample from an exponential distribution with rate 1.0, shifted to start at 3.0.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         return pd.Series(np.random.exponential(size=size) + 3.0)
+
+
+ def sample_univariate_beta(size=1000, seed=42):
+     """Sample from a beta distribution with a=3, b=1 and loc=4.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.Series:
+             Series with the sampled values.
+     """
+     with set_random_state(validate_random_state(seed), _dummy_fn):
+         return pd.Series(stats.beta.rvs(a=3, b=1, loc=4, size=size))
+
+
+ def sample_univariates(size=1000, seed=42):
+     """Sample from a list of univariate distributions.
+
+     Args:
+         size (int):
+             Number of samples to generate. Defaults to 1000.
+         seed (int):
+             Random seed to use. Defaults to 42.
+
+     Returns:
+         pandas.DataFrame:
+             DataFrame with the sampled distributions.
+     """
+     return pd.DataFrame({
+         'bernoulli': sample_univariate_bernoulli(size, seed),
+         'bimodal': sample_univariate_bimodal(size, seed),
+         'uniform': sample_univariate_uniform(size, seed),
+         'normal': sample_univariate_normal(size, seed),
+         'degenerate': sample_univariate_degenerate(size, seed),
+         'exponential': sample_univariate_exponential(size, seed),
+         'beta': sample_univariate_beta(size, seed),
+     })
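
The sampling helpers above are primarily meant as toy inputs for the copula models. A minimal usage sketch (the function names come from the file above; the size and seed values are arbitrary examples):

    from copulas.datasets import sample_bivariate_age_income, sample_univariates

    age_income = sample_bivariate_age_income()  # DataFrame with 'age' and 'income' columns
    toy = sample_univariates(size=500, seed=0)  # one column per univariate toy distribution

    print(age_income.describe())
    print(toy.columns.tolist())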
@@ -0,0 +1,14 @@
+ """Multivariate copulas module."""
+
+ from copulas.multivariate.base import Multivariate
+ from copulas.multivariate.gaussian import GaussianMultivariate
+ from copulas.multivariate.tree import Tree, TreeTypes
+ from copulas.multivariate.vine import VineCopula
+
+ __all__ = (
+     'Multivariate',
+     'GaussianMultivariate',
+     'VineCopula',
+     'Tree',
+     'TreeTypes'
+ )
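
The exported multivariate models are typically used together with the datasets module above. A minimal end-to-end sketch, assuming the usual fit/sample interface of GaussianMultivariate in the copulas library (that class itself is not part of this diff):

    from copulas.datasets import sample_trivariate_xyz
    from copulas.multivariate import GaussianMultivariate

    data = sample_trivariate_xyz()      # toy data with columns 'x', 'y', 'z'
    model = GaussianMultivariate()
    model.fit(data)                     # fits univariate marginals plus a Gaussian copula
    synthetic = model.sample(100)       # 100 synthetic rows with the same columns
    print(synthetic.head())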