PyPI - phasegen - Versions diffs - 0.0.3b0__py3-none-any.whl - Mend

phasegen 0.0.3b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

phasegen/__init__.py +225 -0
phasegen/coalescent_models.py +462 -0
phasegen/comparison.py +406 -0
phasegen/demography.py +1066 -0
phasegen/distributions.py +2928 -0
phasegen/expm.py +77 -0
phasegen/inference.py +740 -0
phasegen/lineage.py +79 -0
phasegen/locus.py +88 -0
phasegen/norms.py +114 -0
phasegen/rewards.py +540 -0
phasegen/serialization.py +49 -0
phasegen/spectrum.py +441 -0
phasegen/state_space.py +924 -0
phasegen/state_space_old.py +1601 -0
phasegen/utils.py +45 -0
phasegen/visualization.py +174 -0
phasegen-0.0.3b0.dist-info/METADATA +36 -0
phasegen-0.0.3b0.dist-info/RECORD +20 -0
phasegen-0.0.3b0.dist-info/WHEEL +4 -0

phasegen/__init__.py ADDED Viewed

@@ -0,0 +1,225 @@
+"""
+PhaseGen package.
+"""
+__author__ = "Janek Sendrowski"
+__contact__ = "sendrowski.janek@gmail.com"
+__date__ = "2023-04-09"
+__version__ = '0.0.3-beta'
+import logging
+import os
+import sys
+import jsonpickle.ext.numpy as jsonpickle_numpy
+from tqdm import tqdm
+# lower the verbosity of TensorFlow unless the variable is already set
+os.environ.setdefault('TF_CPP_MIN_LOG_LEVEL', '3')
+# register the jsonpickle handlers for NumPy
+jsonpickle_numpy.register_handlers()
+class TqdmLoggingHandler(logging.Handler):
+    """
+    Logging handler that writes its records through ``tqdm.write``.
+    """
+    def __init__(self, level=logging.NOTSET):
+        """
+        Create the handler.
+        :param level: Level of the handler, ``logging.NOTSET`` by default.
+        """
+        super().__init__(level=level)
+    def emit(self, record):
+        """
+        Format a record and write it to stderr via ``tqdm.write``.
+        :param record: The log record to emit.
+        """
+        try:
+            # messages go through tqdm (on stderr) so that log output
+            # and progress bars can work together
+            tqdm.write(self.format(record), file=sys.stderr)
+            self.flush()
+        except Exception:
+            # delegate failures to the standard logging error handling
+            self.handleError(record)
+class ColoredFormatter(logging.Formatter):
+    """
+    Formatter that colors each formatted record by its level and shows only
+    the last component of the dotted logger name.
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize the formatter.
+        :param args: Positional arguments passed on to ``logging.Formatter``.
+        :param kwargs: Keyword arguments passed on to ``logging.Formatter``.
+        """
+        super().__init__(*args, **kwargs)
+        #: ANSI color escape sequence for each level name
+        self.colors = {
+            "DEBUG": "\033[36m",  # Cyan
+            "INFO": "\033[32m",  # Green
+            "WARNING": "\033[33m",  # Yellow
+            "ERROR": "\033[31m",  # Red
+            "CRITICAL": "\033[31m",  # Red
+        }
+        #: ANSI escape sequence that resets the color
+        self.reset = "\033[0m"
+    def format(self, record):
+        """
+        Format the record.
+        :param record: The log record to format.
+        :return: The formatted message wrapped in the color of its level.
+        """
+        # unknown level names fall back to the reset sequence, i.e. no color
+        color = self.colors.get(record.levelname, self.reset)
+        # remove the package name by shortening the name on the record while
+        # formatting; a string replace on the formatted output would also
+        # rewrite occurrences of the logger name inside the message text
+        full_name = record.name
+        record.name = full_name.split('.')[-1]
+        try:
+            formatted = super().format(record)
+        finally:
+            # restore the full name for anything else that uses this record
+            record.name = full_name
+        return f"{color}{formatted}{self.reset}"
+# let TQDM handle the logging, using a Formatter with colors
+handler = TqdmLoggingHandler()
+formatter = ColoredFormatter('%(levelname)s:%(name)s: %(message)s')
+handler.setFormatter(formatter)
+# configure the package logger: INFO by default
+logger = logging.getLogger('phasegen')
+logger.setLevel(logging.INFO)
+# don't propagate to the root logger
+logger.propagate = False
+logger.addHandler(handler)
+from .distributions import PhaseTypeDistribution
+from .distributions import Coalescent
+from .demography import (
+    Demography,
+    Epoch,
+    DiscreteRateChanges,
+    PopSizeChanges,
+    PopSizeChange,
+    MigrationRateChanges,
+    MigrationRateChange,
+    SymmetricMigrationRateChanges,
+    PopulationSplit,
+    DiscretizedRateChanges,
+    DiscretizedRateChange,
+    ExponentialPopSizeChanges,
+    ExponentialRateChanges
+)
+from .coalescent_models import (
+    CoalescentModel,
+    StandardCoalescent,
+    BetaCoalescent,
+    DiracCoalescent
+)
+from .state_space import (
+    StateSpace,
+    DefaultStateSpace,
+    BlockCountingStateSpace
+)
+from .rewards import (
+    Reward,
+    DefaultReward,
+    NonDefaultReward,
+    TreeHeightReward,
+    TotalTreeHeightReward,
+    TotalBranchLengthReward,
+    UnfoldedSFSReward,
+    FoldedSFSReward,
+    CustomReward,
+    ProductReward,
+    SumReward,
+    CombinedReward,
+    DemeReward
+)
+from .spectrum import (
+    SFS,
+    Spectra,
+    SFS2
+)
+from .inference import Inference
+from .lineage import LineageConfig
+from .locus import LocusConfig
+from .norms import (
+    LNorm,
+    L1Norm,
+    L2Norm,
+    LInfNorm,
+    PoissonLikelihood
+)
+from .state_space_old import StateSpace as OldStateSpace
+# names exported by the package
+__all__ = [
+    # from .distributions
+    'PhaseTypeDistribution',
+    'Coalescent',
+    # from .demography
+    'Demography',
+    'Epoch',
+    'PopSizeChanges',
+    'PopSizeChange',
+    'MigrationRateChanges',
+    'MigrationRateChange',
+    'SymmetricMigrationRateChanges',
+    'PopulationSplit',
+    'ExponentialPopSizeChanges',
+    'ExponentialRateChanges',
+    'DiscreteRateChanges',
+    'DiscretizedRateChange',
+    'DiscretizedRateChanges',
+    # from .coalescent_models
+    'StandardCoalescent',
+    'BetaCoalescent',
+    'DiracCoalescent',
+    # from .spectrum
+    'SFS2',
+    'SFS',
+    'Spectra',
+    # from .inference
+    'Inference',
+    # from .norms
+    'LNorm',
+    'L1Norm',
+    'L2Norm',
+    'LInfNorm',
+    'PoissonLikelihood',
+    # from .rewards
+    'Reward',
+    'TreeHeightReward',
+    'TotalTreeHeightReward',
+    'TotalBranchLengthReward',
+    'UnfoldedSFSReward',
+    'FoldedSFSReward',
+    'CustomReward',
+    'ProductReward',
+    'SumReward',
+    'DemeReward',
+    'DefaultReward',
+    'NonDefaultReward',
+    'CombinedReward',
+    # from .state_space
+    'StateSpace',
+    'DefaultStateSpace',
+    'BlockCountingStateSpace',
+    # from .coalescent_models
+    'CoalescentModel',
+    # from .lineage and .locus
+    'LineageConfig',
+    'LocusConfig',
+]

phasegen/coalescent_models.py ADDED Viewed

@@ -0,0 +1,462 @@
+"""
+Coalescent models.
+"""
+import itertools
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Sequence
+import numpy as np
+from scipy.special import comb, beta
+from scipy.stats import binom
+class CoalescentModel(ABC):
+    """
+    Abstract base class for coalescent models.
+    """
+    def get_rate(self, s1: int, s2: int) -> float:
+        """
+        Get the rate of a merger collapsing ``s1`` lineages into ``s2`` lineages.
+        :param s1: Number of lineages in the first state.
+        :param s2: Number of lineages in the second state.
+        :return: The rate, which is 0 if ``s2`` is larger than ``s1``.
+        """
+        # the number of lineages cannot grow; otherwise s1 + 1 - s2 lineages merge
+        return 0 if s2 > s1 else self._get_rate(b=s1, k=s1 + 1 - s2)
+    def get_rate_block_counting(self, n: int, s1: np.ndarray, s2: np.ndarray) -> float:
+        r"""
+        Get (positive) rate between two block counting states, i.e. elements of
+        :math:`\{ (a_1,...,a_n) \in \mathbb{Z}^+ : \sum_{i=1}^{n} i a_i = n \}`.
+        :param n: Number of lineages.
+        :param s1: Block configuration 1, a vector of length n.
+        :param s2: Block configuration 2, a vector of length n.
+        :return: The rate, 0 if the transition is not a single merger.
+        """
+        delta = s2 - s1
+        # exactly one class must gain exactly one lineage and the state must have length n
+        if np.sum(delta == 1) != 1 or n != s1.shape[0]:
+            return 0
+        # indices of the classes that lost lineages
+        losing = np.where(delta < 0)[0]
+        if len(losing) == 0:
+            return 0
+        # number of lineages lost per class
+        lost = -delta[losing]
+        # index of the class the merged lineages end up in
+        i_gain = np.dot(losing + 1, lost) - 1
+        # that class must be the one that gained a single lineage
+        if delta[i_gain] != 1:
+            return 0
+        # lineages before the merger in the classes taking part
+        before = s1[losing]
+        # lineages that coalesce in each of these classes
+        merging = before - s2[losing]
+        return self._get_rate_block_counting(n=s1.sum(), b=before, k=merging)
+    @abstractmethod
+    def _get_timescale(self, N: float) -> float:
+        """
+        Get the timescale for the given effective population size.
+        :param N: The effective population size.
+        :return: The timescale.
+        """
+    @abstractmethod
+    def _get_rate(self, b: int, k: int) -> float:
+        """
+        Get the positive rate of a merger of k out of b lineages.
+        Negative rates will be inferred later.
+        :param b: Number of lineages.
+        :param k: Number of lineages that merge.
+        :return: The rate.
+        """
+    @abstractmethod
+    def _get_rate_block_counting(self, n: int, b: Sequence[int], k: Sequence[int]) -> float:
+        """
+        Get the positive rate of a merger of k_i out of b_i lineages for all i.
+        Negative rates will be inferred later.
+        :param n: Number of lineages.
+        :param b: Number of lineages before the merger for blocks that experience a merger.
+        :param k: Number of lineages that merge for blocks that experience a merger.
+        :return: The rate.
+        """
+    @abstractmethod
+    def coalesce(self, n: int, blocks: np.ndarray) -> List[Tuple[np.ndarray, float]]:
+        """
+        Coalesce a state.
+        :param n: The total number of lineages.
+        :param blocks: The lineages in each block.
+        :return: List of coalesced states and their rates.
+        """
+class StandardCoalescent(CoalescentModel):
+    """
+    Standard (Kingman) coalescent model. Refer to
+    `Msprime docs <https://tskit.dev/msprime/docs/stable/api.html?
+    highlight=standard+coalescent#msprime.StandardCoalescent>`__
+    for more information.
+    """
+    def _get_timescale(self, N: float) -> float:
+        """
+        Get the timescale, which is N itself for this model.
+        :param N: The effective population size.
+        :return: The timescale.
+        """
+        return N
+    def _get_rate(self, b: int, k: int) -> float:
+        """
+        Get positive rate for a merger of k out of b lineages.
+        :param b: Number of lineages.
+        :param k: Number of lineages that merge.
+        :return: The rate, 0 unless exactly two lineages merge.
+        """
+        # two lineages can merge with a rate depending on b
+        if k == 2:
+            return b * (b - 1) / 2
+        # no other mergers can happen
+        return 0
+    def _get_rate_block_counting(self, n: int, b: Sequence[int], k: Sequence[int]) -> float:
+        """
+        Get positive rate for a merger of k_i out of b_i lineages for all i.
+        :param n: Number of lineages.
+        :param b: Number of lineages before merge for blocks that experience a merger.
+        :param k: Number of lineages that merge for blocks that experience a merger.
+        :return: The rate, 0 for anything but a binary merger.
+        """
+        # if we have a single class
+        if len(b) == 1:
+            return self._get_rate(b=b[0], k=k[0])
+        # if we have a merger from two classes, one lineage from each
+        if len(b) == 2 and k[0] == 1 and k[1] == 1:
+            # same as b[0] choose k[0] times b[1] choose k[1]
+            return b[0] * b[1]
+        # no other mergers possible
+        return 0
+    def coalesce(self, n: int, blocks: np.ndarray) -> List[Tuple[np.ndarray, float]]:
+        """
+        Coalesce a state.
+        :param n: The total number of lineages.
+        :param blocks: The lineages in each block.
+        :return: List of coalesced states and their rates.
+        """
+        n_blocks = len(blocks)
+        states = []
+        # default state space: a single entry holding the number of lineages
+        if n_blocks == 1:
+            if blocks[0] > 1:
+                states.append((np.array([blocks[0] - 1]), self._get_rate(b=blocks[0], k=2)))
+            return states
+        # block counting state space: merging a lineage at index i with one at
+        # index j adds a lineage at index i + j + 1
+        for i in range(n_blocks):
+            # one lineage of class i merges with one of a lower class j
+            for j in range(i):
+                if blocks[i] > 0 and blocks[j] > 0:
+                    new = blocks.copy()
+                    new[i] -= 1
+                    new[j] -= 1
+                    new[i + j + 1] += 1
+                    rate = self._get_rate_block_counting(n=n, b=[blocks[i], blocks[j]], k=[1, 1])
+                    states.append((new, rate))
+            # two lineages of the same class i merge
+            if blocks[i] > 1:
+                new = blocks.copy()
+                new[i] -= 2
+                new[2 * (i + 1) - 1] += 1
+                states.append((new, self._get_rate_block_counting(n=n, b=[blocks[i]], k=[2])))
+        return states
+    def __eq__(self, other):
+        """
+        Check if two coalescent models are equal.
+        :param other: The other coalescent model.
+        :return: Whether the two coalescent models are equal.
+        """
+        return isinstance(other, StandardCoalescent)
+    def __hash__(self):
+        """
+        Hash consistent with ``__eq__``; defining ``__eq__`` alone would make instances unhashable.
+        :return: The hash, identical for all instances.
+        """
+        return hash(StandardCoalescent)
+class MultipleMergerCoalescent(CoalescentModel, ABC):
+    """
+    Base class for multiple merger coalescent models.
+    :meta private:
+    """
+    def coalesce(self, n: int, blocks: np.ndarray) -> List[Tuple[np.ndarray, float]]:
+        """
+        Coalesce a state.
+        :param n: The total number of lineages.
+        :param blocks: The lineages in each block.
+        :return: List of coalesced states and their rates.
+        """
+        n_blocks = len(blocks)
+        states = []
+        # default state space: k + 1 lineages merge, which removes k lineages
+        if n_blocks == 1:
+            for k in range(1, blocks[0]):
+                states.append((np.array([blocks[0] - k]), self._get_rate(b=blocks[0], k=k + 1)))
+            return states
+        # block counting state space
+        # weight of each index when determining where the merged lineage ends up
+        sizes = np.arange(1, n_blocks + 1)
+        # number of lineages before the merger
+        n_lineages = blocks.sum()
+        # enumerate how many lineages of each class take part in the merger
+        for counts in itertools.product(*(range(count + 1) for count in blocks)):
+            merging = np.array(counts)
+            # a merger needs at least two lineages
+            if merging.sum() > 1:
+                new = blocks.copy()
+                new -= merging
+                new[merging.dot(sizes) - 1] += 1
+                # only the classes that take part are passed on
+                involved = merging > 0
+                rate = self._get_rate_block_counting(n=n_lineages, b=blocks[involved], k=merging[involved])
+                states.append((new, rate))
+        return states
+class BetaCoalescent(MultipleMergerCoalescent):
+    """
+    Beta coalescent model. Refer to
+    `Msprime docs <https://tskit.dev/msprime/docs/stable/api.html?highlight=beta+coalescent#msprime.BetaCoalescent>`__
+    for more information.
+    """
+    def __init__(self, alpha: float, scale_time: bool = True):
+        """
+        Initialize the beta coalescent model.
+        :param alpha: The alpha parameter of the beta coalescent model.
+        :param scale_time: Whether to scale coalescence time as described in
+            `Msprime docs <https://tskit.dev/msprime/docs/stable/api.html?
+            highlight=beta+coalescent#msprime.BetaCoalescent>`__. If ``False``, the timescale is set to N.
+        :raises ValueError: If alpha is smaller than 1 or larger than 2.
+        """
+        super().__init__()
+        if alpha < 1 or alpha > 2:
+            raise ValueError("Alpha must be between 1 and 2.")
+        #: Whether to scale coalescence time
+        self.scale_time: bool = scale_time
+        #: The alpha parameter of the beta coalescent model.
+        self.alpha: float = alpha
+    def _get_base_rate(self, b: int, k: int) -> float:
+        """
+        Get base rate for a merger of k out of b lineages (without number of ways).
+        :param b: The number of lineages before the merger.
+        :param k: The number of lineages that merge.
+        :return: The rate.
+        """
+        return beta(k - self.alpha, b - k + self.alpha) / beta(self.alpha, 2 - self.alpha)
+    def _get_timescale(self, N: float) -> float:
+        """
+        Get the timescale.
+        :param N: The effective population size.
+        :return: N if time is not scaled, the scaled timescale otherwise.
+        """
+        if not self.scale_time:
+            return N
+        # NOTE: alpha == 1 passes the check in __init__ but divides by zero here
+        m = 1 + 1 / 2 ** (self.alpha - 1) / (self.alpha - 1)
+        return m ** self.alpha * N ** (self.alpha - 1) / self.alpha / beta(2 - self.alpha, self.alpha)
+    def _get_rate(self, b: int, k: int) -> float:
+        """
+        Get positive rate for a merger of k out of b lineages.
+        Negative rates will be filled in later.
+        :param b: The number of lineages before the merger.
+        :param k: The number of lineages that merge.
+        :return: The rate, 0 if k is smaller than 1 or larger than b.
+        """
+        if k < 1 or k > b:
+            return 0
+        return comb(b, k, exact=True) * self._get_base_rate(b, k)
+    def _get_rate_block_counting(self, n: int, b: Sequence[int], k: Sequence[int]) -> float:
+        """
+        Get positive rate for a merger of k_i out of b_i lineages for all i.
+        :param n: Number of lineages.
+        :param b: Number of lineages before merge for blocks that experience a merger.
+        :param k: Number of lineages that merge for blocks that experience a merger.
+        :return: The rate.
+        """
+        # multiply the exact binomial coefficients as Python integers; np.prod
+        # would convert them to a fixed-width integer type that can overflow silently
+        combinations = 1
+        for b_i, k_i in zip(b, k):
+            combinations *= comb(N=b_i, k=k_i, exact=True)
+        return combinations * self._get_base_rate(b=n, k=sum(k))
+    def __eq__(self, other):
+        """
+        Check if two coalescent models are equal.
+        :param other: The other coalescent model.
+        :return: Whether the two coalescent models are equal.
+        """
+        return (
+                isinstance(other, BetaCoalescent) and
+                self.alpha == other.alpha and
+                self.scale_time == other.scale_time
+        )
+    def __hash__(self):
+        """
+        Hash consistent with ``__eq__``; defining ``__eq__`` alone would make instances unhashable.
+        :return: The hash of the values compared in ``__eq__``.
+        """
+        return hash((BetaCoalescent, self.alpha, self.scale_time))
+class DiracCoalescent(MultipleMergerCoalescent):
+    """
+    Dirac coalescent model. Refer to
+    `Msprime docs <https://tskit.dev/msprime/docs/stable/api.html?highlight=dirac+coalescent#msprime.DiracCoalescent>`__
+    for more information.
+    """
+    def __init__(self, psi: float, c: float, scale_time: bool = True):
+        """
+        Initialize the Dirac coalescent model.
+        :param psi: The fraction of the population replaced by offspring in one large reproduction event
+        :param c: The rate of potential multiple merger events.
+        :param scale_time: Whether to scale coalescence time as described in
+            `Msprime docs <https://tskit.dev/msprime/docs/stable/api.html?
+            highlight=dirac+coalescent#msprime.DiracCoalescent>`__. If ``False``, the timescale is set to N.
+        :raises ValueError: If psi is not strictly between 0 and 1.
+        """
+        super().__init__()
+        if not 0 < psi < 1:
+            raise ValueError("Psi must be between 0 and 1.")
+        #: The fraction of the population replaced by offspring in one large reproduction event
+        self.psi = psi
+        #: The rate of potential multiple merger events.
+        self.c = c
+        #: Whether to scale coalescence time
+        self.scale_time: bool = scale_time
+        #: The standard coalescent model
+        self._standard = StandardCoalescent()
+    def _get_timescale(self, N: float) -> float:
+        """
+        Get the timescale.
+        :param N: The effective population size.
+        :return: N if time is not scaled, N squared otherwise.
+        """
+        if not self.scale_time:
+            return N
+        return N ** 2
+    def _get_rate(self, b: int, k: int) -> float:
+        """
+        Get positive rate for a merger of k out of b lineages.
+        Negative rates will be filled in later.
+        :param b: The number of lineages before the merger.
+        :param k: The number of lineages that merge.
+        :return: The rate.
+        """
+        # rate of binary merger
+        rate_binary = self._standard._get_rate(b=b, k=k)
+        # probability of multiple merger of k out of b lineages
+        p_psi = binom.pmf(k=k, n=b, p=self.psi)
+        # rate of multiple merger
+        rate_multi = p_psi * self.c
+        return rate_binary + rate_multi
+    def _get_rate_block_counting(self, n: int, b: Sequence[int], k: Sequence[int]) -> float:
+        """
+        Get positive rate for a merger of k_i out of b_i lineages for all i.
+        :param n: Number of lineages.
+        :param b: Number of lineages before merge for blocks that experience a merger.
+        :param k: Number of lineages that merge for blocks that experience a merger.
+        :return: The rate.
+        """
+        # rate of binary merger
+        rate_binary = self._standard._get_rate_block_counting(n=n, b=b, k=k)
+        # probability that k_i out of b_i lineages take part, for every block with a merger
+        p_psi = np.prod([binom.pmf(k=k_i, n=b_i, p=self.psi) for k_i, b_i in zip(k, b)])
+        # the lineages of all other blocks must not take part
+        n_involved = sum(b)
+        if n_involved < n:
+            p_psi *= binom.pmf(k=0, n=n - n_involved, p=self.psi)
+        # rate of multiple merger
+        rate_multi = p_psi * self.c
+        return rate_binary + rate_multi
+    def __eq__(self, other):
+        """
+        Check if two coalescent models are equal.
+        :param other: The other coalescent model.
+        :return: Whether the two coalescent models are equal.
+        """
+        return (
+                isinstance(other, DiracCoalescent) and
+                self.psi == other.psi and
+                self.c == other.c and
+                self.scale_time == other.scale_time
+        )
+    def __hash__(self):
+        """
+        Hash consistent with ``__eq__``; defining ``__eq__`` alone would make instances unhashable.
+        :return: The hash of the values compared in ``__eq__``.
+        """
+        return hash((DiracCoalescent, self.psi, self.c, self.scale_time))