ml4gw 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

ml4gw/augmentations.py CHANGED
@@ -6,8 +6,8 @@ from torch import Tensor
 class SignalInverter(torch.nn.Module):
     """
     Takes a tensor of timeseries of arbitrary dimension
-    and randomly inverts (i.e. h(t) -> -h(t))
-    each timeseries with probability `prob`.
+    and randomly inverts i.e. :math:`h(t) \\rightarrow -h(t)`
+    each timeseries with probability ``prob``.
 
     Args:
         prob:
@@ -29,8 +29,8 @@ class SignalInverter(torch.nn.Module):
 class SignalReverser(torch.nn.Module):
     """
     Takes a tensor of timeseries of arbitrary dimension
-    and randomly reverses (i.e. h(t) -> h(-t))
-    each timeseries with probability `prob`.
+    and randomly reverses i.e. :math:`h(t) \\rightarrow h(-t)`
+    each timeseries with probability ``prob``.
 
     Args:
         prob:
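
Both augmentations are standard ``torch.nn.Module`` objects, so they compose in a transform pipeline. A minimal usage sketch (batch shape and probabilities are illustrative, not taken from this diff):

    import torch
    from ml4gw.augmentations import SignalInverter, SignalReverser

    X = torch.randn(8, 2, 2048)  # hypothetical (batch, channels, samples)
    augment = torch.nn.Sequential(
        SignalInverter(prob=0.5),  # h(t) -> -h(t), each element w.p. 0.5
        SignalReverser(prob=0.5),  # h(t) -> h(-t), each element w.p. 0.5
    )
    X = augment(X)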
@@ -15,9 +15,9 @@ class ChunkedTimeSeriesDataset(torch.utils.data.IterableDataset):
         chunk_it:
             Iterator which will produce chunks of timeseries
             data to sample windows from. Should have shape
-            `(N, C, T)`, where `N` is the number of chunks
-            to sample from, `C` is the number of channels,
-            and `T` is the number of samples along the
+            ``(N, C, T)``, where ``N`` is the number of chunks
+            to sample from, ``C`` is the number of channels,
+            and ``T`` is the number of samples along the
             time dimension for each chunk.
         kernel_size:
             Size of windows to be sampled from each chunk.
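
For reference, a toy generator satisfying the ``(N, C, T)`` contract documented above (all sizes hypothetical):

    import torch

    def toy_chunk_iterator(num_chunks: int = 4):
        # each chunk holds N=8 segments, C=2 channels, T=16384 samples
        for _ in range(num_chunks):
            yield torch.randn(8, 2, 16384)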
@@ -17,8 +17,7 @@ class Hdf5TimeSeriesDataset(torch.utils.data.IterableDataset):
     Iterable dataset that samples and loads windows of
     timeseries data uniformly from a set of HDF5 files.
     It is _strongly_ recommended that these files have been
-    written using [chunked storage]
-    (https://docs.h5py.org/en/stable/high/dataset.html#chunked-storage).
+    written using `chunked storage <https://docs.h5py.org/en/stable/high/dataset.html#chunked-storage>`_.
     This has been shown to produce increases in read-time speeds
     of over an order of magnitude.
 
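A minimal sketch of writing such a file with h5py's chunked storage (file name, dataset names, and chunk size are hypothetical):

    import h5py
    import numpy as np

    data = np.random.randn(2, 4096 * 1024).astype("float32")
    with h5py.File("background.hdf5", "w") as f:
        # fixed-size chunks let HDF5 read a random window without
        # loading the entire timeseries
        f.create_dataset("H1", data=data[0], chunks=(4096,))
        f.create_dataset("L1", data=data[1], chunks=(4096,))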
@@ -37,27 +36,25 @@ class Hdf5TimeSeriesDataset(torch.utils.data.IterableDataset):
             Number of windows to sample at each iteration.
         batches_per_epoch:
             Number of batches to generate during each call
-            to `__iter__`.
+            to ``__iter__``.
         coincident:
             Whether windows for each channel in a given batch
             element should be sampled coincidentally, i.e.
             corresponding to the same time indices from the
             same files, or should be sampled independently.
             For the latter case, users can either specify
-            `False`, which will sample filenames independently
-            for each channel, or `"files"`, which will sample
+            ``False``, which will sample filenames independently
+            for each channel, or ``"files"``, which will sample
             windows independently within a given file for each
             channel. The latter setting limits the amount of
             entropy in the effective dataset, but can provide
             over 2x improvement in total throughput.
         num_files_per_batch:
             The number of unique files from which to sample
-            batch elements each epoch. If left as `None`,
+            batch elements each epoch. If left as ``None``,
             will use all available files. Useful when reading
             from many files is bottlenecking dataloading.
-
-
-    """
+    """ # noqa E501
 
     def __init__(
         self,
@@ -117,7 +114,7 @@ class Hdf5TimeSeriesDataset(torch.utils.data.IterableDataset):
         return self.batches_per_epoch
 
     def sample_fnames(self, size) -> np.ndarray:
-        # first, randomly select `self.num_files_per_batch`
+        # first, randomly select ``self.num_files_per_batch``
         # file indices based on their probabilities
         fname_indices = np.arange(len(self.fnames))
         fname_indices = np.random.choice(
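
Putting the documented arguments together, a hedged usage sketch; the filenames, channel names, and window sizes are assumptions, and the constructor signature is inferred from the docstring above rather than shown in this diff:

    from ml4gw.dataloading import Hdf5TimeSeriesDataset

    dataset = Hdf5TimeSeriesDataset(
        fnames=["background-0.hdf5", "background-1.hdf5"],
        channels=["H1", "L1"],
        kernel_size=2048,
        batch_size=128,
        batches_per_epoch=100,
        coincident="files",  # independent windows per channel, shared files
    )
    for X in dataset:  # X: (128, 2, 2048)
        ...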
@@ -20,56 +20,56 @@ class InMemoryDataset(torch.utils.data.IterableDataset):
     Args:
         X:
             Timeseries data to be iterated through. Should have
-            shape `(num_channels, length * sample_rate)`. Windows
+            shape ``(num_channels, length * sample_rate)``. Windows
             will be sampled from the time (1st) dimension for all
             channels along the channel (0th) dimension.
         kernel_size:
-            The length of the windows to sample from `X` in units
+            The length of the windows to sample from ``X`` in units
             of samples.
         y:
             Target timeseries to be iterated through. If specified,
             should be a single channel and have shape
-            `(length * sample_rate,)`. If left as `None`, only windows
-            sampled from `X` will be returned during iteration.
+            ``(length * sample_rate,)``. If left as ``None``, only windows
+            sampled from ``X`` will be returned during iteration.
             Otherwise, windows sampled from both arrays will be
             returned. Note that if sampling is performed non-coincidentally,
             there's no sensible way to align windows sampled from this
-            array with the windows sampled from `X`, so this combination
+            array with the windows sampled from ``X``, so this combination
             of arguments is not permitted.
         batch_size:
             Maximum number of windows to return at each iteration. Will
             be the length of the 0th dimension of the returned array(s).
-            If `batches_per_epoch` is specified, this will be the length
-            of _every_ array returned during iteration. Otherwise, it's
+            If ``batches_per_epoch`` is specified, this will be the length
+            of **every** array returned during iteration. Otherwise, it's
             possible that the last array will be shorter due to the number
             of windows in the timeseries being a non-integer multiple of
-            `batch_size`.
+            ``batch_size``.
         stride:
             The resolution at which windows will be sampled from the
             specified timeseries, in units of samples. E.g. if
-            `stride=2`, the first sample of each window can only be
-            from an index of `X` which is a multiple of 2. Obviously,
+            ``stride=2``, the first sample of each window can only be
+            from an index of ``X`` which is a multiple of 2. Obviously,
             this reduces the number of windows which can be iterated
-            through by a factor of `stride`.
+            through by a factor of ``stride``.
         batches_per_epoch:
             Number of batches of windows to produce during iteration
-            before raising a `StopIteration`. Must be specified if
+            before raising a ``StopIteration``. Must be specified if
             performing non-coincident sampling. Otherwise, if left
-            as `None`, windows will be sampled until the entire
+            as ``None``, windows will be sampled until the entire
             timeseries has been exhausted. Note that
-            `batch_size * batches_per_epoch` must be be small
+            ``batch_size * batches_per_epoch`` must be small
             enough to be able to be fulfilled by the number of
-            windows in the timeseries, otherise a `ValueError`
+            windows in the timeseries, otherwise a ``ValueError``
             will be raised.
         coincident:
-            Whether to sample windows from the channels of `X`
+            Whether to sample windows from the channels of ``X``
             using the same indices or independently. Can't be
-            `True` if `batches_per_epoch` is `None` or `y` is
-            _not_ `None`.
+            ``True`` if ``batches_per_epoch`` is ``None`` or ``y`` is
+            **not** ``None``.
         shuffle:
             Whether to sample windows from timeseries randomly
-            or in order along the time axis. If `coincident=False`
-            and `shuffle=False`, channels will be iterated through
+            or in order along the time axis. If ``coincident=False``
+            and ``shuffle=False``, channels will be iterated through
             with the index along the last channel moving fastest.
         device:
             Which device to host the timeseries arrays on
@@ -91,7 +91,7 @@ class InMemoryDataset(torch.utils.data.IterableDataset):
 
         # make sure if we specified a target array that all
         # other necessary conditions are met (it has the same
-        # length as `X` and we're sampling coincidentally)
+        # length as ``X`` and we're sampling coincidentally)
         if y is not None and y.shape[-1] != X.shape[-1]:
             raise ValueError(
                 "Target timeseries must have same length as input"
ml4gw/distributions.py CHANGED
@@ -1,24 +1,30 @@
 """
 Module containing callable classes for generating samples
 from specified distributions. Each callable should map from
-an integer `N` to a 1D torch `Tensor` containing `N` samples
+an integer ``N`` to a 1D torch ``Tensor`` containing ``N`` samples
 from the corresponding distribution.
 """
 
 import math
-from typing import Optional
+from typing import Callable, Optional
 
 import torch
 import torch.distributions as dist
 from jaxtyping import Float
 from torch import Tensor
 
+from ml4gw.constants import C
+
+_PLANCK18_H0 = 67.66  # Hubble constant in km/s/Mpc
+_PLANCK18_OMEGA_M = 0.30966  # Matter density parameter
+
 
 class Cosine(dist.Distribution):
     """
     Cosine distribution based on
-    ``torch.distributions.TransformedDistribution``.
-    """
+    ``torch.distributions.TransformedDistribution``
+    (see `documentation <https://docs.pytorch.org/docs/stable/distributions.html#transformeddistribution>`_).
+    """ # noqa E501
 
     arg_constraints = {}
 
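In use, the module contract above (an integer sample count in, a 1D tensor out) looks like this; ``Cosine``'s constructor defaults are not shown in this diff, so treat them as assumptions:

    from ml4gw.distributions import Cosine

    dec = Cosine()                 # presumably supported on [-pi/2, pi/2]
    samples = dec.sample((1000,))  # 1D tensor of 1000 samples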
@@ -112,18 +118,17 @@ class LogNormal(dist.LogNormal):
 class PowerLaw(dist.TransformedDistribution):
     """
     Sample from a power law distribution,
-    .. math::
-       p(x) \approx x^{\alpha}.
+
+    .. math:: p(x) \\approx x^{\\alpha}.
 
     Index alpha cannot be 0, since it is equivalent to a Uniform distribution.
     This could be used, for example, as a universal distribution of
     signal-to-noise ratios (SNRs) from uniformly volume distributed
     sources
-    .. math::
 
-       p(\rho) = 3*\rho_0^3 / \rho^4
+    .. math:: p(\\rho) = 3\;\\rho_0^3 / \\rho^4
 
-    where :math:`\rho_0` is a representative minimum SNR
+    where :math:`\\rho_0` is a representative minimum SNR
     considered for detection. See, for example,
     `Schutz (2011) <https://arxiv.org/abs/1102.5421>`_.
     Or, for example, ``index=2`` for uniform in Euclidean volume.
@@ -135,7 +140,7 @@ class PowerLaw(dist.TransformedDistribution):
         self, minimum: float, maximum: float, index: int, validate_args=None
     ):
         if index == 0:
-            raise RuntimeError("Index of 0 is the same as Uniform")
+            raise ValueError("Index of 0 is the same as Uniform")
         elif index == -1:
             base_min = torch.as_tensor(minimum).log()
             base_max = torch.as_tensor(maximum).log()
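
As a worked instance of the docstring's SNR example, p(ρ) ∝ ρ⁻⁴ corresponds to ``index=-4``; the threshold values below are illustrative:

    from ml4gw.distributions import PowerLaw

    snr = PowerLaw(minimum=8.0, maximum=100.0, index=-4)
    samples = snr.sample((10000,))  # concentrates near the minimum SNR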
@@ -173,3 +178,204 @@ class DeltaFunction(dist.Distribution):
         return self.peak * torch.ones(
             sample_shape, device=self.peak.device, dtype=torch.float32
         )
+
+
+class UniformComovingVolume(dist.Distribution):
+    """
+    Sample either redshift, comoving distance, or luminosity distance
+    such that they are uniform in comoving volume, assuming a flat
+    lambda-CDM cosmology. Default H0 and Omega_M values match
+    `Planck18 parameters in Astropy <https://docs.astropy.org/en/latest/api/astropy.cosmology.realizations.Planck18.html>`_.
+
+    Args:
+        minimum: Minimum distance in the specified distance type
+        maximum: Maximum distance in the specified distance type
+        distance_type:
+            Type of distance to sample from. Can be ``redshift``,
+            ``comoving_distance``, or ``luminosity_distance``
+        h0: Hubble constant in km/s/Mpc
+        omega_m: Matter density parameter
+        z_grid_max: Maximum redshift for the grid
+        grid_size: Number of points in the grid for interpolation
+        validate_args: Whether to validate arguments
+    """
+
+    arg_constraints = {}
+    support = dist.constraints.nonnegative
+
+    def __init__(
+        self,
+        minimum: float,
+        maximum: float,
+        distance_type: str = "redshift",
+        h0: float = _PLANCK18_H0,
+        omega_m: float = _PLANCK18_OMEGA_M,
+        z_grid_max: float = 5,
+        grid_size: int = 10000,
+        validate_args: bool = None,
+    ):
+        super().__init__(validate_args=validate_args)
+        if distance_type not in [
+            "redshift",
+            "comoving_distance",
+            "luminosity_distance",
+        ]:
+            raise ValueError(
+                "Distance type must be 'redshift', 'comoving_distance', "
+                f"or 'luminosity_distance'; got {distance_type}"
+            )
+
+        self.minimum = minimum
+        self.maximum = maximum
+        self.distance_type = distance_type
+        self.grid_size = grid_size
+        self.z_grid_max = z_grid_max
+        self.h0 = h0
+        self.omega_m = omega_m
+
+        # Compute redshift range based on the given min and max distances
+        z_min, z_max = self._get_z_bounds()
+        if z_max > z_grid_max:
+            raise ValueError(
+                f"Maximum {distance_type} {maximum} "
+                f"exceeds given z_max {z_grid_max}."
+            )
+
+        # Restrict distance grids to the specified redshift range
+        mask = (self.z_grid >= z_min) & (self.z_grid <= z_max)
+        self.distance_grid = self.distance_grid[mask]
+        self.z_grid = self.z_grid[mask]
+        self.comoving_dist_grid = self.comoving_dist_grid[mask]
+        self.luminosity_dist_grid = self.luminosity_dist_grid[mask]
+        # Compute probability arrays from those grids
+        self._generate_probability_grids()
+
+    def _hubble_function(self):
+        """
+        Compute H(z) assuming a flat lambda-CDM cosmology.
+        """
+        omega_l = 1 - self.omega_m
+        return self.h0 * torch.sqrt(
+            self.omega_m * (1 + self.z_grid) ** 3 + omega_l
+        )
+
+    def _get_z_bounds(self):
+        """
+        Compute the bounds on redshift based on the given minimum and maximum
+        distances, using the specified distance type.
+        """
+        self._generate_distance_grids()
+        bounds = torch.tensor([self.minimum, self.maximum])
+        z_min, z_max = self._linear_interp_1d(
+            self.distance_grid, self.z_grid, bounds
+        )
+
+        return z_min, z_max
+
+    def _generate_distance_grids(self):
+        """
+        Generate distance grids based on the specified redshift range.
+        """
+        self.z_grid = torch.linspace(0, self.z_grid_max, self.grid_size)
+        self.dz = self.z_grid[1] - self.z_grid[0]
+        # C is specified in m/s, h0 in km/s/Mpc, so divide by 1000 to convert
+        comoving_dist_grid = (
+            torch.cumulative_trapezoid(
+                (C / self._hubble_function()), self.z_grid
+            )
+            / 1000
+        )
+        zero_prefix = torch.zeros(1, dtype=comoving_dist_grid.dtype)
+        self.comoving_dist_grid = torch.cat([zero_prefix, comoving_dist_grid])
+        self.luminosity_dist_grid = self.comoving_dist_grid * (1 + self.z_grid)
+
+        if self.distance_type == "redshift":
+            self.distance_grid = self.z_grid
+        elif self.distance_type == "comoving_distance":
+            self.distance_grid = self.comoving_dist_grid
+        else:  # luminosity_distance
+            self.distance_grid = self.luminosity_dist_grid
+
+    def _p_of_distance(self):
+        """
+        Compute the unnormalized probability as a function of distance
+        """
+        dV_dz = self.comoving_dist_grid**2 / self._hubble_function()
+        # This is a tensor of ones if the distance type is redshift
+        jacobian = torch.gradient(self.distance_grid, spacing=self.dz)[0]
+        return dV_dz / jacobian
+
+    def _generate_probability_grids(self):
+        """
+        Compute the pdf, cdf, and log pdf based on the
+        comoving volume differential and distance grid.
+        """
+        p_of_distance = self._p_of_distance()
+        self.pdf = p_of_distance / torch.trapz(
+            p_of_distance, self.distance_grid
+        )
+        cdf = torch.cumulative_trapezoid(self.pdf, self.distance_grid)
+        zero_prefix = torch.zeros(1, dtype=cdf.dtype)
+        self.cdf = torch.cat([zero_prefix, cdf])
+        self.log_pdf = torch.log(self.pdf)
+
+    def _linear_interp_1d(self, x_grid, y_grid, x_query):
+        idx = torch.bucketize(x_query, x_grid, right=True)
+        idx = idx.clamp(min=1, max=len(x_grid) - 1)
+
+        x0 = x_grid[idx - 1]
+        x1 = x_grid[idx]
+        y0 = y_grid[idx - 1]
+        y1 = y_grid[idx]
+
+        t = (x_query - x0) / (x1 - x0)
+        return y0 + t * (y1 - y0)
+
+    def rsample(self, sample_shape: torch.Size = None) -> Tensor:
+        sample_shape = sample_shape or torch.Size()
+        u = torch.rand(sample_shape)
+        return self._linear_interp_1d(self.cdf, self.distance_grid, u)
+
+    def log_prob(self, value: Tensor) -> Tensor:
+        log_prob = self._linear_interp_1d(
+            self.distance_grid, self.log_pdf, value
+        )
+        inside_range = (value >= self.minimum) & (value <= self.maximum)
+        log_prob[~inside_range] = float("-inf")
+        return log_prob
+
+
+ class RateEvolution(UniformComovingVolume):
349
+ """
350
+ Wrapper around :meth:`~ml4gw.distributions.UniformComovingVolume` to allow for
351
+ arbitrary rate evolution functions. E.g., if
352
+ ``rate_function = lambda z: 1 / (1 + z)``, then the distribution
353
+ will sample values such that they occur uniform in
354
+ source frame time.
355
+
356
+ Args:
357
+ rate_function: Callable that takes redshift as input
358
+ and returns the rate evolution factor.
359
+ *args: Arguments passed to
360
+ :meth:`~ml4gw.distributions.UniformComovingVolume` constructor.
361
+ **kwargs: Keyword arguments passed to
362
+ :meth:`~ml4gw.distributions.UniformComovingVolume` constructor.
363
+ """ # noqa E501
364
+
365
+ def __init__(
366
+ self,
367
+ rate_function: Callable,
368
+ *args,
369
+ **kwargs,
370
+ ):
371
+ self.rate_function = rate_function
372
+ super().__init__(*args, **kwargs)
373
+
374
+ def _p_of_distance(self):
375
+ """
376
+ Compute the unnormalized probability as a function of distance
377
+ """
378
+ dV_dz = self.comoving_dist_grid**2 / self._hubble_function()
379
+ # This is a tensor of ones if the distance type is redshift
380
+ jacobian = torch.gradient(self.distance_grid, spacing=self.dz)[0]
381
+ return dV_dz / jacobian * self.rate_function(self.z_grid)
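
And the subclass, weighting the comoving-volume density by a rate factor; the ``1 / (1 + z)`` choice is the docstring's own example of sampling uniformly in source-frame time (bounds illustrative):

    from ml4gw.distributions import RateEvolution

    rate = RateEvolution(
        rate_function=lambda z: 1 / (1 + z),
        minimum=0.01,
        maximum=1.0,
        distance_type="redshift",
    )
    z = rate.rsample((4096,))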