PyPI - pytme - Versions diffs - 0.2.1__cp311-cp311-macosx_14_0_arm64.whl → 0.2.3__cp311-cp311-macosx_14_0_arm64.whl - Mend

pytme 0.2.1__cp311-cp311-macosx_14_0_arm64.whl → 0.2.3__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{pytme-0.2.1.data → pytme-0.2.3.data}/scripts/match_template.py +219 -216
{pytme-0.2.1.data → pytme-0.2.3.data}/scripts/postprocess.py +86 -54
pytme-0.2.3.data/scripts/preprocess.py +132 -0
{pytme-0.2.1.data → pytme-0.2.3.data}/scripts/preprocessor_gui.py +181 -94
pytme-0.2.3.dist-info/METADATA +92 -0
pytme-0.2.3.dist-info/RECORD +75 -0
{pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/WHEEL +1 -1
pytme-0.2.1.data/scripts/preprocess.py → scripts/eval.py +1 -1
scripts/extract_candidates.py +20 -13
scripts/match_template.py +219 -216
scripts/match_template_filters.py +154 -95
scripts/postprocess.py +86 -54
scripts/preprocess.py +95 -56
scripts/preprocessor_gui.py +181 -94
scripts/refine_matches.py +265 -61
tme/__init__.py +0 -1
tme/__version__.py +1 -1
tme/analyzer.py +458 -813
tme/backends/__init__.py +40 -11
tme/backends/_jax_utils.py +187 -0
tme/backends/cupy_backend.py +109 -226
tme/backends/jax_backend.py +230 -152
tme/backends/matching_backend.py +445 -384
tme/backends/mlx_backend.py +32 -59
tme/backends/npfftw_backend.py +240 -507
tme/backends/pytorch_backend.py +30 -151
tme/density.py +248 -371
tme/extensions.cpython-311-darwin.so +0 -0
tme/matching_data.py +328 -284
tme/matching_exhaustive.py +195 -1499
tme/matching_optimization.py +143 -106
tme/matching_scores.py +887 -0
tme/matching_utils.py +287 -388
tme/memory.py +377 -0
tme/orientations.py +78 -21
tme/parser.py +3 -4
tme/preprocessing/_utils.py +61 -32
tme/preprocessing/composable_filter.py +7 -4
tme/preprocessing/compose.py +7 -3
tme/preprocessing/frequency_filters.py +49 -39
tme/preprocessing/tilt_series.py +44 -72
tme/preprocessor.py +560 -526
tme/structure.py +491 -188
tme/types.py +5 -3
pytme-0.2.1.dist-info/METADATA +0 -73
pytme-0.2.1.dist-info/RECORD +0 -73
tme/helpers.py +0 -881
tme/matching_constrained.py +0 -195
{pytme-0.2.1.data → pytme-0.2.3.data}/scripts/estimate_ram_usage.py +0 -0
{pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/LICENSE +0 -0
{pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/entry_points.txt +0 -0
{pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/top_level.txt +0 -0

tme/memory.py ADDED Viewed

@@ -0,0 +1,377 @@
+""" Compute memory consumption of template matching components.
+    Copyright (c) 2023 European Molecular Biology Laboratory
+    Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
+"""
+from abc import ABC, abstractmethod
+from typing import Tuple
+import numpy as np
+from pyfftw import next_fast_len
+class MatchingMemoryUsage(ABC):
+    """
+    Class specification for estimating the memory requirements of template matching.
+    Parameters
+    ----------
+    fast_shape : tuple of int
+        Shape of the real array.
+    ft_shape : tuple of int
+        Shape of the complex array.
+    float_nbytes : int
+        Number of bytes of the used float, e.g. 4 for float32.
+    complex_nbytes : int
+        Number of bytes of the used complex, e.g. 8 for complex64.
+    integer_nbytes : int
+        Number of bytes of the used integer, e.g. 4 for int32.
+    Attributes
+    ----------
+    real_array_size : int
+        Number of elements in real array, stored as a Python int.
+    complex_array_size : int
+        Number of elements in complex array, stored as a Python int.
+    float_nbytes : int
+        Number of bytes of the used float, e.g. 4 for float32.
+    complex_nbytes : int
+        Number of bytes of the used complex, e.g. 8 for complex64.
+    integer_nbytes : int
+        Number of bytes of the used integer, e.g. 4 for int32.
+    Methods
+    -------
+    base_usage():
+        Returns the base memory usage in bytes.
+    per_fork():
+        Returns the memory usage in bytes per fork.
+    """
+    def __init__(
+        self,
+        fast_shape: Tuple[int, ...],
+        ft_shape: Tuple[int, ...],
+        float_nbytes: int,
+        complex_nbytes: int,
+        integer_nbytes: int,
+    ):
+        # Accumulate in int64 and convert to a Python int. The default integer
+        # accumulator of np.prod is platform dependent and wraps silently on
+        # overflow, and subclasses multiply these counts further to get bytes.
+        self.real_array_size = int(np.prod(fast_shape, dtype=np.int64))
+        self.complex_array_size = int(np.prod(ft_shape, dtype=np.int64))
+        self.float_nbytes = float_nbytes
+        self.complex_nbytes = complex_nbytes
+        self.integer_nbytes = integer_nbytes
+    @abstractmethod
+    def base_usage(self) -> int:
+        """Return the base memory usage in bytes."""
+    @abstractmethod
+    def per_fork(self) -> int:
+        """Return the memory usage per fork in bytes."""
+class CCMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the memory consumption of CC scoring.
+    Base and per-fork usage are each the bytes of one real plus one complex array.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.cc_setup`.
+    """
+    def base_usage(self) -> int:
+        """Return the bytes of one real and one complex array."""
+        return (
+            self.real_array_size * self.float_nbytes
+            + self.complex_array_size * self.complex_nbytes
+        )
+    def per_fork(self) -> int:
+        """Return the bytes of one real and one complex array per fork."""
+        return (
+            self.real_array_size * self.float_nbytes
+            + self.complex_array_size * self.complex_nbytes
+        )
+class LCCMemoryUsage(CCMemoryUsage):
+    """
+    Memory usage estimation for LCC scoring.
+    Defines no methods of its own; base_usage and per_fork are inherited from
+    CCMemoryUsage.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.lcc_setup`.
+    """
+class CORRMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the memory consumption of CORR scoring.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.corr_setup`.
+    """
+    def base_usage(self) -> int:
+        """Return the bytes of four real arrays and one complex array."""
+        return (
+            self.real_array_size * self.float_nbytes * 4
+            + self.complex_array_size * self.complex_nbytes
+        )
+    def per_fork(self) -> int:
+        """Return the bytes of one real and one complex array per fork."""
+        return (
+            self.real_array_size * self.float_nbytes
+            + self.complex_array_size * self.complex_nbytes
+        )
+class CAMMemoryUsage(CORRMemoryUsage):
+    """
+    Memory usage estimation for CAM scoring.
+    Defines no methods of its own; base_usage and per_fork are inherited from
+    CORRMemoryUsage.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.cam_setup`.
+    """
+class FLCSphericalMaskMemoryUsage(CORRMemoryUsage):
+    """
+    Memory usage estimation for FLCSphericalMask scoring.
+    Defines no methods of its own; base_usage and per_fork are inherited from
+    CORRMemoryUsage.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.flcSphericalMask_setup`.
+    """
+class FLCMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the memory consumption of FLC scoring.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.flc_setup`.
+    """
+    def base_usage(self) -> int:
+        """Return the bytes of two real and two complex arrays."""
+        return (
+            self.real_array_size * self.float_nbytes * 2
+            + self.complex_array_size * self.complex_nbytes * 2
+        )
+    def per_fork(self) -> int:
+        """Return the bytes of three real and two complex arrays per fork."""
+        return (
+            self.real_array_size * self.float_nbytes * 3
+            + self.complex_array_size * self.complex_nbytes * 2
+        )
+class MCCMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the memory consumption of MCC scoring.
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.mcc_setup`.
+    """
+    def base_usage(self) -> int:
+        """Return the bytes of two real and three complex arrays."""
+        return (
+            self.real_array_size * self.float_nbytes * 2
+            + self.complex_array_size * self.complex_nbytes * 3
+        )
+    def per_fork(self) -> int:
+        """Return the bytes of six real arrays and one complex array per fork."""
+        return (
+            self.real_array_size * self.float_nbytes * 6
+            + self.complex_array_size * self.complex_nbytes
+        )
+class MaxScoreOverRotationsMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the memory consumption of the MaxScoreOverRotations analyzer.
+    See Also
+    --------
+    :py:class:`tme.analyzer.MaxScoreOverRotations`.
+    """
+    def base_usage(self) -> int:
+        """Return the bytes of two real arrays."""
+        return self.real_array_size * self.float_nbytes * 2
+    def per_fork(self) -> int:
+        """Return zero; no per-fork memory is attributed to this analyzer."""
+        return 0
+class PeakCallerMaximumFilterMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the memory consumption of the PeakCallerMaximumFilter analyzer.
+    See Also
+    --------
+    :py:class:`tme.analyzer.PeakCallerMaximumFilter`.
+    """
+    def base_usage(self) -> int:
+        """Return the bytes of one real array."""
+        return self.real_array_size * self.float_nbytes
+    def per_fork(self) -> int:
+        """Return the bytes of one real array per fork."""
+        return self.real_array_size * self.float_nbytes
+class CupyBackendMemoryUsage(MatchingMemoryUsage):
+    """
+    Estimate the additional memory consumption of the CupyBackend.
+    See Also
+    --------
+    :py:class:`tme.backends.CupyBackend`.
+    """
+    def base_usage(self) -> int:
+        """Return the backend overhead in bytes."""
+        # FFT plans, overhead from assigning FFT result, rotation interpolation
+        # NOTE(review): the real element count is paired with complex_nbytes and
+        # the complex element count with float_nbytes -- confirm this is intended.
+        return (
+            self.complex_array_size * self.float_nbytes * 2
+            + self.real_array_size * self.complex_nbytes * 3
+        )
+    def per_fork(self) -> int:
+        """Return zero; no per-fork memory is attributed to the backend."""
+        return 0
+def _compute_convolution_shapes(
+    arr1_shape: Tuple[int], arr2_shape: Tuple[int]
+) -> Tuple[Tuple[int], Tuple[int], Tuple[int]]:
+    """
+    Derive the regular, FFT-optimized and fourier convolution shapes of two arrays.
+    Parameters
+    ----------
+    arr1_shape : tuple
+        Tuple of integers corresponding to array1 shape.
+    arr2_shape : tuple
+        Tuple of integers corresponding to array2 shape.
+    Returns
+    -------
+    tuple
+        Regular convolution shape (elementwise sum of both shapes minus one),
+        that shape with every axis passed through ``next_fast_len``, and the
+        optimized shape with its last axis replaced by ``n // 2 + 1``
+        (see :py:meth:`build_fft`).
+    """
+    convolution_shape = np.add(arr1_shape, arr2_shape) - 1
+    fast_shape = list(map(next_fast_len, convolution_shape))
+    # Only the last axis is shortened for the fourier shape.
+    reduced_last_axis = fast_shape[-1] // 2 + 1
+    fast_ft_shape = fast_shape[:-1] + [reduced_last_axis]
+    return convolution_shape, fast_shape, fast_ft_shape
+# Maps the name of a scoring method, analyzer or backend to the class that
+# estimates its memory usage. The key order is also the order in which the
+# options are listed in the error message of estimate_ram_usage.
+MATCHING_MEMORY_REGISTRY = {
+    # Scoring methods
+    "CC": CCMemoryUsage,
+    "LCC": LCCMemoryUsage,
+    "CORR": CORRMemoryUsage,
+    "CAM": CAMMemoryUsage,
+    "MCC": MCCMemoryUsage,
+    "FLCSphericalMask": FLCSphericalMaskMemoryUsage,
+    "FLC": FLCMemoryUsage,
+    # Analyzers
+    "MaxScoreOverRotations": MaxScoreOverRotationsMemoryUsage,
+    "PeakCallerMaximumFilter": PeakCallerMaximumFilterMemoryUsage,
+    # Backends; pytorch is estimated with the cupy backend class.
+    "cupy": CupyBackendMemoryUsage,
+    "pytorch": CupyBackendMemoryUsage,
+}
+def estimate_ram_usage(
+    shape1: Tuple[int],
+    shape2: Tuple[int],
+    matching_method: str,
+    ncores: int,
+    analyzer_method: str = None,
+    backend: str = None,
+    float_nbytes: int = 4,
+    complex_nbytes: int = 8,
+    integer_nbytes: int = 4,
+) -> int:
+    """
+    Estimate the RAM usage for a given convolution operation based on input shapes,
+    matching_method, and number of cores.
+    Parameters
+    ----------
+    shape1 : tuple
+        The shape of the input target.
+    shape2 : tuple
+        The shape of the input template.
+    matching_method : str
+        The method used for the operation. Has to be a key of
+        MATCHING_MEMORY_REGISTRY.
+    ncores : int
+        The number of CPU cores used for the operation.
+    analyzer_method : str, optional
+        The method used for score analysis. Ignored if it has no entry in
+        MATCHING_MEMORY_REGISTRY.
+    backend : str, optional
+        Backend used for computation. Ignored if it has no entry in
+        MATCHING_MEMORY_REGISTRY.
+    float_nbytes : int
+        Number of bytes of the used float, e.g. 4 for float32.
+    complex_nbytes : int
+        Number of bytes of the used complex, e.g. 8 for complex64.
+    integer_nbytes : int
+        Number of bytes of the used integer, e.g. 4 for int32.
+    Returns
+    -------
+    int
+        The estimated RAM usage for the operation in bytes.
+    Notes
+    -----
+        Residual memory from other objects that may remain allocated during
+        template matching, e.g. the full sized target when using splitting,
+        are not considered by this function.
+    Raises
+    ------
+    ValueError
+        If an unsupported matching_method is provided.
+    """
+    if matching_method not in MATCHING_MEMORY_REGISTRY:
+        raise ValueError(
+            f"Supported options are {','.join(MATCHING_MEMORY_REGISTRY.keys())}"
+        )
+    # The unpadded convolution shape is not needed for the estimate.
+    _, fast_shape, ft_shape = _compute_convolution_shapes(shape1, shape2)
+    nbytes = 0
+    # matching_method is known to be registered at this point. An analyzer_method
+    # or backend without a registry entry (including None) contributes nothing.
+    for component in (matching_method, analyzer_method, backend):
+        usage_class = MATCHING_MEMORY_REGISTRY.get(component, None)
+        if usage_class is None:
+            continue
+        usage = usage_class(
+            fast_shape=fast_shape,
+            ft_shape=ft_shape,
+            float_nbytes=float_nbytes,
+            complex_nbytes=complex_nbytes,
+            integer_nbytes=integer_nbytes,
+        )
+        nbytes += usage.base_usage() + usage.per_fork() * ncores
+    return nbytes

tme/orientations.py CHANGED Viewed

@@ -6,6 +6,7 @@
     Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
 """
 import re
+import warnings
 from collections import deque
 from dataclasses import dataclass
 from string import ascii_lowercase
@@ -62,16 +63,16 @@ class Orientations:
         Array with additional orientation details (n, ).
     """
-    #: Return a numpy array with translations of each orientation (n x d).
+    #: Array with translations of each orientation (n, d).
     translations: np.ndarray
-    #: Return a numpy array with euler angles of each orientation in zxy format (n x d).
+    #: Array with zyx euler angles of each orientation (n, d).
     rotations: np.ndarray
-    #: Return a numpy array with the score of each orientation (n, ).
+    #: Array with scores of each orientation (n, ).
     scores: np.ndarray
-    #: Return a numpy array with additional orientation details (n, ).
+    #: Array with additional details of each orientation (n, ).
     details: np.ndarray
     def __post_init__(self):
@@ -130,9 +131,21 @@ class Orientations:
             "scores",
             "details",
         )
-        kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
+        kwargs = {attr: getattr(self, attr)[indices].copy() for attr in attributes}
         return self.__class__(**kwargs)
+    def copy(self) -> "Orientations":
+        """
+        Create a copy of the current class instance.
+        Returns
+        -------
+        :py:class:`Orientations`
+            Copy of the class instance.
+        """
+        indices = np.arange(self.scores.size)
+        return self[indices]
     def to_file(self, filename: str, file_format: type = None, **kwargs) -> None:
         """
         Save the current class instance to a file in the specified format.
@@ -146,7 +159,7 @@ class Orientations:
             the file_format from the typical extension. Supported formats are
             +---------------+----------------------------------------------------+
-            | text          | pyTME's standard tab-separated orientations file   |
+            | text          | pytme's standard tab-separated orientations file   |
             +---------------+----------------------------------------------------+
             | relion        | Creates a STAR file of orientations                |
             +---------------+----------------------------------------------------+
@@ -207,11 +220,11 @@ class Orientations:
         with open(filename, mode="w", encoding="utf-8") as ofile:
             _ = ofile.write(f"{header}\n")
             for translation, angles, score, detail in self:
-                translation_string = "\t".join([str(x) for x in translation])
-                angle_string = "\t".join([str(x) for x in angles])
-                _ = ofile.write(
-                    f"{translation_string}\t{angle_string}\t{score}\t{detail}\n"
+                out_string = (
+                    "\t".join([str(x) for x in (*translation, *angles, score, detail)])
+                    + "\n"
                 )
+                _ = ofile.write(out_string)
         return None
     def _to_dynamo_tbl(
@@ -289,7 +302,7 @@ class Orientations:
     def _to_relion_star(
         self,
         filename: str,
-        name_prefix: str = None,
+        name: str = None,
         ctf_image: str = None,
         sampling_rate: float = 1.0,
         subtomogram_size: int = 0,
@@ -301,8 +314,9 @@ class Orientations:
         ----------
         filename : str
             The name of the file to save the orientations.
-        name_prefix : str, optional
-            A prefix to add to the image names in the STAR file.
+        name : str or list of str, optional
+            Path to image file the orientation is in reference to. If name is a list,
+            it is assumed to correspond to _rlnImageName, otherwise _rlnMicrographName.
         ctf_image : str, optional
             Path to CTF or wedge mask RELION.
         sampling_rate : float, optional
@@ -340,6 +354,21 @@ class Orientations:
         optics_header = "\n".join(optics_header)
         optics_data = "\t".join(optics_data)
+        if name is None:
+            name = ""
+            warnings.warn(
+                "Consider specifying the name argument. A single string will be "
+                "interpreted as path to the original micrograph, a list of strings "
+                "as path to individual subsets."
+            )
+        name_reference = "_rlnImageName"
+        if isinstance(name, str):
+            name = [
+                name,
+            ] * self.translations.shape[0]
+            name_reference = "_rlnMicrographName"
         header = [
             "data_particles",
             "",
@@ -347,7 +376,7 @@ class Orientations:
             "_rlnCoordinateX",
             "_rlnCoordinateY",
             "_rlnCoordinateZ",
-            "_rlnImageName",
+            name_reference,
             "_rlnAngleRot",
             "_rlnAngleTilt",
             "_rlnAnglePsi",
@@ -359,8 +388,6 @@ class Orientations:
         ctf_image = "" if ctf_image is None else f"\t{ctf_image}"
         header = "\n".join(header)
-        name_prefix = "" if name_prefix is None else name_prefix
         with open(filename, mode="w", encoding="utf-8") as ofile:
             _ = ofile.write(f"{optics_header}\n")
             _ = ofile.write(f"{optics_data}\n")
@@ -375,9 +402,8 @@ class Orientations:
                 translation_string = "\t".join([str(x) for x in translation][::-1])
                 angle_string = "\t".join([str(x) for x in rotation])
-                name = f"{name_prefix}_{index}.mrc"
                 _ = ofile.write(
-                    f"{translation_string}\t{name}\t{angle_string}\t1{ctf_image}\n"
+                    f"{translation_string}\t{name[index]}\t{angle_string}\t1{ctf_image}\n"
                 )
         return None
@@ -465,8 +491,10 @@ class Orientations:
         Notes
         -----
-        The text file is expected to have a header and data in columns corresponding to
-        z, y, x, euler_z, euler_y, euler_x, score, detail.
+        The text file is expected to have a header and data in columns. Columns containing
+        the name euler are considered to specify rotations. The second-to-last and last
+        columns correspond to score and detail. It is possible to specify only translations;
+        in this case the remaining columns will be filled with trivial values.
         """
         with open(filename, mode="r", encoding="utf-8") as infile:
             data = [x.strip().split("\t") for x in infile.read().split("\n")]
@@ -493,6 +521,32 @@ class Orientations:
         score = np.array(score)
         detail = np.array(detail)
+        if translation.shape[1] == len(header):
+            rotation = np.zeros(translation.shape, dtype=np.float32)
+            score = np.zeros(translation.shape[0], dtype=np.float32)
+            detail = np.zeros(translation.shape[0], dtype=np.float32) - 1
+        if rotation.size == 0 and translation.shape[0] != 0:
+            rotation = np.zeros(translation.shape, dtype=np.float32)
+        header_order = tuple(x for x in header if x in ascii_lowercase)
+        header_order = zip(header_order, range(len(header_order)))
+        sort_order = tuple(
+            x[1] for x in sorted(header_order, key=lambda x: x[0], reverse=True)
+        )
+        translation = translation[..., sort_order]
+        header_order = tuple(
+            x
+            for x in header
+            if "euler" in x and x.replace("euler_", "") in ascii_lowercase
+        )
+        header_order = zip(header_order, range(len(header_order)))
+        sort_order = tuple(
+            x[1] for x in sorted(header_order, key=lambda x: x[0], reverse=True)
+        )
+        rotation = rotation[..., sort_order]
         return translation, rotation, score, detail
     @staticmethod
@@ -544,7 +598,10 @@ class Orientations:
         cls, filename: str, delimiter: str = None
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         ret = cls._parse_star(filename=filename, delimiter=delimiter)
-        ret = ret["data_particles"]
+        ret = ret.get("data_particles", None)
+        if ret is None:
+            raise ValueError(f"No data_particles section found in {filename}.")
         translation = np.vstack(
             (ret["_rlnCoordinateZ"], ret["_rlnCoordinateY"], ret["_rlnCoordinateX"])

tme/parser.py CHANGED Viewed

@@ -137,8 +137,7 @@ class Parser(ABC):
 class PDBParser(Parser):
     """
-    A Parser subclass for converting PDB file data into a dictionary representation.
-    This class is specifically designed to work with PDB file format.
+    Convert PDB file data into a dictionary representation [1]_.
     References
     ----------
@@ -228,8 +227,8 @@ class PDBParser(Parser):
 class MMCIFParser(Parser):
     """
-    A Parser subclass for converting MMCIF file data into a dictionary representation.
-    This implementation heavily relies on the atomium library [1]_.
+    Convert MMCIF file data into a dictionary representation. This implementation
+    heavily relies on the atomium library [1]_.
     References
     ----------