pytme 0.1.8__cp311-cp311-macosx_14_0_arm64.whl → 0.2.0b0__cp311-cp311-macosx_14_0_arm64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {pytme-0.1.8.data → pytme-0.2.0b0.data}/scripts/match_template.py +148 -126
- pytme-0.2.0b0.data/scripts/postprocess.py +570 -0
- {pytme-0.1.8.data → pytme-0.2.0b0.data}/scripts/preprocessor_gui.py +244 -60
- {pytme-0.1.8.dist-info → pytme-0.2.0b0.dist-info}/METADATA +3 -1
- pytme-0.2.0b0.dist-info/RECORD +66 -0
- {pytme-0.1.8.dist-info → pytme-0.2.0b0.dist-info}/WHEEL +1 -1
- scripts/extract_candidates.py +218 -0
- scripts/match_template.py +148 -126
- scripts/match_template_filters.py +852 -0
- scripts/postprocess.py +380 -435
- scripts/preprocessor_gui.py +244 -60
- scripts/refine_matches.py +218 -0
- tme/__init__.py +2 -1
- tme/__version__.py +1 -1
- tme/analyzer.py +545 -78
- tme/backends/cupy_backend.py +80 -15
- tme/backends/npfftw_backend.py +33 -2
- tme/backends/pytorch_backend.py +15 -7
- tme/density.py +156 -63
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/matching_constrained.py +195 -0
- tme/matching_data.py +76 -32
- tme/matching_exhaustive.py +366 -204
- tme/matching_memory.py +1 -0
- tme/matching_optimization.py +728 -651
- tme/matching_utils.py +152 -8
- tme/orientations.py +561 -0
- tme/preprocessor.py +21 -18
- tme/structure.py +2 -37
- pytme-0.1.8.data/scripts/postprocess.py +0 -625
- pytme-0.1.8.dist-info/RECORD +0 -61
- {pytme-0.1.8.data → pytme-0.2.0b0.data}/scripts/estimate_ram_usage.py +0 -0
- {pytme-0.1.8.data → pytme-0.2.0b0.data}/scripts/preprocess.py +0 -0
- {pytme-0.1.8.dist-info → pytme-0.2.0b0.dist-info}/LICENSE +0 -0
- {pytme-0.1.8.dist-info → pytme-0.2.0b0.dist-info}/entry_points.txt +0 -0
- {pytme-0.1.8.dist-info → pytme-0.2.0b0.dist-info}/top_level.txt +0 -0
tme/orientations.py
ADDED
@@ -0,0 +1,561 @@
+#!python3
+""" Handle template matching peaks and convert between formats.
+
+    Copyright (c) 2024 European Molecular Biology Laboratory
+
+    Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
+"""
+import re
+from collections import deque
+from dataclasses import dataclass
+from typing import List, Tuple, Dict
+
+import numpy as np
+from scipy.spatial.transform import Rotation
+
+
+@dataclass
+class Orientations:
+    """
+    Handle template matching peaks and convert between formats.
+    """
+
+    #: Return a numpy array with translations of each orientation (n x d).
+    translations: np.ndarray
+
+    #: Return a numpy array with euler angles of each orientation in zxy format (n x d).
+    rotations: np.ndarray
+
+    #: Return a numpy array with the score of each orientation (n, ).
+    scores: np.ndarray
+
+    #: Return a numpy array with additional orientation details (n, ).
+    details: np.ndarray
+
+    def __iter__(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Iterate over the current class instance. Each iteration returns a orientation
+        defined by its translation, rotation, score and additional detail.
+
+        Yields
+        ------
+        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
+            A tuple of arrays defining the given orientation.
+        """
+        yield from zip(self.translations, self.rotations, self.scores, self.details)
+
+    def __getitem__(self, indices: List[int]) -> "Orientations":
+        """
+        Retrieve a subset of orientations based on the provided indices.
+
+        Parameters
+        ----------
+        indices : List[int]
+            A list of indices specifying the orientations to be retrieved.
+
+        Returns
+        -------
+        :py:class:`Orientations`
+            A new :py:class:`Orientations`instance containing only the selected orientations.
+        """
+        indices = np.asarray(indices)
+        attributes = (
+            "translations",
+            "rotations",
+            "scores",
+            "details",
+        )
+        kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
+        return self.__class__(**kwargs)
+
+    def to_file(self, filename: str, file_format: type = None, **kwargs) -> None:
+        """
+        Save the current class instance to a file in the specified format.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file where the orientations will be saved.
+        file_format : type, optional
+            The format in which to save the orientations. Supported formats are 'text' and 'relion'.
+        **kwargs : dict
+            Additional keyword arguments specific to the file format.
+
+        Raises
+        ------
+        ValueError
+            If an unsupported file format is specified.
+        """
+        mapping = {
+            "text": self._to_text,
+            "relion": self._to_relion_star,
+            "dynamo": self._to_dynamo_tbl,
+        }
+        if file_format is None:
+            file_format = "text"
+            if filename.lower().endswith(".star"):
+                file_format = "relion"
+            elif filename.lower().endswith(".tbl"):
+                file_format = "dynamo"
+
+        func = mapping.get(file_format, None)
+        if func is None:
+            raise ValueError(
+                f"{file_format} not implemented. Supported are {','.join(mapping.keys())}."
+            )
+
+        return func(filename=filename, **kwargs)
+
+    def _to_text(self, filename: str) -> None:
+        """
+        Save orientations in a text file format.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to save the orientations.
+
+        Notes
+        -----
+        The file is saved with a header specifying each column: z, y, x, euler_z,
+        euler_y, euler_x, score, detail. Each row in the file corresponds to an orientation.
+        """
+        header = "\t".join(
+            ["z", "y", "x", "euler_z", "euler_y", "euler_x", "score", "detail"]
+        )
+        with open(filename, mode="w", encoding="utf-8") as ofile:
+            _ = ofile.write(f"{header}\n")
+            for translation, angles, score, detail in self:
+                translation_string = "\t".join([str(x) for x in translation])
+                angle_string = "\t".join([str(x) for x in angles])
+                _ = ofile.write(
+                    f"{translation_string}\t{angle_string}\t{score}\t{detail}\n"
+                )
+        return None
+
+    def _to_dynamo_tbl(
+        self,
+        filename: str,
+        name_prefix: str = None,
+        sampling_rate: float = 1.0,
+        subtomogram_size: int = 0,
+    ) -> None:
+        """
+        Save orientations in Dynamo's tbl file format.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to save the orientations.
+        sampling_rate : float, optional
+            Subtomogram sampling rate in angstrom per voxel
+
+        Notes
+        -----
+        The file is saved with a standard header used in Dynamo tbl files
+        outlined in [1]_. Each row corresponds to a particular partice.
+
+        References
+        ----------
+        .. [1] https://wiki.dynamo.biozentrum.unibas.ch/w/index.php/Table
+
+        The file is saved with a standard header used in Dynamo STAR files.
+        Each row in the file corresponds to an orientation.
+        """
+        with open(filename, mode="w", encoding="utf-8") as ofile:
+            for index, (translation, rotation, score, detail) in enumerate(self):
+                rotation = Rotation.from_euler("zyx", rotation, degrees=True)
+                rotation = rotation.as_euler(seq="xyx", degrees=True)
+                out = [
+                    index,
+                    1,
+                    0,
+                    0,
+                    0,
+                    0,
+                    *rotation,
+                    self.scores[index],
+                    self.scores[index],
+                    0,
+                    0,
+                    # Wedge parameters
+                    -90,
+                    90,
+                    -60,
+                    60,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    # Coordinate in original volume
+                    *translation[::-1],
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    sampling_rate,
+                    3,
+                    0,
+                    0,
+                ]
+                _ = ofile.write(" ".join([str(x) for x in out]) + "\n")
+
+        return None
+
+    def _to_relion_star(
+        self,
+        filename: str,
+        name_prefix: str = None,
+        ctf_image: str = None,
+        sampling_rate: float = 1.0,
+        subtomogram_size: int = 0,
+    ) -> None:
+        """
+        Save orientations in RELION's STAR file format.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to save the orientations.
+        name_prefix : str, optional
+            A prefix to add to the image names in the STAR file.
+        ctf_image : str, optional
+            Path to CTF or wedge mask RELION.
+        sampling_rate : float, optional
+            Subtomogram sampling rate in angstrom per voxel
+        subtomogram_size : int, optional
+            Size of the square shaped subtomogram.
+
+        Notes
+        -----
+        The file is saved with a standard header used in RELION STAR files.
+        Each row in the file corresponds to an orientation.
+        """
+        optics_header = [
+            "# version 30001",
+            "data_optics",
+            "",
+            "loop_",
+            "_rlnOpticsGroup",
+            "_rlnOpticsGroupName",
+            "_rlnSphericalAberration",
+            "_rlnVoltage",
+            "_rlnImageSize",
+            "_rlnImageDimensionality",
+            "_rlnImagePixelSize",
+        ]
+        optics_data = [
+            "1",
+            "opticsGroup1",
+            "2.700000",
+            "300.000000",
+            str(int(subtomogram_size)),
+            "3",
+            str(float(sampling_rate)),
+        ]
+        optics_header = "\n".join(optics_header)
+        optics_data = "\t".join(optics_data)
+
+        header = [
+            "data_particles",
+            "",
+            "loop_",
+            "_rlnCoordinateX",
+            "_rlnCoordinateY",
+            "_rlnCoordinateZ",
+            "_rlnImageName",
+            "_rlnAngleRot",
+            "_rlnAngleTilt",
+            "_rlnAnglePsi",
+            "_rlnOpticsGroup",
+        ]
+        if ctf_image is not None:
+            header.append("_rlnCtfImage")
+
+        ctf_image = "" if ctf_image is None else f"\t{ctf_image}"
+
+        header = "\n".join(header)
+        name_prefix = "" if name_prefix is None else name_prefix
+
+        with open(filename, mode="w", encoding="utf-8") as ofile:
+            _ = ofile.write(f"{optics_header}\n")
+            _ = ofile.write(f"{optics_data}\n")
+
+            _ = ofile.write("\n# version 30001\n")
+            _ = ofile.write(f"{header}\n")
+
+            # pyTME uses a zyx data layout
+            for index, (translation, rotation, score, detail) in enumerate(self):
+                rotation = Rotation.from_euler("zyx", rotation, degrees=True)
+                rotation = rotation.as_euler(seq="xyx", degrees=True)
+
+                translation_string = "\t".join([str(x) for x in translation][::-1])
+                angle_string = "\t".join([str(x) for x in rotation])
+                name = f"{name_prefix}_{index}.mrc"
+                _ = ofile.write(
+                    f"{translation_string}\t{name}\t{angle_string}\t1{ctf_image}\n"
+                )
+
+        return None
+
+    @classmethod
+    def from_file(
+        cls, filename: str, file_format: type = None, **kwargs
+    ) -> "Orientations":
+        """
+        Create an instance of :py:class:`Orientations` from a file.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file from which to read the orientations.
+        file_format : type, optional
+            The format of the file. Currently, only 'text' format is supported.
+        **kwargs : dict
+            Additional keyword arguments specific to the file format.
+
+        Returns
+        -------
+        :py:class:`Orientations`
+            An instance of :py:class:`Orientations` populated with data from the file.
+
+        Raises
+        ------
+        ValueError
+            If an unsupported file format is specified.
+        """
+        mapping = {"text": cls._from_text, "relion": cls._from_relion_star}
+        if file_format is None:
+            file_format = "text"
+            if filename.lower().endswith(".star"):
+                file_format = "relion"
+
+        func = mapping.get(file_format, None)
+        if func is None:
+            raise ValueError(
+                f"{file_format} not implemented. Supported are {','.join(mapping.keys())}."
+            )
+
+        translations, rotations, scores, details, *_ = func(filename=filename, **kwargs)
+        return cls(
+            translations=translations,
+            rotations=rotations,
+            scores=scores,
+            details=details,
+        )
+
+    @staticmethod
+    def _from_text(
+        filename: str,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Read orientations from a text file.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file from which to read the orientations.
+
+        Returns
+        -------
+        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
+            A tuple containing numpy arrays for translations, rotations, scores,
+            and details.
+
+        Notes
+        -----
+        The text file is expected to have a header and data in columns corresponding to
+        z, y, x, euler_z, euler_y, euler_x, score, detail.
+        """
+        with open(filename, mode="r", encoding="utf-8") as infile:
+            data = [x.strip().split("\t") for x in infile.read().split("\n")]
+        _ = data.pop(0)
+
+        translation, rotation, score, detail = [], [], [], []
+        for candidate in data:
+            if len(candidate) <= 1:
+                continue
+            if len(candidate) != 8:
+                candidate.append(-1)
+
+            candidate = [float(x) for x in candidate]
+            translation.append((candidate[0], candidate[1], candidate[2]))
+            rotation.append((candidate[3], candidate[4], candidate[5]))
+            score.append(candidate[6])
+            detail.append(candidate[7])
+
+        translation = np.vstack(translation).astype(int)
+        rotation = np.vstack(rotation).astype(float)
+        score = np.array(score).astype(float)
+        detail = np.array(detail).astype(float)
+
+        return translation, rotation, score, detail
+
+    @staticmethod
+    def _parse_star(filename: str, delimiter: str = None) -> Dict:
+        pattern = re.compile(r"\s*#.*")
+        with open(filename, mode="r", encoding="utf-8") as infile:
+            data = infile.read()
+
+        data = deque(filter(lambda line: line and line[0] != "#", data.split("\n")))
+
+        ret, category, block = {}, None, []
+        while data:
+            line = data.popleft()
+
+            if line.startswith("data") and not line.startswith("_"):
+                if category != line and category is not None:
+                    headers = list(ret[category].keys())
+                    headers = [pattern.sub("", x) for x in headers]
+                    ret[category] = {
+                        header: list(column)
+                        for header, column in zip(headers, zip(*block))
+                    }
+                    block.clear()
+                category = line
+                if category not in ret:
+                    ret[category] = {}
+                continue
+
+            if line.startswith("_"):
+                ret[category][line] = []
+                continue
+
+            if line.startswith("loop"):
+                continue
+
+            line_split = line.split(delimiter)
+            if len(line_split):
+                block.append(line_split)
+
+        headers = list(ret[category].keys())
+        headers = [pattern.sub("", x) for x in headers]
+        ret[category] = {
+            header: list(column) for header, column in zip(headers, zip(*block))
+        }
+        return ret
+
+    @classmethod
+    def _from_relion_star(
+        cls, filename: str, delimiter: str = None
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        ret = cls._parse_star(filename=filename, delimiter=delimiter)
+        ret = ret["data_particles"]
+
+        translation = (
+            np.vstack(
+                (ret["_rlnCoordinateZ"], ret["_rlnCoordinateY"], ret["_rlnCoordinateX"])
+            )
+            .astype(np.float32)
+            .astype(int)
+            .T
+        )
+
+        rotation = (
+            np.vstack((ret["_rlnAngleRot"], ret["_rlnAngleTilt"], ret["_rlnAnglePsi"]))
+            .astype(np.float32)
+            .T
+        )
+
+        rotation = Rotation.from_euler("xyx", rotation, degrees=True)
+        rotation = rotation.as_euler(seq="zyx", degrees=True)
+        score = np.ones(translation.shape[0])
+        detail = np.ones(translation.shape[0]) * 1
+
+        return translation, rotation, score, detail
+
+    def get_extraction_slices(
+        self,
+        target_shape: Tuple[int],
+        extraction_shape: Tuple[int],
+        drop_out_of_box: bool = False,
+        return_orientations: bool = False,
+    ) -> "Orientations":
+        """
+        Calculate slices for extracting regions of interest within a larger array.
+
+        Parameters
+        ----------
+        target_shape : Tuple[int]
+            The shape of the target array within which regions are to be extracted.
+        extraction_shape : Tuple[int]
+            The shape of the regions to be extracted.
+        drop_out_of_box : bool, optional
+            If True, drop regions that extend beyond the target array boundary, by default False.
+        return_orientations : bool, optional
+            If True, return orientations along with slices, by default False.
+
+        Returns
+        -------
+        Union[Tuple[List[slice]], Tuple["Orientations", List[slice], List[slice]]]
+            If return_orientations is False, returns a tuple containing slices for candidate
+            regions and observation regions.
+            If return_orientations is True, returns a tuple containing orientations along
+            with slices for candidate regions and observation regions.
+
+        Raises
+        ------
+        SystemExit
+            If no peak remains after filtering, indicating an error.
+        """
+        left_pad = np.divide(extraction_shape, 2).astype(int)
+        right_pad = np.add(left_pad, np.mod(extraction_shape, 2)).astype(int)
+
+        obs_start = np.subtract(self.translations, left_pad)
+        obs_stop = np.add(self.translations, right_pad)
+
+        cand_start = np.subtract(np.maximum(obs_start, 0), obs_start)
+        cand_stop = np.subtract(obs_stop, np.minimum(obs_stop, target_shape))
+        cand_stop = np.subtract(extraction_shape, cand_stop)
+        obs_start = np.maximum(obs_start, 0)
+        obs_stop = np.minimum(obs_stop, target_shape)
+
+        subset = self
+        if drop_out_of_box:
+            stops = np.subtract(cand_stop, extraction_shape)
+            keep_peaks = (
+                np.sum(
+                    np.multiply(cand_start == 0, stops == 0),
+                    axis=1,
+                )
+                == self.translations.shape[1]
+            )
+            n_remaining = keep_peaks.sum()
+            if n_remaining == 0:
+                print(
+                    "No peak remaining after filtering. Started with"
+                    f" {self.translations.shape[0]} filtered to {n_remaining}."
+                    " Consider reducing min_distance, increase num_peaks or use"
+                    " a different peak caller."
+                )
+                exit(-1)
+
+            cand_start = cand_start[keep_peaks,]
+            cand_stop = cand_stop[keep_peaks,]
+            obs_start = obs_start[keep_peaks,]
+            obs_stop = obs_stop[keep_peaks,]
+            subset = self[keep_peaks]
+
+        cand_start, cand_stop = cand_start.astype(int), cand_stop.astype(int)
+        obs_start, obs_stop = obs_start.astype(int), obs_stop.astype(int)
+
+        candidate_slices = [
+            tuple(slice(s, e) for s, e in zip(start_row, stop_row))
+            for start_row, stop_row in zip(cand_start, cand_stop)
+        ]
+
+        observation_slices = [
+            tuple(slice(s, e) for s, e in zip(start_row, stop_row))
+            for start_row, stop_row in zip(obs_start, obs_stop)
+        ]
+
+        if return_orientations:
+            return subset, candidate_slices, observation_slices
+
+        return candidate_slices, observation_slices
tme/preprocessor.py
CHANGED
@@ -654,12 +654,9 @@ class Preprocessor:
         array = template.copy()
         interpolation_box = array.shape
 
-        print(array.shape)
-
         for k in range(template.ndim):
             array = decimate(array, q=level, axis=k)
 
-        print(array.shape)
         template = zoom(array, np.divide(template.shape, array.shape))
         template = self.interpolate_box(box=interpolation_box, arr=template)
 
@@ -768,21 +765,24 @@ class Preprocessor:
         sigma = sigma_factor * resolution
         sigma_grid = sigma / sampling_rate
         sigma_grid2 = sigma_grid * sigma_grid
-        for index, point in enumerate(np.rollaxis(positions, 0)):
-            starts = np.maximum(np.ceil(point - cutoff_value * sigma_grid), 0).astype(
-                int
-            )
-            stops = np.minimum(
-                np.floor(point + cutoff_value * sigma_grid), shape
-            ).astype(int)
 
-
-
-
-
-
-
-
+        starts = np.maximum(np.ceil(positions - cutoff_value * sigma_grid), 0).astype(
+            int
+        )
+        stops = np.minimum(
+            np.floor(positions + cutoff_value * sigma_grid), shape
+        ).astype(int)
+        ranges = tuple(tuple(zip(start, stop)) for start, stop in zip(starts, stops))
+
+        positions = positions.reshape(
+            *positions.shape, *tuple(1 for _ in range(positions.shape[1]))
+        )
+        for index in range(positions.shape[0]):
+            grid_index = np.meshgrid(*[range(*coord) for coord in ranges[index]])
+            distances = np.sum(
+                np.square(np.subtract(grid_index, positions[index])),
+                dtype=np.float32,
+                axis=0,
             )
             np.add.at(
                 out,
@@ -1131,6 +1131,7 @@ class Preprocessor:
         stop_tilt: float,
         tilt_step: float,
         shape: Tuple[int],
+        tilt_angles: Tuple[float] = None,
         opening_axis: int = 0,
         tilt_axis: int = 2,
         sigma: float = 0,
@@ -1184,7 +1185,9 @@ class Preprocessor:
         :py:meth:`Preprocessor.wedge_mask`
         :py:meth:`Preprocessor.continuous_wedge_mask`
         """
-        tilt_angles
+        if tilt_angles is None:
+            tilt_angles = np.arange(-start_tilt, stop_tilt + tilt_step, tilt_step)
+
         plane = np.zeros((shape[opening_axis], shape[tilt_axis]), dtype=np.float32)
         subset = tuple(
             slice(None) if i != 0 else slice(x // 2, x // 2 + 1)
tme/structure.py
CHANGED
@@ -26,45 +26,10 @@ from .types import NDArray
 
 @dataclass(repr=False)
 class Structure:
-    """
+    """
+    Represents atomic structures in accordance with the Protein Data Bank (PDB)
     format specification.
 
-    Attributes
-    ----------
-    record_type : NDArray
-        Type of the record, e.g., ATOM, HETATM. Array shape = (n,)
-    atom_serial_number : NDArray
-        Serial number assigned to each atom. Array shape = (n,)
-    atom_name : NDArray
-        Standardized names for each atom. Array shape = (n,)
-    atom_coordinate : NDArray
-        The 3D Cartesian coordinates of each atom in x, y, z. Array shape = (n,3 )
-    alternate_location_indicator : NDArray
-        Indicator for alternate locations of an atom if it exists in multiple places.
-        Array shape = (n,)
-    residue_name : NDArray
-        Standard residue names where each atom belongs. Array shape = (n,)
-    chain_identifier : NDArray
-        Identifier for the chain where each atom is located. Array shape = (n,)
-    residue_sequence_number : NDArray
-        Sequence number of the residue in the protein chain for each atom.
-        Array shape = (n,)
-    code_for_residue_insertion : NDArray
-        Code to denote any residue insertion. Array shape = (n,)
-    occupancy : NDArray
-        Occupancy factor of each atom, indicating the fraction of time the atom
-        is located at its position. Array shape = (n,)
-    temperature_factor : NDArray
-        Measure of the atomic displacement or B-factor for each atom. Array shape = (n,)
-    segment_identifier : NDArray
-        Identifier for the segment where each atom belongs. Array shape = (n,)
-    element_symbol : NDArray
-        Atomic element symbol for each atom. Array shape = (n,)
-    charge : NDArray
-        Charge on the atom. Array shape = (n,)
-    details : dict
-        Any additional or auxiliary details. Array shape = (n,)
-
     References
     ----------
     .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html