pytme 0.2.1__cp311-cp311-macosx_14_0_arm64.whl → 0.2.3__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/match_template.py +219 -216
  2. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/postprocess.py +86 -54
  3. pytme-0.2.3.data/scripts/preprocess.py +132 -0
  4. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/preprocessor_gui.py +181 -94
  5. pytme-0.2.3.dist-info/METADATA +92 -0
  6. pytme-0.2.3.dist-info/RECORD +75 -0
  7. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/WHEEL +1 -1
  8. pytme-0.2.1.data/scripts/preprocess.py → scripts/eval.py +1 -1
  9. scripts/extract_candidates.py +20 -13
  10. scripts/match_template.py +219 -216
  11. scripts/match_template_filters.py +154 -95
  12. scripts/postprocess.py +86 -54
  13. scripts/preprocess.py +95 -56
  14. scripts/preprocessor_gui.py +181 -94
  15. scripts/refine_matches.py +265 -61
  16. tme/__init__.py +0 -1
  17. tme/__version__.py +1 -1
  18. tme/analyzer.py +458 -813
  19. tme/backends/__init__.py +40 -11
  20. tme/backends/_jax_utils.py +187 -0
  21. tme/backends/cupy_backend.py +109 -226
  22. tme/backends/jax_backend.py +230 -152
  23. tme/backends/matching_backend.py +445 -384
  24. tme/backends/mlx_backend.py +32 -59
  25. tme/backends/npfftw_backend.py +240 -507
  26. tme/backends/pytorch_backend.py +30 -151
  27. tme/density.py +248 -371
  28. tme/extensions.cpython-311-darwin.so +0 -0
  29. tme/matching_data.py +328 -284
  30. tme/matching_exhaustive.py +195 -1499
  31. tme/matching_optimization.py +143 -106
  32. tme/matching_scores.py +887 -0
  33. tme/matching_utils.py +287 -388
  34. tme/memory.py +377 -0
  35. tme/orientations.py +78 -21
  36. tme/parser.py +3 -4
  37. tme/preprocessing/_utils.py +61 -32
  38. tme/preprocessing/composable_filter.py +7 -4
  39. tme/preprocessing/compose.py +7 -3
  40. tme/preprocessing/frequency_filters.py +49 -39
  41. tme/preprocessing/tilt_series.py +44 -72
  42. tme/preprocessor.py +560 -526
  43. tme/structure.py +491 -188
  44. tme/types.py +5 -3
  45. pytme-0.2.1.dist-info/METADATA +0 -73
  46. pytme-0.2.1.dist-info/RECORD +0 -73
  47. tme/helpers.py +0 -881
  48. tme/matching_constrained.py +0 -195
  49. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/estimate_ram_usage.py +0 -0
  50. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/LICENSE +0 -0
  51. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/entry_points.txt +0 -0
  52. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/top_level.txt +0 -0
scripts/postprocess.py CHANGED
@@ -8,9 +8,8 @@
 import argparse
 from sys import exit
 from os import getcwd
-from os.path import join, abspath
 from typing import List, Tuple
-from os.path import splitext
+from os.path import join, abspath, splitext
 
 import numpy as np
 from numpy.typing import NDArray
@@ -26,6 +25,7 @@ from tme.analyzer import (
 )
 from tme.matching_utils import (
     load_pickle,
+    centered_mask,
     euler_to_rotationmatrix,
     euler_from_rotationmatrix,
 )
@@ -41,9 +41,7 @@ PEAK_CALLERS = {
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(
-        description="Peak Calling for Template Matching Outputs"
-    )
+    parser = argparse.ArgumentParser(description="Analyze Template Matching Outputs")
 
     input_group = parser.add_argument_group("Input")
     output_group = parser.add_argument_group("Output")
@@ -56,6 +54,13 @@ def parse_args():
         nargs="+",
         help="Path to the output of match_template.py.",
     )
+    input_group.add_argument(
+        "--background_file",
+        required=False,
+        nargs="+",
+        help="Path to an output of match_template.py used for normalization. "
+        "For instance from --scramble_phases or a different template.",
+    )
     input_group.add_argument(
         "--target_mask",
         required=False,
@@ -87,7 +92,7 @@ def parse_args():
             "average",
         ],
         default="orientations",
-        help="Available output formats:"
+        help="Available output formats: "
         "orientations (translation, rotation, and score), "
         "alignment (aligned template to target based on orientations), "
         "extraction (extract regions around peaks from targets, i.e. subtomograms), "
@@ -206,6 +211,15 @@ def parse_args():
     elif args.number_of_peaks is None:
         args.number_of_peaks = 1000
 
+    if args.background_file is None:
+        args.background_file = [None]
+    if len(args.background_file) == 1:
+        args.background_file = args.background_file * len(args.input_file)
+    elif len(args.background_file) not in (0, len(args.input_file)):
+        raise ValueError(
+            "--background_file needs to be specified once or for each --input_file."
+        )
+
     return args
 
 
@@ -233,8 +247,8 @@ def load_template(
     return template, center, translation, template_is_density
 
 
-def merge_outputs(data, filepaths: List[str], args):
-    if len(filepaths) == 0:
+def merge_outputs(data, foreground_paths: List[str], background_paths: List[str], args):
+    if len(foreground_paths) == 0:
         return data, 1
 
     if data[0].ndim != data[2].ndim:
@@ -275,8 +289,11 @@ def merge_outputs(data, filepaths: List[str], args):
 
     entities = np.zeros_like(data[0])
     data[0] = _norm_scores(data=data, args=args)
-    for index, filepath in enumerate(filepaths):
-        new_scores = _norm_scores(data=load_pickle(filepath), args=args)
+    for index, filepath in enumerate(foreground_paths):
+        new_scores = _norm_scores(
+            data=load_match_template_output(filepath, background_paths[index]),
+            args=args,
+        )
         indices = new_scores > data[0]
         entities[indices] = index + 1
         data[0][indices] = new_scores[indices]
@@ -284,9 +301,18 @@ def merge_outputs(data, filepaths: List[str], args):
     return data, entities
 
 
+def load_match_template_output(foreground_path, background_path):
+    data = load_pickle(foreground_path)
+    if background_path is not None:
+        data_background = load_pickle(background_path)
+        data[0] = (data[0] - data_background[0]) / (1 - data_background[0])
+        np.fmax(data[0], 0, out=data[0])
+    return data
+
+
 def main():
     args = parse_args()
-    data = load_pickle(args.input_file[0])
+    data = load_match_template_output(args.input_file[0], args.background_file[0])
 
     target_origin, _, sampling_rate, cli_args = data[-1]
 
@@ -326,7 +352,12 @@ def main():
 
     entities = None
     if len(args.input_file) > 1:
-        data, entities = merge_outputs(data=data, filepaths=args.input_file, args=args)
+        data, entities = merge_outputs(
+            data=data,
+            foreground_paths=args.input_file,
+            background_paths=args.background_file,
+            args=args,
+        )
 
     orientations = args.orientations
     if orientations is None:
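Note: both the single-input path and merge_outputs route scores through the new load_match_template_output, which rescales each foreground score by the headroom above the matching background score (for example from a --scramble_phases run) and clamps negatives. A minimal standalone sketch of that arithmetic, with hypothetical values:

    import numpy as np

    # Hypothetical data[0] score maps from two match_template.py pickles:
    # a foreground run and a background run (e.g. --scramble_phases).
    foreground = np.array([0.30, 0.55, 0.10])
    background = np.array([0.20, 0.20, 0.20])

    # Mirrors load_match_template_output: normalize by the remaining headroom.
    normalized = (foreground - background) / (1 - background)
    np.fmax(normalized, 0, out=normalized)  # clamp negatives in place
    print(normalized)  # -> 0.125, 0.4375, 0.0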
@@ -339,24 +370,27 @@ def main():
         target_mask = Density.from_file(args.target_mask)
         scores = scores * target_mask.data
 
-    if args.n_false_positives is not None:
-        args.n_false_positives = max(args.n_false_positives, 1)
-        cropped_shape = np.subtract(
-            scores.shape, np.multiply(args.min_boundary_distance, 2)
-        ).astype(int)
+    cropped_shape = np.subtract(
+        scores.shape, np.multiply(args.min_boundary_distance, 2)
+    ).astype(int)
+
+    if args.min_boundary_distance > 0:
+        scores = centered_mask(scores, new_shape=cropped_shape)
 
-        cropped_shape = tuple(
+    if args.n_false_positives is not None:
+        # Rickgauer et al. 2017
+        cropped_slice = tuple(
             slice(
                 int(args.min_boundary_distance),
                 int(x - args.min_boundary_distance),
             )
             for x in scores.shape
         )
-        # Rickgauer et al. 2017
-        n_correlations = np.size(scores[cropped_shape]) * len(rotation_mapping)
+        args.n_false_positives = max(args.n_false_positives, 1)
+        n_correlations = np.size(scores[cropped_slice]) * len(rotation_mapping)
         minimum_score = np.multiply(
             erfcinv(2 * args.n_false_positives / n_correlations),
-            np.sqrt(2) * np.std(scores[cropped_shape]),
+            np.sqrt(2) * np.std(scores[cropped_slice]),
         )
         print(f"Determined minimum score cutoff: {minimum_score}.")
         minimum_score = max(minimum_score, 0)
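The relocated cutoff keeps the Gaussian noise model of Rickgauer et al. 2017: with n independent correlation scores of standard deviation sigma, a threshold of sqrt(2) * sigma * erfcinv(2k / n) admits roughly k peaks by chance. A standalone sketch with made-up numbers:

    import numpy as np
    from scipy.special import erfcinv

    # Hypothetical search: 480**3 retained voxels times 15000 rotations.
    n_correlations = 480**3 * 15000
    n_false_positives = 1
    score_std = 0.02  # np.std of the cropped score map

    minimum_score = np.multiply(
        erfcinv(2 * n_false_positives / n_correlations),
        np.sqrt(2) * score_std,
    )
    print(minimum_score)  # roughly 0.14 for these values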
@@ -371,6 +405,8 @@ def main():
         "min_distance": args.min_distance,
         "min_boundary_distance": args.min_boundary_distance,
         "batch_dims": args.batch_dims,
+        "minimum_score": args.minimum_score,
+        "maximum_score": args.maximum_score,
     }
 
     peak_caller = PEAK_CALLERS[args.peak_caller](**peak_caller_kwargs)
@@ -380,7 +416,6 @@ def main():
         mask=template.data,
         rotation_mapping=rotation_mapping,
         rotation_array=rotation_array,
-        minimum_score=args.minimum_score,
     )
     candidates = peak_caller.merge(
         candidates=[tuple(peak_caller)], **peak_caller_kwargs
@@ -388,10 +423,16 @@ def main():
         if len(candidates) == 0:
             candidates = [[], [], [], []]
             print("Found no peaks, consider changing peak calling parameters.")
-            exit(0)
+            exit(-1)
 
         for translation, _, score, detail in zip(*candidates):
-            rotations.append(rotation_mapping[rotation_array[tuple(translation)]])
+            rotation_index = rotation_array[tuple(translation)]
+            rotation = rotation_mapping.get(
+                rotation_index, np.zeros(template.data.ndim, int)
+            )
+            if rotation.ndim == 2:
+                rotation = euler_from_rotationmatrix(rotation)
+            rotations.append(rotation)
 
     else:
         candidates = data
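The peak loop above now accepts rotation_mapping entries stored either as Euler angles or as rotation matrices, with a zero-rotation fallback for unmapped indices. A sketch of the same dispatch, using scipy in place of tme's euler_from_rotationmatrix (the "zyx" angle convention is an assumption):

    import numpy as np
    from scipy.spatial.transform import Rotation

    def to_euler(entry):
        # entry plays the role of a rotation_mapping value: Euler angles (1D)
        # or a rotation matrix (2D), normalized here to Euler angles in degrees.
        entry = np.asarray(entry)
        if entry.ndim == 2:
            entry = Rotation.from_matrix(entry).as_euler("zyx", degrees=True)
        return entry

    print(to_euler(np.eye(3)))         # -> [0. 0. 0.]
    print(to_euler([90.0, 0.0, 0.0]))  # 1D input is passed through unchanged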
@@ -430,7 +471,7 @@ def main():
             )
             exit(-1)
         orientations.translations = peak_caller.oversample_peaks(
-            score_space=data[0],
+            scores=data[0],
             peak_positions=orientations.translations,
             oversampling_factor=args.peak_oversampling,
         )
@@ -468,19 +509,7 @@ def main():
 
     target = Density.from_file(cli_args.target)
     if args.invert_target_contrast:
-        if args.output_format == "relion":
-            target.data = target.data * -1
-            target.data = np.divide(
-                np.subtract(target.data, target.data.mean()), target.data.std()
-            )
-        else:
-            target.data = (
-                -np.divide(
-                    np.subtract(target.data, target.data.min()),
-                    np.subtract(target.data.max(), target.data.min()),
-                )
-                + 1
-            )
+        target.data = target.data * -1
 
     if args.output_format in ("extraction", "relion"):
         if not np.all(np.divide(target.shape, template.shape) > 2):
@@ -505,10 +534,14 @@ def main():
 
         working_directory = getcwd()
         if args.output_format == "relion":
+            name = [
+                join(working_directory, f"{args.output_prefix}_{index}.mrc")
+                for index in range(len(cand_slices))
+            ]
             orientations.to_file(
                 filename=f"{args.output_prefix}.star",
                 file_format="relion",
-                name_prefix=join(working_directory, args.output_prefix),
+                name=name,
                 ctf_image=args.wedge_mask,
                 sampling_rate=target.sampling_rate.max(),
                 subtomogram_size=extraction_shape[0],
@@ -565,24 +598,22 @@ def main():
     if args.output_format == "average":
         orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
             target_shape=target.shape,
-            extraction_shape=np.multiply(template.shape, 2),
+            extraction_shape=template.shape,
             drop_out_of_box=True,
             return_orientations=True,
         )
         out = np.zeros_like(template.data)
-        out = np.zeros(np.multiply(template.shape, 2).astype(int))
         for index in range(len(cand_slices)):
-            from scipy.spatial.transform import Rotation
-
-            rotation = Rotation.from_euler(
-                angles=orientations.rotations[index], seq="zyx", degrees=True
-            )
-            rotation_matrix = rotation.inv().as_matrix()
-
             subset = Density(target.data[obs_slices[index]])
-            subset = subset.rigid_transform(rotation_matrix=rotation_matrix, order=1)
+            rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
 
+            subset = subset.rigid_transform(
+                rotation_matrix=np.linalg.inv(rotation_matrix),
+                order=1,
+                use_geometric_center=True,
+            )
             np.add(out, subset.data, out=out)
+
         out /= len(cand_slices)
         ret = Density(out, sampling_rate=template.sampling_rate, origin=0)
         ret.pad(template.shape, center=True)
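The rewritten averaging branch extracts template-sized subvolumes and resamples each with the inverse of its peak rotation about the geometric center, so all copies share one frame before summation. A toy version of that resampling with scipy, standing in for Density.rigid_transform (hypothetical helper, identity rotations for brevity):

    import numpy as np
    from scipy.ndimage import affine_transform

    def rotate_about_center(volume, rotation_matrix):
        # affine_transform maps output to input coordinates, so passing the
        # forward matrix R applies R^-1 to the data, matching the inverse
        # rotation used in the averaging loop above.
        center = (np.array(volume.shape) - 1) / 2
        offset = center - rotation_matrix @ center
        return affine_transform(volume, rotation_matrix, offset=offset, order=1)

    rng = np.random.default_rng(0)
    subvolumes = [rng.random((32, 32, 32)) for _ in range(3)]
    rotations = [np.eye(3)] * 3  # stand-ins for euler_to_rotationmatrix output

    average = sum(
        rotate_about_center(vol, rot) for vol, rot in zip(subvolumes, rotations)
    ) / len(subvolumes)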
@@ -596,17 +627,18 @@ def main():
             target_shape=target.shape,
         )
 
+        # Template is larger than target
         for index, (translation, angles, *_) in enumerate(orientations):
             rotation_matrix = euler_to_rotationmatrix(angles)
             if template_is_density:
-                translation = np.subtract(translation, center)
                 transformed_template = template.rigid_transform(
-                    rotation_matrix=rotation_matrix
-                )
-                transformed_template.origin = np.add(
-                    target_origin, np.multiply(translation, sampling_rate)
+                    rotation_matrix=rotation_matrix, use_geometric_center=True
                 )
 
+                # Just adapting the coordinate system not the in-box position
+                shift = np.multiply(np.subtract(translation, center), sampling_rate)
+                transformed_template.origin = np.add(target_origin, shift)
+
             else:
                 template = Structure.from_file(cli_args.template)
                 new_center_of_mass = np.add(
scripts/preprocess.py CHANGED
@@ -1,93 +1,132 @@
 #!python3
-""" Apply tme.preprocessor.Preprocessor methods to an input file based
-    on a provided yaml configuration obtaiend from preprocessor_gui.py.
+""" Preprocessing routines for template matching.
 
     Copyright (c) 2023 European Molecular Biology Laboratory
 
     Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
 """
-import yaml
+import warnings
 import argparse
-import textwrap
-from tme import Preprocessor, Density
+import numpy as np
+
+from tme import Density, Structure
+from tme.backends import backend as be
+from tme.preprocessing.frequency_filters import BandPassFilter
 
 
 def parse_args():
     parser = argparse.ArgumentParser(
-        description=textwrap.dedent(
-            """
-            Apply preprocessing to an input file based on a provided YAML configuration.
-
-            Expected YAML file format:
-            ```yaml
-            <method_name>:
-                <parameter1>: <value1>
-                <parameter2>: <value2>
-                ...
-            ```
-            """
-        ),
-        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="Perform template matching preprocessing.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
-    parser.add_argument(
-        "-i",
-        "--input_file",
+
+    io_group = parser.add_argument_group("Input / Output")
+    io_group.add_argument(
+        "-m",
+        "--data",
+        dest="data",
         type=str,
         required=True,
-        help="Path to the input data file in CCP4/MRC format.",
+        help="Path to a file in PDB/MMCIF, CCP4/MRC, EM, H5 or a format supported by "
+        "tme.density.Density.from_file "
+        "https://kosinskilab.github.io/pyTME/reference/api/tme.density.Density.from_file.html",
     )
-    parser.add_argument(
-        "-y",
-        "--yaml_file",
+    io_group.add_argument(
+        "-o",
+        "--output",
+        dest="output",
         type=str,
         required=True,
-        help="Path to the YAML configuration file.",
+        help="Path the output should be written to.",
     )
-    parser.add_argument(
-        "-o",
-        "--output_file",
-        type=str,
+
+    box_group = parser.add_argument_group("Box")
+    box_group.add_argument(
+        "--box_size",
+        dest="box_size",
+        type=int,
         required=True,
-        help="Path to output file in CPP4/MRC format..",
+        help="Box size of the output",
     )
-    parser.add_argument(
-        "--compress", action="store_true", help="Compress the output file using gzip."
+    box_group.add_argument(
+        "--sampling_rate",
+        dest="sampling_rate",
+        type=float,
+        required=True,
+        help="Sampling rate of the output file.",
     )
 
+    modulation_group = parser.add_argument_group("Modulation")
+    modulation_group.add_argument(
+        "--invert_contrast",
+        dest="invert_contrast",
+        action="store_true",
+        required=False,
+        help="Inverts the template contrast.",
+    )
+    modulation_group.add_argument(
+        "--lowpass",
+        dest="lowpass",
+        type=float,
+        required=False,
+        default=None,
+        help="Lowpass filter the template to the given resolution. Nyquist by default. "
+        "A value of 0 disables the filter.",
+    )
+    modulation_group.add_argument(
+        "--no_centering",
+        dest="no_centering",
+        action="store_true",
+        help="Assumes the template is already centered and omits centering.",
+    )
     args = parser.parse_args()
-
     return args
 
 
 def main():
     args = parse_args()
-    with open(args.yaml_file, "r") as f:
-        preprocess_settings = yaml.safe_load(f)
 
-    if len(preprocess_settings) > 1:
-        raise NotImplementedError(
-            "Multiple preprocessing methods specified. "
-            "The script currently supports one method at a time."
+    try:
+        data = Structure.from_file(args.data)
+        data = Density.from_structure(data, sampling_rate=args.sampling_rate)
+    except NotImplementedError:
+        data = Density.from_file(args.data)
+
+    if not args.no_centering:
+        data, _ = data.centered(0)
+
+    recommended_box = be.compute_convolution_shapes([args.box_size], [1])[1][0]
+    if recommended_box != args.box_size:
+        warnings.warn(
+            f"Consider using --box_size {recommended_box} instead of {args.box_size}."
         )
 
-    method_name = list(preprocess_settings.keys())[0]
-    if not hasattr(Preprocessor, method_name):
-        raise ValueError(f"Method {method_name} does not exist in Preprocessor.")
+    data.pad(
+        np.multiply(args.box_size, np.divide(args.sampling_rate, data.sampling_rate)),
+        center=True,
+    )
+
+    bpf_mask = 1
+    lowpass = 2 * args.sampling_rate if args.lowpass is None else args.lowpass
+    if args.lowpass != 0:
+        bpf_mask = BandPassFilter(
+            lowpass=lowpass,
+            highpass=None,
+            use_gaussian=True,
+            return_real_fourier=True,
+            shape_is_real_fourier=False,
+        )(shape=data.shape)["data"]
 
-    density = Density.from_file(args.input_file)
-    output = density.empty
+    data_ft = np.fft.rfftn(data.data, s=data.shape)
+    data_ft = np.multiply(data_ft, bpf_mask, out=data_ft)
+    data.data = np.fft.irfftn(data_ft, s=data.shape).real
 
-    method_params = preprocess_settings[method_name]
-    preprocessor = Preprocessor()
-    method = getattr(preprocessor, method_name, None)
-    if not method:
-        raise ValueError(
-            f"{method} does not exist in dge.preprocessor.Preprocessor class."
-        )
+    data = data.resample(args.sampling_rate, method="spline", order=3)
 
-    output.data = method(template=density.data, **method_params)
-    output.to_file(args.output_file, gzip=args.compress)
+    if args.invert_contrast:
+        data.data = data.data * -1
 
+    data.to_file(args.output)
 
 if __name__ == "__main__":
-    main()
+    main()
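The new preprocess.py defaults the lowpass cutoff to the Nyquist resolution (twice the sampling rate) and applies the filter multiplicatively on the rfftn grid. A self-contained sketch of that pattern, with a plain Gaussian lowpass standing in for tme's BandPassFilter (the falloff profile here is an assumption, not the library's exact one):

    import numpy as np

    def gaussian_lowpass(shape, sampling_rate, resolution):
        # Gaussian mask on the np.fft.rfftn frequency grid; cutoff at 1/resolution.
        grids = np.meshgrid(
            *[np.fft.fftfreq(n, d=sampling_rate) for n in shape[:-1]],
            np.fft.rfftfreq(shape[-1], d=sampling_rate),
            indexing="ij",
        )
        freq_norm = np.sqrt(sum(g**2 for g in grids))
        cutoff = 1 / resolution
        return np.exp(-(freq_norm**2) / (2 * cutoff**2))

    data = np.random.default_rng(0).random((64, 64, 64))
    sampling_rate = 4.0  # hypothetical Angstrom per voxel
    mask = gaussian_lowpass(data.shape, sampling_rate, resolution=2 * sampling_rate)

    data_ft = np.fft.rfftn(data)
    filtered = np.fft.irfftn(data_ft * mask, s=data.shape)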