pytme 0.1.9__cp311-cp311-macosx_14_0_arm64.whl → 0.2.0b0__cp311-cp311-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytme-0.1.9.data → pytme-0.2.0b0.data}/scripts/match_template.py +148 -126
- pytme-0.2.0b0.data/scripts/postprocess.py +570 -0
- {pytme-0.1.9.data → pytme-0.2.0b0.data}/scripts/preprocessor_gui.py +244 -60
- {pytme-0.1.9.dist-info → pytme-0.2.0b0.dist-info}/METADATA +3 -1
- pytme-0.2.0b0.dist-info/RECORD +66 -0
- {pytme-0.1.9.dist-info → pytme-0.2.0b0.dist-info}/WHEEL +1 -1
- scripts/extract_candidates.py +218 -0
- scripts/match_template.py +148 -126
- scripts/match_template_filters.py +852 -0
- scripts/postprocess.py +380 -435
- scripts/preprocessor_gui.py +244 -60
- scripts/refine_matches.py +218 -0
- tme/__init__.py +2 -1
- tme/__version__.py +1 -1
- tme/analyzer.py +545 -78
- tme/backends/cupy_backend.py +80 -15
- tme/backends/npfftw_backend.py +33 -2
- tme/backends/pytorch_backend.py +15 -7
- tme/density.py +156 -63
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/matching_constrained.py +195 -0
- tme/matching_data.py +74 -33
- tme/matching_exhaustive.py +351 -208
- tme/matching_memory.py +1 -0
- tme/matching_optimization.py +728 -651
- tme/matching_utils.py +152 -8
- tme/orientations.py +561 -0
- tme/preprocessor.py +21 -18
- tme/structure.py +2 -37
- pytme-0.1.9.data/scripts/postprocess.py +0 -625
- pytme-0.1.9.dist-info/RECORD +0 -61
- {pytme-0.1.9.data → pytme-0.2.0b0.data}/scripts/estimate_ram_usage.py +0 -0
- {pytme-0.1.9.data → pytme-0.2.0b0.data}/scripts/preprocess.py +0 -0
- {pytme-0.1.9.dist-info → pytme-0.2.0b0.dist-info}/LICENSE +0 -0
- {pytme-0.1.9.dist-info → pytme-0.2.0b0.dist-info}/entry_points.txt +0 -0
- {pytme-0.1.9.dist-info → pytme-0.2.0b0.dist-info}/top_level.txt +0 -0
scripts/postprocess.py
CHANGED
@@ -5,19 +5,18 @@
|
|
5
5
|
|
6
6
|
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
|
7
7
|
"""
|
8
|
-
from os import getcwd
|
9
|
-
from os.path import join
|
10
8
|
import argparse
|
11
9
|
from sys import exit
|
12
|
-
from
|
10
|
+
from os import getcwd
|
11
|
+
from os.path import join, abspath
|
12
|
+
from typing import List
|
13
13
|
from os.path import splitext
|
14
|
-
from dataclasses import dataclass
|
15
14
|
|
16
15
|
import numpy as np
|
17
|
-
from scipy.spatial.transform import Rotation
|
18
16
|
from numpy.typing import NDArray
|
17
|
+
from scipy.special import erfcinv
|
19
18
|
|
20
|
-
from tme import Density, Structure
|
19
|
+
from tme import Density, Structure, Orientations
|
21
20
|
from tme.analyzer import (
|
22
21
|
PeakCallerSort,
|
23
22
|
PeakCallerMaximumFilter,
|
@@ -29,7 +28,6 @@ from tme.matching_utils import (
|
|
29
28
|
load_pickle,
|
30
29
|
euler_to_rotationmatrix,
|
31
30
|
euler_from_rotationmatrix,
|
32
|
-
centered_mask,
|
33
31
|
)
|
34
32
|
|
35
33
|
PEAK_CALLERS = {
|
@@ -45,420 +43,325 @@ def parse_args():
|
|
45
43
|
parser = argparse.ArgumentParser(
|
46
44
|
description="Peak Calling for Template Matching Outputs"
|
47
45
|
)
|
48
|
-
|
46
|
+
|
47
|
+
input_group = parser.add_argument_group("Input")
|
48
|
+
output_group = parser.add_argument_group("Output")
|
49
|
+
peak_group = parser.add_argument_group("Peak Calling")
|
50
|
+
additional_group = parser.add_argument_group("Additional Parameters")
|
51
|
+
|
52
|
+
input_group.add_argument(
|
49
53
|
"--input_file",
|
50
54
|
required=True,
|
55
|
+
nargs="+",
|
51
56
|
help="Path to the output of match_template.py.",
|
52
57
|
)
|
53
|
-
|
58
|
+
input_group.add_argument(
|
59
|
+
"--target_mask",
|
60
|
+
required=False,
|
61
|
+
type=str,
|
62
|
+
help="Path to an optional mask applied to template matching scores.",
|
63
|
+
)
|
64
|
+
input_group.add_argument(
|
65
|
+
"--orientations",
|
66
|
+
required=False,
|
67
|
+
type=str,
|
68
|
+
help="Path to file generated using output_format orientations. Can be filtered "
|
69
|
+
"to exclude false-positive peaks. If this file is provided, peak calling "
|
70
|
+
"is skipped and corresponding parameters ignored.",
|
71
|
+
)
|
72
|
+
|
73
|
+
output_group.add_argument(
|
54
74
|
"--output_prefix",
|
55
75
|
required=True,
|
56
|
-
help="
|
76
|
+
help="Output filename, extension will be added based on output_format.",
|
57
77
|
)
|
58
|
-
|
59
|
-
"--
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
78
|
+
output_group.add_argument(
|
79
|
+
"--output_format",
|
80
|
+
choices=[
|
81
|
+
"orientations",
|
82
|
+
"alignment",
|
83
|
+
"extraction",
|
84
|
+
"relion",
|
85
|
+
"backmapping",
|
86
|
+
"average",
|
87
|
+
],
|
88
|
+
default="orientations",
|
89
|
+
help="Available output formats:"
|
90
|
+
"orientations (translation, rotation, and score), "
|
91
|
+
"alignment (aligned template to target based on orientations), "
|
92
|
+
"extraction (extract regions around peaks from targets, i.e. subtomograms), "
|
93
|
+
"relion (perform extraction step and generate corresponding star files), "
|
94
|
+
"backmapping (map template to target using identified peaks),"
|
95
|
+
"average (extract matched regions from target and average them).",
|
66
96
|
)
|
67
|
-
|
97
|
+
|
98
|
+
peak_group.add_argument(
|
99
|
+
"--peak_caller",
|
100
|
+
choices=list(PEAK_CALLERS.keys()),
|
101
|
+
default="PeakCallerScipy",
|
102
|
+
help="Peak caller for local maxima identification.",
|
103
|
+
)
|
104
|
+
peak_group.add_argument(
|
105
|
+
"--minimum_score",
|
106
|
+
type=float,
|
107
|
+
default=None,
|
108
|
+
help="Minimum score from which peaks will be considered.",
|
109
|
+
)
|
110
|
+
peak_group.add_argument(
|
111
|
+
"--maximum_score",
|
112
|
+
type=float,
|
113
|
+
default=None,
|
114
|
+
help="Maximum score until which peaks will be considered.",
|
115
|
+
)
|
116
|
+
peak_group.add_argument(
|
68
117
|
"--min_distance",
|
69
118
|
type=int,
|
70
119
|
default=5,
|
71
|
-
help="Minimum distance between peaks.
|
120
|
+
help="Minimum distance between peaks.",
|
72
121
|
)
|
73
|
-
|
122
|
+
peak_group.add_argument(
|
74
123
|
"--min_boundary_distance",
|
75
124
|
type=int,
|
76
125
|
default=0,
|
77
|
-
help="Minimum distance
|
78
|
-
"is provided.",
|
126
|
+
help="Minimum distance of peaks to target edges.",
|
79
127
|
)
|
80
|
-
|
128
|
+
peak_group.add_argument(
|
81
129
|
"--mask_edges",
|
82
130
|
action="store_true",
|
83
131
|
default=False,
|
84
|
-
help="Whether
|
85
|
-
"
|
132
|
+
help="Whether candidates should not be identified from scores that were "
|
133
|
+
"computed from padded densities. Superseded by min_boundary_distance.",
|
86
134
|
)
|
87
|
-
|
88
|
-
"--
|
89
|
-
type=
|
135
|
+
peak_group.add_argument(
|
136
|
+
"--number_of_peaks",
|
137
|
+
type=int,
|
90
138
|
default=None,
|
91
|
-
|
139
|
+
required=False,
|
140
|
+
help="Upper limit of peaks to call, subject to filtering parameters. Default 1000. "
|
141
|
+
"If minimum_score is provided all peaks scoring higher will be reported.",
|
92
142
|
)
|
93
|
-
|
94
|
-
"--
|
95
|
-
|
96
|
-
default=
|
97
|
-
help="
|
98
|
-
"
|
143
|
+
peak_group.add_argument(
|
144
|
+
"--peak_oversampling",
|
145
|
+
type=int,
|
146
|
+
default=1,
|
147
|
+
help="1 / factor equals voxel precision, e.g. 2 detects half voxel "
|
148
|
+
"translations. Useful for matching structures to electron density maps.",
|
99
149
|
)
|
100
|
-
|
101
|
-
|
150
|
+
|
151
|
+
additional_group.add_argument(
|
152
|
+
"--subtomogram_box_size",
|
153
|
+
type=int,
|
102
154
|
default=None,
|
103
|
-
help="
|
104
|
-
"
|
155
|
+
help="Subtomogram box size, by default equal to the centered template. Will be "
|
156
|
+
"padded to even values if output_format is relion.",
|
105
157
|
)
|
106
|
-
|
107
|
-
"--
|
108
|
-
|
109
|
-
default=
|
110
|
-
help="
|
111
|
-
"
|
112
|
-
|
113
|
-
|
114
|
-
"
|
158
|
+
additional_group.add_argument(
|
159
|
+
"--mask_subtomograms",
|
160
|
+
action="store_true",
|
161
|
+
default=False,
|
162
|
+
help="Whether to mask subtomograms using the template mask. The mask will be "
|
163
|
+
"rotated according to determined angles.",
|
164
|
+
)
|
165
|
+
additional_group.add_argument(
|
166
|
+
"--invert_target_contrast",
|
167
|
+
action="store_true",
|
168
|
+
default=False,
|
169
|
+
help="Whether to invert the target contrast.",
|
170
|
+
)
|
171
|
+
additional_group.add_argument(
|
172
|
+
"--wedge_mask",
|
173
|
+
type=str,
|
174
|
+
default=None,
|
175
|
+
help="Path to file used as ctf_mask for output_format relion.",
|
176
|
+
)
|
177
|
+
additional_group.add_argument(
|
178
|
+
"--n_false_positives",
|
179
|
+
type=int,
|
180
|
+
default=None,
|
181
|
+
required=False,
|
182
|
+
help="Number of accepted false-positives picks to determine minimum score.",
|
115
183
|
)
|
116
|
-
args = parser.parse_args()
|
117
|
-
|
118
|
-
return args
|
119
|
-
|
120
184
|
|
121
|
-
|
122
|
-
class Orientations:
|
123
|
-
#: Return a numpy array with translations of each orientation (n x d).
|
124
|
-
translations: np.ndarray
|
125
|
-
|
126
|
-
#: Return a numpy array with euler angles of each orientation in zxy format (n x d).
|
127
|
-
rotations: np.ndarray
|
128
|
-
|
129
|
-
#: Return a numpy array with the score of each orientation (n, ).
|
130
|
-
scores: np.ndarray
|
131
|
-
|
132
|
-
#: Return a numpy array with additional orientation details (n, ).
|
133
|
-
details: np.ndarray
|
134
|
-
|
135
|
-
def __iter__(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
136
|
-
"""
|
137
|
-
Iterate over the current class instance. Each iteration returns a orientation
|
138
|
-
defined by its translation, rotation, score and additional detail.
|
139
|
-
|
140
|
-
Yields
|
141
|
-
------
|
142
|
-
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
|
143
|
-
A tuple of arrays defining the given orientation.
|
144
|
-
"""
|
145
|
-
yield from zip(self.translations, self.rotations, self.scores, self.details)
|
146
|
-
|
147
|
-
def __getitem__(self, indices: List[int]) -> "Orientations":
|
148
|
-
"""
|
149
|
-
Retrieve a subset of orientations based on the provided indices.
|
150
|
-
|
151
|
-
Parameters
|
152
|
-
----------
|
153
|
-
indices : List[int]
|
154
|
-
A list of indices specifying the orientations to be retrieved.
|
155
|
-
|
156
|
-
Returns
|
157
|
-
-------
|
158
|
-
:py:class:`Orientations`
|
159
|
-
A new :py:class:`Orientations`instance containing only the selected orientations.
|
160
|
-
"""
|
161
|
-
indices = np.asarray(indices)
|
162
|
-
attributes = (
|
163
|
-
"translations",
|
164
|
-
"rotations",
|
165
|
-
"scores",
|
166
|
-
"details",
|
167
|
-
)
|
168
|
-
kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
|
169
|
-
return self.__class__(**kwargs)
|
170
|
-
|
171
|
-
def to_file(self, filename: str, file_format: type, **kwargs) -> None:
|
172
|
-
"""
|
173
|
-
Save the current class instance to a file in the specified format.
|
174
|
-
|
175
|
-
Parameters
|
176
|
-
----------
|
177
|
-
filename : str
|
178
|
-
The name of the file where the orientations will be saved.
|
179
|
-
file_format : type
|
180
|
-
The format in which to save the orientations. Supported formats are 'text' and 'relion'.
|
181
|
-
**kwargs : dict
|
182
|
-
Additional keyword arguments specific to the file format.
|
183
|
-
|
184
|
-
Raises
|
185
|
-
------
|
186
|
-
ValueError
|
187
|
-
If an unsupported file format is specified.
|
188
|
-
"""
|
189
|
-
mapping = {
|
190
|
-
"text": self._to_text,
|
191
|
-
"relion": self._to_relion_star,
|
192
|
-
}
|
193
|
-
|
194
|
-
func = mapping.get(file_format, None)
|
195
|
-
if func is None:
|
196
|
-
raise ValueError(
|
197
|
-
f"{file_format} not implemented. Supported are {','.join(mapping.keys())}."
|
198
|
-
)
|
185
|
+
args = parser.parse_args()
|
199
186
|
|
200
|
-
|
187
|
+
if args.wedge_mask is not None:
|
188
|
+
args.wedge_mask = abspath(args.wedge_mask)
|
201
189
|
|
202
|
-
|
203
|
-
|
204
|
-
Save orientations in a text file format.
|
190
|
+
if args.output_format == "relion" and args.subtomogram_box_size is not None:
|
191
|
+
args.subtomogram_box_size += args.subtomogram_box_size % 2
|
205
192
|
|
206
|
-
|
207
|
-
|
208
|
-
filename : str
|
209
|
-
The name of the file to save the orientations.
|
193
|
+
if args.orientations is not None:
|
194
|
+
args.orientations = Orientations.from_file(filename=args.orientations)
|
210
195
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
"""
|
216
|
-
header = "\t".join(
|
217
|
-
["z", "y", "x", "euler_z", "euler_y", "euler_x", "score", "detail"]
|
218
|
-
)
|
219
|
-
with open(filename, mode="w", encoding="utf-8") as ofile:
|
220
|
-
_ = ofile.write(f"{header}\n")
|
221
|
-
for translation, angles, score, detail in self:
|
222
|
-
translation_string = "\t".join([str(x) for x in translation])
|
223
|
-
angle_string = "\t".join([str(x) for x in angles])
|
224
|
-
_ = ofile.write(
|
225
|
-
f"{translation_string}\t{angle_string}\t{score}\t{detail}\n"
|
226
|
-
)
|
227
|
-
return None
|
228
|
-
|
229
|
-
def _to_relion_star(
|
230
|
-
self,
|
231
|
-
filename: str,
|
232
|
-
name_prefix: str = None,
|
233
|
-
ctf_image: str = None,
|
234
|
-
sampling_rate: float = 1.0,
|
235
|
-
subtomogram_size: int = 0,
|
236
|
-
) -> None:
|
237
|
-
"""
|
238
|
-
Save orientations in RELION's STAR file format.
|
239
|
-
|
240
|
-
Parameters
|
241
|
-
----------
|
242
|
-
filename : str
|
243
|
-
The name of the file to save the orientations.
|
244
|
-
name_prefix : str, optional
|
245
|
-
A prefix to add to the image names in the STAR file.
|
246
|
-
ctf_image : str, optional
|
247
|
-
Path to CTF or wedge mask RELION.
|
248
|
-
sampling_rate : float, optional
|
249
|
-
Subtomogram sampling rate in angstrom per voxel
|
250
|
-
subtomogram_size : int, optional
|
251
|
-
Size of the square shaped subtomogram.
|
252
|
-
|
253
|
-
Notes
|
254
|
-
-----
|
255
|
-
The file is saved with a standard header used in RELION STAR files.
|
256
|
-
Each row in the file corresponds to an orientation.
|
257
|
-
"""
|
258
|
-
optics_header = [
|
259
|
-
"# version 30001",
|
260
|
-
"data_optics",
|
261
|
-
"",
|
262
|
-
"loop_",
|
263
|
-
"_rlnOpticsGroup",
|
264
|
-
"_rlnOpticsGroupName",
|
265
|
-
"_rlnSphericalAberration",
|
266
|
-
"_rlnVoltage",
|
267
|
-
"_rlnImageSize",
|
268
|
-
"_rlnImageDimensionality",
|
269
|
-
"_rlnImagePixelSize",
|
270
|
-
]
|
271
|
-
optics_data = [
|
272
|
-
"1",
|
273
|
-
"opticsGroup1",
|
274
|
-
"2.700000",
|
275
|
-
"300.000000",
|
276
|
-
str(int(subtomogram_size)),
|
277
|
-
"3",
|
278
|
-
str(float(sampling_rate)),
|
279
|
-
]
|
280
|
-
optics_header = "\n".join(optics_header)
|
281
|
-
optics_data = "\t".join(optics_data)
|
282
|
-
|
283
|
-
header = [
|
284
|
-
"data_particles",
|
285
|
-
"",
|
286
|
-
"loop_",
|
287
|
-
"_rlnCoordinateX",
|
288
|
-
"_rlnCoordinateY",
|
289
|
-
"_rlnCoordinateZ",
|
290
|
-
"_rlnImageName",
|
291
|
-
"_rlnAngleRot",
|
292
|
-
"_rlnAngleTilt",
|
293
|
-
"_rlnAnglePsi",
|
294
|
-
"_rlnOpticsGroup",
|
295
|
-
]
|
296
|
-
if ctf_image is not None:
|
297
|
-
header.append("_rlnCtfImage")
|
298
|
-
|
299
|
-
ctf_image = "" if ctf_image is None else f"\t{ctf_image}"
|
300
|
-
|
301
|
-
header = "\n".join(header)
|
302
|
-
name_prefix = "" if name_prefix is None else name_prefix
|
303
|
-
|
304
|
-
with open(filename, mode="w", encoding="utf-8") as ofile:
|
305
|
-
_ = ofile.write(f"{optics_header}\n")
|
306
|
-
_ = ofile.write(f"{optics_data}\n")
|
307
|
-
|
308
|
-
_ = ofile.write("\n# version 30001\n")
|
309
|
-
_ = ofile.write(f"{header}\n")
|
310
|
-
|
311
|
-
# pyTME uses a zyx data layout
|
312
|
-
for index, (translation, rotation, score, detail) in enumerate(self):
|
313
|
-
rotation = Rotation.from_euler("zyx", rotation, degrees=True)
|
314
|
-
rotation = rotation.as_euler(seq="xyx", degrees=True)
|
315
|
-
|
316
|
-
translation_string = "\t".join([str(x) for x in translation][::-1])
|
317
|
-
angle_string = "\t".join([str(x) for x in rotation])
|
318
|
-
name = f"{name_prefix}_{index}.mrc"
|
319
|
-
_ = ofile.write(
|
320
|
-
f"{translation_string}\t{name}\t{angle_string}\t1{ctf_image}\n"
|
321
|
-
)
|
196
|
+
if args.minimum_score is not None or args.n_false_positives is not None:
|
197
|
+
args.number_of_peaks = np.iinfo(np.int64).max
|
198
|
+
else:
|
199
|
+
args.number_of_peaks = 1000
|
322
200
|
|
323
|
-
|
324
|
-
|
325
|
-
@classmethod
|
326
|
-
def from_file(cls, filename: str, file_format: type, **kwargs) -> "Orientations":
|
327
|
-
"""
|
328
|
-
Create an instance of :py:class:`Orientations` from a file.
|
329
|
-
|
330
|
-
Parameters
|
331
|
-
----------
|
332
|
-
filename : str
|
333
|
-
The name of the file from which to read the orientations.
|
334
|
-
file_format : type
|
335
|
-
The format of the file. Currently, only 'text' format is supported.
|
336
|
-
**kwargs : dict
|
337
|
-
Additional keyword arguments specific to the file format.
|
338
|
-
|
339
|
-
Returns
|
340
|
-
-------
|
341
|
-
:py:class:`Orientations`
|
342
|
-
An instance of :py:class:`Orientations` populated with data from the file.
|
343
|
-
|
344
|
-
Raises
|
345
|
-
------
|
346
|
-
ValueError
|
347
|
-
If an unsupported file format is specified.
|
348
|
-
"""
|
349
|
-
mapping = {
|
350
|
-
"text": cls._from_text,
|
351
|
-
}
|
352
|
-
|
353
|
-
func = mapping.get(file_format, None)
|
354
|
-
if func is None:
|
355
|
-
raise ValueError(
|
356
|
-
f"{file_format} not implemented. Supported are {','.join(mapping.keys())}."
|
357
|
-
)
|
201
|
+
return args
|
358
202
|
|
359
|
-
translations, rotations, scores, details, *_ = func(filename=filename, **kwargs)
|
360
|
-
return cls(
|
361
|
-
translations=translations,
|
362
|
-
rotations=rotations,
|
363
|
-
scores=scores,
|
364
|
-
details=details,
|
365
|
-
)
|
366
203
|
|
367
|
-
|
368
|
-
def _from_text(
|
369
|
-
filename: str,
|
370
|
-
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
371
|
-
"""
|
372
|
-
Read orientations from a text file.
|
373
|
-
|
374
|
-
Parameters
|
375
|
-
----------
|
376
|
-
filename : str
|
377
|
-
The name of the file from which to read the orientations.
|
378
|
-
|
379
|
-
Returns
|
380
|
-
-------
|
381
|
-
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
|
382
|
-
A tuple containing numpy arrays for translations, rotations, scores,
|
383
|
-
and details.
|
384
|
-
|
385
|
-
Notes
|
386
|
-
-----
|
387
|
-
The text file is expected to have a header and data in columns corresponding to
|
388
|
-
z, y, x, euler_z, euler_y, euler_x, score, detail.
|
389
|
-
"""
|
390
|
-
with open(filename, mode="r", encoding="utf-8") as infile:
|
391
|
-
data = [x.strip().split("\t") for x in infile.read().split("\n")]
|
392
|
-
_ = data.pop(0)
|
393
|
-
|
394
|
-
translation, rotation, score, detail = [], [], [], []
|
395
|
-
for candidate in data:
|
396
|
-
if len(candidate) <= 1:
|
397
|
-
continue
|
398
|
-
if len(candidate) != 8:
|
399
|
-
candidate.append(-1)
|
400
|
-
|
401
|
-
candidate = [float(x) for x in candidate]
|
402
|
-
translation.append((candidate[0], candidate[1], candidate[2]))
|
403
|
-
rotation.append((candidate[3], candidate[4], candidate[5]))
|
404
|
-
score.append(candidate[6])
|
405
|
-
detail.append(candidate[7])
|
406
|
-
|
407
|
-
translation = np.vstack(translation).astype(int)
|
408
|
-
rotation = np.vstack(rotation).astype(float)
|
409
|
-
score = np.array(score).astype(float)
|
410
|
-
detail = np.array(detail).astype(float)
|
411
|
-
|
412
|
-
return translation, rotation, score, detail
|
413
|
-
|
414
|
-
|
415
|
-
def load_template(filepath: str, sampling_rate: NDArray) -> "Density":
|
204
|
+
def load_template(filepath: str, sampling_rate: NDArray, center: bool = True):
|
416
205
|
try:
|
417
206
|
template = Density.from_file(filepath)
|
418
|
-
template, _ = template.centered(0)
|
419
207
|
center_of_mass = template.center_of_mass(template.data)
|
208
|
+
template_is_density = True
|
420
209
|
except ValueError:
|
421
210
|
template = Structure.from_file(filepath)
|
422
211
|
center_of_mass = template.center_of_mass()[::-1]
|
423
212
|
template = Density.from_structure(template, sampling_rate=sampling_rate)
|
213
|
+
template_is_density = False
|
214
|
+
|
215
|
+
translation = np.zeros_like(center_of_mass)
|
216
|
+
if center:
|
217
|
+
template, translation = template.centered(0)
|
218
|
+
|
219
|
+
return template, center_of_mass, translation, template_is_density
|
220
|
+
|
424
221
|
|
425
|
-
|
222
|
+
def merge_outputs(data, filepaths: List[str], args):
|
223
|
+
if len(filepaths) == 0:
|
224
|
+
return data, 1
|
225
|
+
|
226
|
+
if data[0].ndim != data[2].ndim:
|
227
|
+
return data, 1
|
228
|
+
|
229
|
+
from tme.matching_exhaustive import _normalize_under_mask
|
230
|
+
|
231
|
+
def _norm_scores(data, args):
|
232
|
+
target_origin, _, sampling_rate, cli_args = data[-1]
|
233
|
+
|
234
|
+
_, template_extension = splitext(cli_args.template)
|
235
|
+
ret = load_template(
|
236
|
+
filepath=cli_args.template,
|
237
|
+
sampling_rate=sampling_rate,
|
238
|
+
center=not cli_args.no_centering,
|
239
|
+
)
|
240
|
+
template, center_of_mass, translation, template_is_density = ret
|
241
|
+
|
242
|
+
if args.mask_edges and args.min_boundary_distance == 0:
|
243
|
+
max_shape = np.max(template.shape)
|
244
|
+
args.min_boundary_distance = np.ceil(np.divide(max_shape, 2))
|
245
|
+
|
246
|
+
target_mask = 1
|
247
|
+
if args.target_mask is not None:
|
248
|
+
target_mask = Density.from_file(args.target_mask).data
|
249
|
+
elif cli_args.target_mask is not None:
|
250
|
+
target_mask = Density.from_file(args.target_mask).data
|
251
|
+
|
252
|
+
mask = np.ones_like(data[0])
|
253
|
+
np.multiply(mask, target_mask, out=mask)
|
254
|
+
|
255
|
+
cropped_shape = np.subtract(
|
256
|
+
mask.shape, np.multiply(args.min_boundary_distance, 2)
|
257
|
+
).astype(int)
|
258
|
+
mask[cropped_shape] = 0
|
259
|
+
_normalize_under_mask(template=data[0], mask=mask, mask_intensity=mask.sum())
|
260
|
+
return data[0]
|
261
|
+
|
262
|
+
entities = np.zeros_like(data[0])
|
263
|
+
data[0] = _norm_scores(data=data, args=args)
|
264
|
+
for index, filepath in enumerate(filepaths):
|
265
|
+
new_scores = _norm_scores(data=load_pickle(filepath), args=args)
|
266
|
+
indices = new_scores > data[0]
|
267
|
+
entities[indices] = index + 1
|
268
|
+
data[0][indices] = new_scores[indices]
|
269
|
+
|
270
|
+
return data, entities
|
426
271
|
|
427
272
|
|
428
273
|
def main():
|
429
274
|
args = parse_args()
|
430
|
-
data = load_pickle(args.input_file)
|
275
|
+
data = load_pickle(args.input_file[0])
|
431
276
|
|
432
|
-
|
433
|
-
target_origin, _, sampling_rate, cli_args = meta
|
277
|
+
target_origin, _, sampling_rate, cli_args = data[-1]
|
434
278
|
|
435
|
-
|
436
|
-
|
437
|
-
|
279
|
+
_, template_extension = splitext(cli_args.template)
|
280
|
+
ret = load_template(
|
281
|
+
filepath=cli_args.template,
|
282
|
+
sampling_rate=sampling_rate,
|
283
|
+
center=not cli_args.no_centering,
|
284
|
+
)
|
285
|
+
template, center_of_mass, translation, template_is_density = ret
|
286
|
+
|
287
|
+
if args.output_format == "relion" and args.subtomogram_box_size is None:
|
288
|
+
new_shape = np.add(template.shape, np.mod(template.shape, 2))
|
289
|
+
new_shape = np.repeat(new_shape.max(), new_shape.size).astype(int)
|
290
|
+
print(f"Padding template from {template.shape} to {new_shape} for RELION.")
|
291
|
+
template.pad(new_shape)
|
292
|
+
|
293
|
+
template_mask = template.empty
|
294
|
+
template_mask.data[:] = 1
|
295
|
+
if cli_args.template_mask is not None:
|
296
|
+
template_mask = Density.from_file(cli_args.template_mask)
|
297
|
+
template_mask.pad(template.shape, center=False)
|
298
|
+
origin_translation = np.divide(
|
299
|
+
np.subtract(template.origin, template_mask.origin), template.sampling_rate
|
438
300
|
)
|
301
|
+
translation = np.add(translation, origin_translation)
|
439
302
|
|
440
|
-
|
303
|
+
template_mask = template_mask.rigid_transform(
|
304
|
+
rotation_matrix=np.eye(template_mask.data.ndim),
|
305
|
+
translation=-translation,
|
306
|
+
order=1,
|
307
|
+
)
|
308
|
+
|
309
|
+
if args.mask_edges and args.min_boundary_distance == 0:
|
310
|
+
max_shape = np.max(template.shape)
|
311
|
+
args.min_boundary_distance = np.ceil(np.divide(max_shape, 2))
|
312
|
+
|
313
|
+
# data, entities = merge_outputs(data=data, filepaths=args.input_file[1:], args=args)
|
314
|
+
|
315
|
+
orientations = args.orientations
|
316
|
+
if orientations is None:
|
441
317
|
translations, rotations, scores, details = [], [], [], []
|
442
318
|
# Output is MaxScoreOverRotations
|
443
319
|
if data[0].ndim == data[2].ndim:
|
444
320
|
scores, offset, rotation_array, rotation_mapping, meta = data
|
445
|
-
|
446
|
-
|
447
|
-
|
321
|
+
|
322
|
+
if args.target_mask is not None:
|
323
|
+
target_mask = Density.from_file(args.target_mask)
|
324
|
+
scores = scores * target_mask.data
|
325
|
+
|
326
|
+
if args.n_false_positives is not None:
|
327
|
+
args.n_false_positives = max(args.n_false_positives, 1)
|
328
|
+
cropped_shape = np.subtract(
|
329
|
+
scores.shape, np.multiply(args.min_boundary_distance, 2)
|
330
|
+
).astype(int)
|
331
|
+
|
332
|
+
cropped_shape = tuple(
|
333
|
+
slice(
|
334
|
+
int(args.min_boundary_distance),
|
335
|
+
int(x - args.min_boundary_distance),
|
336
|
+
)
|
337
|
+
for x in scores.shape
|
448
338
|
)
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
339
|
+
# Rickgauer et al. 2017
|
340
|
+
n_correlations = np.size(scores[cropped_shape]) * len(rotation_mapping)
|
341
|
+
minimum_score = np.multiply(
|
342
|
+
erfcinv(2 * args.n_false_positives / n_correlations),
|
343
|
+
np.sqrt(2) * np.std(scores[cropped_shape]),
|
344
|
+
)
|
345
|
+
print(f"Determined minimum score cutoff: {minimum_score}.")
|
346
|
+
minimum_score = max(minimum_score, 0)
|
347
|
+
args.minimum_score = minimum_score
|
455
348
|
|
456
349
|
peak_caller = PEAK_CALLERS[args.peak_caller](
|
457
350
|
number_of_peaks=args.number_of_peaks,
|
458
351
|
min_distance=args.min_distance,
|
459
352
|
min_boundary_distance=args.min_boundary_distance,
|
460
353
|
)
|
461
|
-
|
354
|
+
if args.minimum_score is not None:
|
355
|
+
args.number_of_peaks = np.inf
|
356
|
+
|
357
|
+
peak_caller(
|
358
|
+
scores,
|
359
|
+
rotation_matrix=np.eye(3),
|
360
|
+
mask=template.data,
|
361
|
+
rotation_mapping=rotation_mapping,
|
362
|
+
rotation_array=rotation_array,
|
363
|
+
minimum_score=args.minimum_score,
|
364
|
+
)
|
462
365
|
candidates = peak_caller.merge(
|
463
366
|
candidates=[tuple(peak_caller)],
|
464
367
|
number_of_peaks=args.number_of_peaks,
|
@@ -466,16 +369,15 @@ def main():
|
|
466
369
|
min_boundary_distance=args.min_boundary_distance,
|
467
370
|
)
|
468
371
|
if len(candidates) == 0:
|
469
|
-
|
470
|
-
|
471
|
-
)
|
372
|
+
print("Found no peaks. Consider changing peak calling parameters.")
|
373
|
+
exit(-1)
|
472
374
|
|
473
375
|
for translation, _, score, detail in zip(*candidates):
|
474
376
|
rotations.append(rotation_mapping[rotation_array[tuple(translation)]])
|
475
377
|
|
476
378
|
else:
|
477
379
|
candidates = data
|
478
|
-
translation, rotation,
|
380
|
+
translation, rotation, *_ = data
|
479
381
|
for i in range(translation.shape[0]):
|
480
382
|
rotations.append(euler_from_rotationmatrix(rotation[i]))
|
481
383
|
|
@@ -488,25 +390,35 @@ def main():
|
|
488
390
|
details=details,
|
489
391
|
)
|
490
392
|
|
393
|
+
if args.minimum_score is not None:
|
394
|
+
keep = orientations.scores >= args.minimum_score
|
395
|
+
orientations = orientations[keep]
|
396
|
+
|
397
|
+
if args.maximum_score is not None:
|
398
|
+
keep = orientations.scores <= args.maximum_score
|
399
|
+
orientations = orientations[keep]
|
400
|
+
|
491
401
|
if args.output_format == "orientations":
|
492
402
|
orientations.to_file(filename=f"{args.output_prefix}.tsv", file_format="text")
|
493
403
|
exit(0)
|
494
404
|
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
405
|
+
target = Density.from_file(cli_args.target)
|
406
|
+
if args.invert_target_contrast:
|
407
|
+
if args.output_format == "relion":
|
408
|
+
target.data = target.data * -1
|
409
|
+
target.data = np.divide(
|
410
|
+
np.subtract(target.data, target.data.mean()), target.data.std()
|
411
|
+
)
|
412
|
+
else:
|
413
|
+
target.data = (
|
414
|
+
-np.divide(
|
415
|
+
np.subtract(target.data, target.data.min()),
|
416
|
+
np.subtract(target.data.max(), target.data.min()),
|
417
|
+
)
|
418
|
+
+ 1
|
419
|
+
)
|
506
420
|
|
507
421
|
if args.output_format in ("extraction", "relion"):
|
508
|
-
target = Density.from_file(cli_args.target)
|
509
|
-
|
510
422
|
if not np.all(np.divide(target.shape, template.shape) > 2):
|
511
423
|
print(
|
512
424
|
"Target might be too small relative to template to extract"
|
@@ -514,26 +426,19 @@ def main():
|
|
514
426
|
f" Target : {target.shape}, template : {template.shape}."
|
515
427
|
)
|
516
428
|
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
left_pad = half_shape
|
522
|
-
right_pad = np.add(half_shape, max_shape % 2)
|
523
|
-
starts = np.subtract(peaks, left_pad)
|
524
|
-
stops = np.add(peaks, right_pad)
|
525
|
-
|
526
|
-
candidate_starts = np.maximum(starts, 0).astype(int)
|
527
|
-
candidate_stops = np.minimum(stops, target.shape).astype(int)
|
528
|
-
keep_peaks = (
|
529
|
-
np.sum(
|
530
|
-
np.multiply(starts == candidate_starts, stops == candidate_stops),
|
531
|
-
axis=1,
|
429
|
+
extraction_shape = template.shape
|
430
|
+
if args.subtomogram_box_size is not None:
|
431
|
+
extraction_shape = np.repeat(
|
432
|
+
args.subtomogram_box_size, len(extraction_shape)
|
532
433
|
)
|
533
|
-
|
434
|
+
|
435
|
+
orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
|
436
|
+
target_shape=target.shape,
|
437
|
+
extraction_shape=extraction_shape,
|
438
|
+
drop_out_of_box=True,
|
439
|
+
return_orientations=True,
|
534
440
|
)
|
535
441
|
|
536
|
-
orientations = orientations[keep_peaks]
|
537
442
|
working_directory = getcwd()
|
538
443
|
if args.output_format == "relion":
|
539
444
|
orientations.to_file(
|
@@ -542,62 +447,101 @@ def main():
|
|
542
447
|
name_prefix=join(working_directory, args.output_prefix),
|
543
448
|
ctf_image=args.wedge_mask,
|
544
449
|
sampling_rate=target.sampling_rate.max(),
|
545
|
-
subtomogram_size=
|
450
|
+
subtomogram_size=extraction_shape[0],
|
546
451
|
)
|
547
452
|
|
548
|
-
|
549
|
-
|
550
|
-
stops = stops[keep_peaks,]
|
551
|
-
candidate_starts = candidate_starts[keep_peaks,]
|
552
|
-
candidate_stops = candidate_stops[keep_peaks,]
|
553
|
-
|
554
|
-
if not len(peaks):
|
555
|
-
print(
|
556
|
-
"No peak remaining after filtering. Started with"
|
557
|
-
f" {orientations.translations.shape[0]} filtered to {peaks.shape[0]}."
|
558
|
-
" Consider reducing min_distance, increase num_peaks or use"
|
559
|
-
" a different peak caller."
|
560
|
-
)
|
561
|
-
exit(-1)
|
562
|
-
|
563
|
-
observation_starts = np.subtract(candidate_starts, starts).astype(int)
|
564
|
-
observation_stops = np.subtract(np.add(max_shape, candidate_stops), stops)
|
565
|
-
observation_stops = observation_stops.astype(int)
|
566
|
-
|
567
|
-
candidate_slices = [
|
568
|
-
tuple(slice(s, e) for s, e in zip(start_row, stop_row))
|
569
|
-
for start_row, stop_row in zip(candidate_starts, candidate_stops)
|
570
|
-
]
|
571
|
-
|
572
|
-
observation_slices = [
|
573
|
-
tuple(slice(s, e) for s, e in zip(start_row, stop_row))
|
574
|
-
for start_row, stop_row in zip(observation_starts, observation_stops)
|
575
|
-
]
|
576
|
-
observations = np.zeros(
|
577
|
-
(len(candidate_slices), max_shape, max_shape, max_shape)
|
578
|
-
)
|
579
|
-
|
580
|
-
slices = zip(candidate_slices, observation_slices)
|
453
|
+
observations = np.zeros((len(cand_slices), *extraction_shape))
|
454
|
+
slices = zip(cand_slices, obs_slices)
|
581
455
|
for idx, (cand_slice, obs_slice) in enumerate(slices):
|
582
|
-
observations[idx][:] = np.mean(target.data[
|
583
|
-
observations[idx][
|
456
|
+
observations[idx][:] = np.mean(target.data[obs_slice])
|
457
|
+
observations[idx][cand_slice] = target.data[obs_slice]
|
584
458
|
|
585
459
|
for index in range(observations.shape[0]):
|
460
|
+
cand_start = [x.start for x in cand_slices[index]]
|
586
461
|
out_density = Density(
|
587
462
|
data=observations[index],
|
588
463
|
sampling_rate=sampling_rate,
|
589
|
-
origin=
|
464
|
+
origin=np.multiply(cand_start, sampling_rate),
|
590
465
|
)
|
591
|
-
|
466
|
+
if args.mask_subtomograms:
|
467
|
+
rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
|
468
|
+
mask_transfomed = template_mask.rigid_transform(
|
469
|
+
rotation_matrix=rotation_matrix, order=1
|
470
|
+
)
|
471
|
+
out_density.data = out_density.data * mask_transfomed.data
|
592
472
|
out_density.to_file(
|
593
473
|
join(working_directory, f"{args.output_prefix}_{index}.mrc")
|
594
474
|
)
|
595
475
|
|
596
476
|
exit(0)
|
597
477
|
|
598
|
-
|
599
|
-
|
478
|
+
if args.output_format == "backmapping":
|
479
|
+
orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
|
480
|
+
target_shape=target.shape,
|
481
|
+
extraction_shape=template.shape,
|
482
|
+
drop_out_of_box=True,
|
483
|
+
return_orientations=True,
|
484
|
+
)
|
485
|
+
ret, template_sum = target.empty, template.data.sum()
|
486
|
+
for index in range(len(cand_slices)):
|
487
|
+
rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
|
600
488
|
|
489
|
+
transformed_template = template.rigid_transform(
|
490
|
+
rotation_matrix=rotation_matrix
|
491
|
+
)
|
492
|
+
transformed_template.data = np.multiply(
|
493
|
+
transformed_template.data,
|
494
|
+
np.divide(template_sum, transformed_template.data.sum()),
|
495
|
+
)
|
496
|
+
cand_slice, obs_slice = cand_slices[index], obs_slices[index]
|
497
|
+
ret.data[obs_slice] += transformed_template.data[cand_slice]
|
498
|
+
ret.to_file(f"{args.output_prefix}_backmapped.mrc")
|
499
|
+
exit(0)
|
500
|
+
|
501
|
+
if args.output_format == "average":
|
502
|
+
orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
|
503
|
+
target_shape=target.shape,
|
504
|
+
extraction_shape=np.multiply(template.shape, 2),
|
505
|
+
drop_out_of_box=True,
|
506
|
+
return_orientations=True,
|
507
|
+
)
|
508
|
+
out = np.zeros_like(template.data)
|
509
|
+
out = np.zeros(np.multiply(template.shape, 2).astype(int))
|
510
|
+
for index in range(len(cand_slices)):
|
511
|
+
from scipy.spatial.transform import Rotation
|
512
|
+
|
513
|
+
rotation = Rotation.from_euler(
|
514
|
+
angles=orientations.rotations[index], seq="zyx", degrees=True
|
515
|
+
)
|
516
|
+
rotation_matrix = rotation.inv().as_matrix()
|
517
|
+
|
518
|
+
# rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
|
519
|
+
subset = Density(target.data[obs_slices[index]])
|
520
|
+
subset = subset.rigid_transform(rotation_matrix=rotation_matrix, order=1)
|
521
|
+
|
522
|
+
np.add(out, subset.data, out=out)
|
523
|
+
out /= len(cand_slices)
|
524
|
+
ret = Density(out, sampling_rate=template.sampling_rate, origin=0)
|
525
|
+
ret.pad(template.shape, center=True)
|
526
|
+
ret.to_file(f"{args.output_prefix}_average.mrc")
|
527
|
+
exit(0)
|
528
|
+
|
529
|
+
if args.peak_oversampling > 1:
|
530
|
+
peak_caller = peak_caller = PEAK_CALLERS[args.peak_caller]()
|
531
|
+
if data[0].ndim != data[2].ndim:
|
532
|
+
print(
|
533
|
+
"Input pickle does not contain template matching scores."
|
534
|
+
" Cannot oversample peaks."
|
535
|
+
)
|
536
|
+
exit(-1)
|
537
|
+
orientations.translations = peak_caller.oversample_peaks(
|
538
|
+
score_space=data[0],
|
539
|
+
translations=orientations.translations,
|
540
|
+
oversampling_factor=args.oversampling_factor,
|
541
|
+
)
|
542
|
+
|
543
|
+
for index, (translation, angles, *_) in enumerate(orientations):
|
544
|
+
rotation_matrix = euler_to_rotationmatrix(angles)
|
601
545
|
if template_is_density:
|
602
546
|
translation = np.subtract(translation, center_of_mass)
|
603
547
|
transformed_template = template.rigid_transform(
|
@@ -606,6 +550,7 @@ def main():
|
|
606
550
|
new_origin = np.add(target_origin / sampling_rate, translation)
|
607
551
|
transformed_template.origin = np.multiply(new_origin, sampling_rate)
|
608
552
|
else:
|
553
|
+
template = Structure.from_file(cli_args.template)
|
609
554
|
new_center_of_mass = np.add(
|
610
555
|
np.multiply(translation, sampling_rate), target_origin
|
611
556
|
)
|
@@ -614,7 +559,7 @@ def main():
|
|
614
559
|
translation=translation[::-1],
|
615
560
|
rotation_matrix=rotation_matrix[::-1, ::-1],
|
616
561
|
)
|
617
|
-
# template_extension should contain
|
562
|
+
# template_extension should contain '.'
|
618
563
|
transformed_template.to_file(
|
619
564
|
f"{args.output_prefix}_{index}{template_extension}"
|
620
565
|
)
|