pytme 0.1.9__cp311-cp311-macosx_14_0_arm64.whl → 0.2.0__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. pytme-0.2.0.data/scripts/match_template.py +1019 -0
  2. pytme-0.2.0.data/scripts/postprocess.py +570 -0
  3. {pytme-0.1.9.data → pytme-0.2.0.data}/scripts/preprocessor_gui.py +244 -60
  4. {pytme-0.1.9.dist-info → pytme-0.2.0.dist-info}/METADATA +3 -1
  5. pytme-0.2.0.dist-info/RECORD +72 -0
  6. {pytme-0.1.9.dist-info → pytme-0.2.0.dist-info}/WHEEL +1 -1
  7. scripts/extract_candidates.py +218 -0
  8. scripts/match_template.py +459 -218
  9. pytme-0.1.9.data/scripts/match_template.py → scripts/match_template_filters.py +459 -218
  10. scripts/postprocess.py +380 -435
  11. scripts/preprocessor_gui.py +244 -60
  12. scripts/refine_matches.py +218 -0
  13. tme/__init__.py +2 -1
  14. tme/__version__.py +1 -1
  15. tme/analyzer.py +533 -78
  16. tme/backends/cupy_backend.py +80 -15
  17. tme/backends/npfftw_backend.py +35 -6
  18. tme/backends/pytorch_backend.py +15 -7
  19. tme/density.py +173 -78
  20. tme/extensions.cpython-311-darwin.so +0 -0
  21. tme/matching_constrained.py +195 -0
  22. tme/matching_data.py +76 -33
  23. tme/matching_exhaustive.py +354 -225
  24. tme/matching_memory.py +1 -0
  25. tme/matching_optimization.py +753 -649
  26. tme/matching_utils.py +152 -8
  27. tme/orientations.py +561 -0
  28. tme/preprocessing/__init__.py +2 -0
  29. tme/preprocessing/_utils.py +176 -0
  30. tme/preprocessing/composable_filter.py +30 -0
  31. tme/preprocessing/compose.py +52 -0
  32. tme/preprocessing/frequency_filters.py +322 -0
  33. tme/preprocessing/tilt_series.py +967 -0
  34. tme/preprocessor.py +35 -25
  35. tme/structure.py +2 -37
  36. pytme-0.1.9.data/scripts/postprocess.py +0 -625
  37. pytme-0.1.9.dist-info/RECORD +0 -61
  38. {pytme-0.1.9.data → pytme-0.2.0.data}/scripts/estimate_ram_usage.py +0 -0
  39. {pytme-0.1.9.data → pytme-0.2.0.data}/scripts/preprocess.py +0 -0
  40. {pytme-0.1.9.dist-info → pytme-0.2.0.dist-info}/LICENSE +0 -0
  41. {pytme-0.1.9.dist-info → pytme-0.2.0.dist-info}/entry_points.txt +0 -0
  42. {pytme-0.1.9.dist-info → pytme-0.2.0.dist-info}/top_level.txt +0 -0
scripts/postprocess.py CHANGED
@@ -5,19 +5,18 @@
5
5
 
6
6
  Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
7
7
  """
8
- from os import getcwd
9
- from os.path import join
10
8
  import argparse
11
9
  from sys import exit
12
- from typing import List, Tuple
10
+ from os import getcwd
11
+ from os.path import join, abspath
12
+ from typing import List
13
13
  from os.path import splitext
14
- from dataclasses import dataclass
15
14
 
16
15
  import numpy as np
17
- from scipy.spatial.transform import Rotation
18
16
  from numpy.typing import NDArray
17
+ from scipy.special import erfcinv
19
18
 
20
- from tme import Density, Structure
19
+ from tme import Density, Structure, Orientations
21
20
  from tme.analyzer import (
22
21
  PeakCallerSort,
23
22
  PeakCallerMaximumFilter,
@@ -29,7 +28,6 @@ from tme.matching_utils import (
29
28
  load_pickle,
30
29
  euler_to_rotationmatrix,
31
30
  euler_from_rotationmatrix,
32
- centered_mask,
33
31
  )
34
32
 
35
33
  PEAK_CALLERS = {
@@ -45,420 +43,325 @@ def parse_args():
45
43
  parser = argparse.ArgumentParser(
46
44
  description="Peak Calling for Template Matching Outputs"
47
45
  )
48
- parser.add_argument(
46
+
47
+ input_group = parser.add_argument_group("Input")
48
+ output_group = parser.add_argument_group("Output")
49
+ peak_group = parser.add_argument_group("Peak Calling")
50
+ additional_group = parser.add_argument_group("Additional Parameters")
51
+
52
+ input_group.add_argument(
49
53
  "--input_file",
50
54
  required=True,
55
+ nargs="+",
51
56
  help="Path to the output of match_template.py.",
52
57
  )
53
- parser.add_argument(
58
+ input_group.add_argument(
59
+ "--target_mask",
60
+ required=False,
61
+ type=str,
62
+ help="Path to an optional mask applied to template matching scores.",
63
+ )
64
+ input_group.add_argument(
65
+ "--orientations",
66
+ required=False,
67
+ type=str,
68
+ help="Path to file generated using output_format orientations. Can be filtered "
69
+ "to exclude false-positive peaks. If this file is provided, peak calling "
70
+ "is skipped and corresponding parameters ignored.",
71
+ )
72
+
73
+ output_group.add_argument(
54
74
  "--output_prefix",
55
75
  required=True,
56
- help="Prefix for the output file name. Extension depends on output_format.",
76
+ help="Output filename, extension will be added based on output_format.",
57
77
  )
58
- parser.add_argument(
59
- "--number_of_peaks",
60
- type=int,
61
- default=1000,
62
- help="Number of peaks to consider. Note, this is the number of called peaks "
63
- ", subject to min_distance and min_boundary_distance filtering. Therefore, the "
64
- "returned number of peaks will be at most equal to number_of_peaks. "
65
- "Ignored when --orientations is provided.",
78
+ output_group.add_argument(
79
+ "--output_format",
80
+ choices=[
81
+ "orientations",
82
+ "alignment",
83
+ "extraction",
84
+ "relion",
85
+ "backmapping",
86
+ "average",
87
+ ],
88
+ default="orientations",
89
+ help="Available output formats:"
90
+ "orientations (translation, rotation, and score), "
91
+ "alignment (aligned template to target based on orientations), "
92
+ "extraction (extract regions around peaks from targets, i.e. subtomograms), "
93
+ "relion (perform extraction step and generate corresponding star files), "
94
+ "backmapping (map template to target using identified peaks),"
95
+ "average (extract matched regions from target and average them).",
66
96
  )
67
- parser.add_argument(
97
+
98
+ peak_group.add_argument(
99
+ "--peak_caller",
100
+ choices=list(PEAK_CALLERS.keys()),
101
+ default="PeakCallerScipy",
102
+ help="Peak caller for local maxima identification.",
103
+ )
104
+ peak_group.add_argument(
105
+ "--minimum_score",
106
+ type=float,
107
+ default=None,
108
+ help="Minimum score from which peaks will be considered.",
109
+ )
110
+ peak_group.add_argument(
111
+ "--maximum_score",
112
+ type=float,
113
+ default=None,
114
+ help="Maximum score until which peaks will be considered.",
115
+ )
116
+ peak_group.add_argument(
68
117
  "--min_distance",
69
118
  type=int,
70
119
  default=5,
71
- help="Minimum distance between peaks. Ignored when --orientations is provided.",
120
+ help="Minimum distance between peaks.",
72
121
  )
73
- parser.add_argument(
122
+ peak_group.add_argument(
74
123
  "--min_boundary_distance",
75
124
  type=int,
76
125
  default=0,
77
- help="Minimum distance from target boundaries. Ignored when --orientations "
78
- "is provided.",
126
+ help="Minimum distance of peaks to target edges.",
79
127
  )
80
- parser.add_argument(
128
+ peak_group.add_argument(
81
129
  "--mask_edges",
82
130
  action="store_true",
83
131
  default=False,
84
- help="Whether to mask edges of the input score array according to the template shape."
85
- "Uses twice the value of --min_boundary_distance if boht are provided.",
132
+ help="Whether candidates should not be identified from scores that were "
133
+ "computed from padded densities. Superseded by min_boundary_distance.",
86
134
  )
87
- parser.add_argument(
88
- "--wedge_mask",
89
- type=str,
135
+ peak_group.add_argument(
136
+ "--number_of_peaks",
137
+ type=int,
90
138
  default=None,
91
- help="Path to Fourier space mask. Only considered if output_format is relion.",
139
+ required=False,
140
+ help="Upper limit of peaks to call, subject to filtering parameters. Default 1000. "
141
+ "If minimum_score is provided all peaks scoring higher will be reported.",
92
142
  )
93
- parser.add_argument(
94
- "--peak_caller",
95
- choices=list(PEAK_CALLERS.keys()),
96
- default="PeakCallerScipy",
97
- help="Peak caller to use for analysis. Ignored if input_file contains peaks or when "
98
- "--orientations is provided.",
143
+ peak_group.add_argument(
144
+ "--peak_oversampling",
145
+ type=int,
146
+ default=1,
147
+ help="1 / factor equals voxel precision, e.g. 2 detects half voxel "
148
+ "translations. Useful for matching structures to electron density maps.",
99
149
  )
100
- parser.add_argument(
101
- "--orientations",
150
+
151
+ additional_group.add_argument(
152
+ "--subtomogram_box_size",
153
+ type=int,
102
154
  default=None,
103
- help="Path to orientations file to overwrite orientations computed from"
104
- " match_template.py output.",
155
+ help="Subtomogram box size, by default equal to the centered template. Will be "
156
+ "padded to even values if output_format is relion.",
105
157
  )
106
- parser.add_argument(
107
- "--output_format",
108
- choices=["orientations", "alignment", "extraction", "relion"],
109
- default="orientations",
110
- help="Choose the output format. Available formats are: "
111
- "orientations (translation, rotation, and score), "
112
- "alignment (aligned template to target based on orientations), "
113
- "extraction (extract regions around peaks from targets, i.e. subtomograms). "
114
- "relion (perform extraction step and generate corresponding star files).",
158
+ additional_group.add_argument(
159
+ "--mask_subtomograms",
160
+ action="store_true",
161
+ default=False,
162
+ help="Whether to mask subtomograms using the template mask. The mask will be "
163
+ "rotated according to determined angles.",
164
+ )
165
+ additional_group.add_argument(
166
+ "--invert_target_contrast",
167
+ action="store_true",
168
+ default=False,
169
+ help="Whether to invert the target contrast.",
170
+ )
171
+ additional_group.add_argument(
172
+ "--wedge_mask",
173
+ type=str,
174
+ default=None,
175
+ help="Path to file used as ctf_mask for output_format relion.",
176
+ )
177
+ additional_group.add_argument(
178
+ "--n_false_positives",
179
+ type=int,
180
+ default=None,
181
+ required=False,
182
+ help="Number of accepted false-positives picks to determine minimum score.",
115
183
  )
116
- args = parser.parse_args()
117
-
118
- return args
119
-
120
184
 
121
- @dataclass
122
- class Orientations:
123
- #: Return a numpy array with translations of each orientation (n x d).
124
- translations: np.ndarray
125
-
126
- #: Return a numpy array with euler angles of each orientation in zxy format (n x d).
127
- rotations: np.ndarray
128
-
129
- #: Return a numpy array with the score of each orientation (n, ).
130
- scores: np.ndarray
131
-
132
- #: Return a numpy array with additional orientation details (n, ).
133
- details: np.ndarray
134
-
135
- def __iter__(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
136
- """
137
- Iterate over the current class instance. Each iteration returns a orientation
138
- defined by its translation, rotation, score and additional detail.
139
-
140
- Yields
141
- ------
142
- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
143
- A tuple of arrays defining the given orientation.
144
- """
145
- yield from zip(self.translations, self.rotations, self.scores, self.details)
146
-
147
- def __getitem__(self, indices: List[int]) -> "Orientations":
148
- """
149
- Retrieve a subset of orientations based on the provided indices.
150
-
151
- Parameters
152
- ----------
153
- indices : List[int]
154
- A list of indices specifying the orientations to be retrieved.
155
-
156
- Returns
157
- -------
158
- :py:class:`Orientations`
159
- A new :py:class:`Orientations`instance containing only the selected orientations.
160
- """
161
- indices = np.asarray(indices)
162
- attributes = (
163
- "translations",
164
- "rotations",
165
- "scores",
166
- "details",
167
- )
168
- kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
169
- return self.__class__(**kwargs)
170
-
171
- def to_file(self, filename: str, file_format: type, **kwargs) -> None:
172
- """
173
- Save the current class instance to a file in the specified format.
174
-
175
- Parameters
176
- ----------
177
- filename : str
178
- The name of the file where the orientations will be saved.
179
- file_format : type
180
- The format in which to save the orientations. Supported formats are 'text' and 'relion'.
181
- **kwargs : dict
182
- Additional keyword arguments specific to the file format.
183
-
184
- Raises
185
- ------
186
- ValueError
187
- If an unsupported file format is specified.
188
- """
189
- mapping = {
190
- "text": self._to_text,
191
- "relion": self._to_relion_star,
192
- }
193
-
194
- func = mapping.get(file_format, None)
195
- if func is None:
196
- raise ValueError(
197
- f"{file_format} not implemented. Supported are {','.join(mapping.keys())}."
198
- )
185
+ args = parser.parse_args()
199
186
 
200
- return func(filename=filename, **kwargs)
187
+ if args.wedge_mask is not None:
188
+ args.wedge_mask = abspath(args.wedge_mask)
201
189
 
202
- def _to_text(self, filename: str) -> None:
203
- """
204
- Save orientations in a text file format.
190
+ if args.output_format == "relion" and args.subtomogram_box_size is not None:
191
+ args.subtomogram_box_size += args.subtomogram_box_size % 2
205
192
 
206
- Parameters
207
- ----------
208
- filename : str
209
- The name of the file to save the orientations.
193
+ if args.orientations is not None:
194
+ args.orientations = Orientations.from_file(filename=args.orientations)
210
195
 
211
- Notes
212
- -----
213
- The file is saved with a header specifying each column: z, y, x, euler_z,
214
- euler_y, euler_x, score, detail. Each row in the file corresponds to an orientation.
215
- """
216
- header = "\t".join(
217
- ["z", "y", "x", "euler_z", "euler_y", "euler_x", "score", "detail"]
218
- )
219
- with open(filename, mode="w", encoding="utf-8") as ofile:
220
- _ = ofile.write(f"{header}\n")
221
- for translation, angles, score, detail in self:
222
- translation_string = "\t".join([str(x) for x in translation])
223
- angle_string = "\t".join([str(x) for x in angles])
224
- _ = ofile.write(
225
- f"{translation_string}\t{angle_string}\t{score}\t{detail}\n"
226
- )
227
- return None
228
-
229
- def _to_relion_star(
230
- self,
231
- filename: str,
232
- name_prefix: str = None,
233
- ctf_image: str = None,
234
- sampling_rate: float = 1.0,
235
- subtomogram_size: int = 0,
236
- ) -> None:
237
- """
238
- Save orientations in RELION's STAR file format.
239
-
240
- Parameters
241
- ----------
242
- filename : str
243
- The name of the file to save the orientations.
244
- name_prefix : str, optional
245
- A prefix to add to the image names in the STAR file.
246
- ctf_image : str, optional
247
- Path to CTF or wedge mask RELION.
248
- sampling_rate : float, optional
249
- Subtomogram sampling rate in angstrom per voxel
250
- subtomogram_size : int, optional
251
- Size of the square shaped subtomogram.
252
-
253
- Notes
254
- -----
255
- The file is saved with a standard header used in RELION STAR files.
256
- Each row in the file corresponds to an orientation.
257
- """
258
- optics_header = [
259
- "# version 30001",
260
- "data_optics",
261
- "",
262
- "loop_",
263
- "_rlnOpticsGroup",
264
- "_rlnOpticsGroupName",
265
- "_rlnSphericalAberration",
266
- "_rlnVoltage",
267
- "_rlnImageSize",
268
- "_rlnImageDimensionality",
269
- "_rlnImagePixelSize",
270
- ]
271
- optics_data = [
272
- "1",
273
- "opticsGroup1",
274
- "2.700000",
275
- "300.000000",
276
- str(int(subtomogram_size)),
277
- "3",
278
- str(float(sampling_rate)),
279
- ]
280
- optics_header = "\n".join(optics_header)
281
- optics_data = "\t".join(optics_data)
282
-
283
- header = [
284
- "data_particles",
285
- "",
286
- "loop_",
287
- "_rlnCoordinateX",
288
- "_rlnCoordinateY",
289
- "_rlnCoordinateZ",
290
- "_rlnImageName",
291
- "_rlnAngleRot",
292
- "_rlnAngleTilt",
293
- "_rlnAnglePsi",
294
- "_rlnOpticsGroup",
295
- ]
296
- if ctf_image is not None:
297
- header.append("_rlnCtfImage")
298
-
299
- ctf_image = "" if ctf_image is None else f"\t{ctf_image}"
300
-
301
- header = "\n".join(header)
302
- name_prefix = "" if name_prefix is None else name_prefix
303
-
304
- with open(filename, mode="w", encoding="utf-8") as ofile:
305
- _ = ofile.write(f"{optics_header}\n")
306
- _ = ofile.write(f"{optics_data}\n")
307
-
308
- _ = ofile.write("\n# version 30001\n")
309
- _ = ofile.write(f"{header}\n")
310
-
311
- # pyTME uses a zyx data layout
312
- for index, (translation, rotation, score, detail) in enumerate(self):
313
- rotation = Rotation.from_euler("zyx", rotation, degrees=True)
314
- rotation = rotation.as_euler(seq="xyx", degrees=True)
315
-
316
- translation_string = "\t".join([str(x) for x in translation][::-1])
317
- angle_string = "\t".join([str(x) for x in rotation])
318
- name = f"{name_prefix}_{index}.mrc"
319
- _ = ofile.write(
320
- f"{translation_string}\t{name}\t{angle_string}\t1{ctf_image}\n"
321
- )
196
+ if args.minimum_score is not None or args.n_false_positives is not None:
197
+ args.number_of_peaks = np.iinfo(np.int64).max
198
+ else:
199
+ args.number_of_peaks = 1000
322
200
 
323
- return None
324
-
325
- @classmethod
326
- def from_file(cls, filename: str, file_format: type, **kwargs) -> "Orientations":
327
- """
328
- Create an instance of :py:class:`Orientations` from a file.
329
-
330
- Parameters
331
- ----------
332
- filename : str
333
- The name of the file from which to read the orientations.
334
- file_format : type
335
- The format of the file. Currently, only 'text' format is supported.
336
- **kwargs : dict
337
- Additional keyword arguments specific to the file format.
338
-
339
- Returns
340
- -------
341
- :py:class:`Orientations`
342
- An instance of :py:class:`Orientations` populated with data from the file.
343
-
344
- Raises
345
- ------
346
- ValueError
347
- If an unsupported file format is specified.
348
- """
349
- mapping = {
350
- "text": cls._from_text,
351
- }
352
-
353
- func = mapping.get(file_format, None)
354
- if func is None:
355
- raise ValueError(
356
- f"{file_format} not implemented. Supported are {','.join(mapping.keys())}."
357
- )
201
+ return args
358
202
 
359
- translations, rotations, scores, details, *_ = func(filename=filename, **kwargs)
360
- return cls(
361
- translations=translations,
362
- rotations=rotations,
363
- scores=scores,
364
- details=details,
365
- )
366
203
 
367
- @staticmethod
368
- def _from_text(
369
- filename: str,
370
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
371
- """
372
- Read orientations from a text file.
373
-
374
- Parameters
375
- ----------
376
- filename : str
377
- The name of the file from which to read the orientations.
378
-
379
- Returns
380
- -------
381
- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
382
- A tuple containing numpy arrays for translations, rotations, scores,
383
- and details.
384
-
385
- Notes
386
- -----
387
- The text file is expected to have a header and data in columns corresponding to
388
- z, y, x, euler_z, euler_y, euler_x, score, detail.
389
- """
390
- with open(filename, mode="r", encoding="utf-8") as infile:
391
- data = [x.strip().split("\t") for x in infile.read().split("\n")]
392
- _ = data.pop(0)
393
-
394
- translation, rotation, score, detail = [], [], [], []
395
- for candidate in data:
396
- if len(candidate) <= 1:
397
- continue
398
- if len(candidate) != 8:
399
- candidate.append(-1)
400
-
401
- candidate = [float(x) for x in candidate]
402
- translation.append((candidate[0], candidate[1], candidate[2]))
403
- rotation.append((candidate[3], candidate[4], candidate[5]))
404
- score.append(candidate[6])
405
- detail.append(candidate[7])
406
-
407
- translation = np.vstack(translation).astype(int)
408
- rotation = np.vstack(rotation).astype(float)
409
- score = np.array(score).astype(float)
410
- detail = np.array(detail).astype(float)
411
-
412
- return translation, rotation, score, detail
413
-
414
-
415
- def load_template(filepath: str, sampling_rate: NDArray) -> "Density":
204
+ def load_template(filepath: str, sampling_rate: NDArray, center: bool = True):
416
205
  try:
417
206
  template = Density.from_file(filepath)
418
- template, _ = template.centered(0)
419
207
  center_of_mass = template.center_of_mass(template.data)
208
+ template_is_density = True
420
209
  except ValueError:
421
210
  template = Structure.from_file(filepath)
422
211
  center_of_mass = template.center_of_mass()[::-1]
423
212
  template = Density.from_structure(template, sampling_rate=sampling_rate)
213
+ template_is_density = False
214
+
215
+ translation = np.zeros_like(center_of_mass)
216
+ if center:
217
+ template, translation = template.centered(0)
218
+
219
+ return template, center_of_mass, translation, template_is_density
220
+
424
221
 
425
- return template, center_of_mass
222
+ def merge_outputs(data, filepaths: List[str], args):
223
+ if len(filepaths) == 0:
224
+ return data, 1
225
+
226
+ if data[0].ndim != data[2].ndim:
227
+ return data, 1
228
+
229
+ from tme.matching_exhaustive import _normalize_under_mask
230
+
231
+ def _norm_scores(data, args):
232
+ target_origin, _, sampling_rate, cli_args = data[-1]
233
+
234
+ _, template_extension = splitext(cli_args.template)
235
+ ret = load_template(
236
+ filepath=cli_args.template,
237
+ sampling_rate=sampling_rate,
238
+ center=not cli_args.no_centering,
239
+ )
240
+ template, center_of_mass, translation, template_is_density = ret
241
+
242
+ if args.mask_edges and args.min_boundary_distance == 0:
243
+ max_shape = np.max(template.shape)
244
+ args.min_boundary_distance = np.ceil(np.divide(max_shape, 2))
245
+
246
+ target_mask = 1
247
+ if args.target_mask is not None:
248
+ target_mask = Density.from_file(args.target_mask).data
249
+ elif cli_args.target_mask is not None:
250
+ target_mask = Density.from_file(args.target_mask).data
251
+
252
+ mask = np.ones_like(data[0])
253
+ np.multiply(mask, target_mask, out=mask)
254
+
255
+ cropped_shape = np.subtract(
256
+ mask.shape, np.multiply(args.min_boundary_distance, 2)
257
+ ).astype(int)
258
+ mask[cropped_shape] = 0
259
+ _normalize_under_mask(template=data[0], mask=mask, mask_intensity=mask.sum())
260
+ return data[0]
261
+
262
+ entities = np.zeros_like(data[0])
263
+ data[0] = _norm_scores(data=data, args=args)
264
+ for index, filepath in enumerate(filepaths):
265
+ new_scores = _norm_scores(data=load_pickle(filepath), args=args)
266
+ indices = new_scores > data[0]
267
+ entities[indices] = index + 1
268
+ data[0][indices] = new_scores[indices]
269
+
270
+ return data, entities
426
271
 
427
272
 
428
273
  def main():
429
274
  args = parse_args()
430
- data = load_pickle(args.input_file)
275
+ data = load_pickle(args.input_file[0])
431
276
 
432
- meta = data[-1]
433
- target_origin, _, sampling_rate, cli_args = meta
277
+ target_origin, _, sampling_rate, cli_args = data[-1]
434
278
 
435
- if args.orientations is not None:
436
- orientations = Orientations.from_file(
437
- filename=args.orientations, file_format="text"
279
+ _, template_extension = splitext(cli_args.template)
280
+ ret = load_template(
281
+ filepath=cli_args.template,
282
+ sampling_rate=sampling_rate,
283
+ center=not cli_args.no_centering,
284
+ )
285
+ template, center_of_mass, translation, template_is_density = ret
286
+
287
+ if args.output_format == "relion" and args.subtomogram_box_size is None:
288
+ new_shape = np.add(template.shape, np.mod(template.shape, 2))
289
+ new_shape = np.repeat(new_shape.max(), new_shape.size).astype(int)
290
+ print(f"Padding template from {template.shape} to {new_shape} for RELION.")
291
+ template.pad(new_shape)
292
+
293
+ template_mask = template.empty
294
+ template_mask.data[:] = 1
295
+ if cli_args.template_mask is not None:
296
+ template_mask = Density.from_file(cli_args.template_mask)
297
+ template_mask.pad(template.shape, center=False)
298
+ origin_translation = np.divide(
299
+ np.subtract(template.origin, template_mask.origin), template.sampling_rate
438
300
  )
301
+ translation = np.add(translation, origin_translation)
439
302
 
440
- else:
303
+ template_mask = template_mask.rigid_transform(
304
+ rotation_matrix=np.eye(template_mask.data.ndim),
305
+ translation=-translation,
306
+ order=1,
307
+ )
308
+
309
+ if args.mask_edges and args.min_boundary_distance == 0:
310
+ max_shape = np.max(template.shape)
311
+ args.min_boundary_distance = np.ceil(np.divide(max_shape, 2))
312
+
313
+ # data, entities = merge_outputs(data=data, filepaths=args.input_file[1:], args=args)
314
+
315
+ orientations = args.orientations
316
+ if orientations is None:
441
317
  translations, rotations, scores, details = [], [], [], []
442
318
  # Output is MaxScoreOverRotations
443
319
  if data[0].ndim == data[2].ndim:
444
320
  scores, offset, rotation_array, rotation_mapping, meta = data
445
- if args.mask_edges:
446
- template, center_of_mass = load_template(
447
- cli_args.template, sampling_rate=sampling_rate
321
+
322
+ if args.target_mask is not None:
323
+ target_mask = Density.from_file(args.target_mask)
324
+ scores = scores * target_mask.data
325
+
326
+ if args.n_false_positives is not None:
327
+ args.n_false_positives = max(args.n_false_positives, 1)
328
+ cropped_shape = np.subtract(
329
+ scores.shape, np.multiply(args.min_boundary_distance, 2)
330
+ ).astype(int)
331
+
332
+ cropped_shape = tuple(
333
+ slice(
334
+ int(args.min_boundary_distance),
335
+ int(x - args.min_boundary_distance),
336
+ )
337
+ for x in scores.shape
448
338
  )
449
- if not cli_args.no_centering:
450
- template, *_ = template.centered(0)
451
- mask_size = template.shape
452
- if args.min_boundary_distance > 0:
453
- mask_size = 2 * args.min_boundary_distance
454
- scores = centered_mask(scores, np.subtract(scores.shape, mask_size) + 1)
339
+ # Rickgauer et al. 2017
340
+ n_correlations = np.size(scores[cropped_shape]) * len(rotation_mapping)
341
+ minimum_score = np.multiply(
342
+ erfcinv(2 * args.n_false_positives / n_correlations),
343
+ np.sqrt(2) * np.std(scores[cropped_shape]),
344
+ )
345
+ print(f"Determined minimum score cutoff: {minimum_score}.")
346
+ minimum_score = max(minimum_score, 0)
347
+ args.minimum_score = minimum_score
455
348
 
456
349
  peak_caller = PEAK_CALLERS[args.peak_caller](
457
350
  number_of_peaks=args.number_of_peaks,
458
351
  min_distance=args.min_distance,
459
352
  min_boundary_distance=args.min_boundary_distance,
460
353
  )
461
- peak_caller(scores, rotation_matrix=np.eye(3))
354
+ if args.minimum_score is not None:
355
+ args.number_of_peaks = np.inf
356
+
357
+ peak_caller(
358
+ scores,
359
+ rotation_matrix=np.eye(3),
360
+ mask=template.data,
361
+ rotation_mapping=rotation_mapping,
362
+ rotation_array=rotation_array,
363
+ minimum_score=args.minimum_score,
364
+ )
462
365
  candidates = peak_caller.merge(
463
366
  candidates=[tuple(peak_caller)],
464
367
  number_of_peaks=args.number_of_peaks,
@@ -466,16 +369,15 @@ def main():
466
369
  min_boundary_distance=args.min_boundary_distance,
467
370
  )
468
371
  if len(candidates) == 0:
469
- exit(
470
- "Found no peaks. Try reducing min_distance or min_boundary_distance."
471
- )
372
+ print("Found no peaks. Consider changing peak calling parameters.")
373
+ exit(-1)
472
374
 
473
375
  for translation, _, score, detail in zip(*candidates):
474
376
  rotations.append(rotation_mapping[rotation_array[tuple(translation)]])
475
377
 
476
378
  else:
477
379
  candidates = data
478
- translation, rotation, score, detail, *_ = data
380
+ translation, rotation, *_ = data
479
381
  for i in range(translation.shape[0]):
480
382
  rotations.append(euler_from_rotationmatrix(rotation[i]))
481
383
 
@@ -488,25 +390,35 @@ def main():
488
390
  details=details,
489
391
  )
490
392
 
393
+ if args.minimum_score is not None:
394
+ keep = orientations.scores >= args.minimum_score
395
+ orientations = orientations[keep]
396
+
397
+ if args.maximum_score is not None:
398
+ keep = orientations.scores <= args.maximum_score
399
+ orientations = orientations[keep]
400
+
491
401
  if args.output_format == "orientations":
492
402
  orientations.to_file(filename=f"{args.output_prefix}.tsv", file_format="text")
493
403
  exit(0)
494
404
 
495
- _, template_extension = splitext(cli_args.template)
496
- template, center_of_mass = load_template(
497
- filepath=cli_args.template, sampling_rate=sampling_rate
498
- )
499
- template_is_density, index = isinstance(template, Density), 0
500
-
501
- if args.output_format == "relion":
502
- new_shape = np.add(template.shape, np.mod(template.shape, 2))
503
- new_shape = np.repeat(new_shape.max(), new_shape.size).astype(int)
504
- print(f"Padding template from {template.shape} to {new_shape} for RELION.")
505
- template.pad(new_shape)
405
+ target = Density.from_file(cli_args.target)
406
+ if args.invert_target_contrast:
407
+ if args.output_format == "relion":
408
+ target.data = target.data * -1
409
+ target.data = np.divide(
410
+ np.subtract(target.data, target.data.mean()), target.data.std()
411
+ )
412
+ else:
413
+ target.data = (
414
+ -np.divide(
415
+ np.subtract(target.data, target.data.min()),
416
+ np.subtract(target.data.max(), target.data.min()),
417
+ )
418
+ + 1
419
+ )
506
420
 
507
421
  if args.output_format in ("extraction", "relion"):
508
- target = Density.from_file(cli_args.target)
509
-
510
422
  if not np.all(np.divide(target.shape, template.shape) > 2):
511
423
  print(
512
424
  "Target might be too small relative to template to extract"
@@ -514,26 +426,19 @@ def main():
514
426
  f" Target : {target.shape}, template : {template.shape}."
515
427
  )
516
428
 
517
- peaks = orientations.translations.astype(int)
518
- max_shape = np.max(template.shape).astype(int)
519
- half_shape = max_shape // 2
520
-
521
- left_pad = half_shape
522
- right_pad = np.add(half_shape, max_shape % 2)
523
- starts = np.subtract(peaks, left_pad)
524
- stops = np.add(peaks, right_pad)
525
-
526
- candidate_starts = np.maximum(starts, 0).astype(int)
527
- candidate_stops = np.minimum(stops, target.shape).astype(int)
528
- keep_peaks = (
529
- np.sum(
530
- np.multiply(starts == candidate_starts, stops == candidate_stops),
531
- axis=1,
429
+ extraction_shape = template.shape
430
+ if args.subtomogram_box_size is not None:
431
+ extraction_shape = np.repeat(
432
+ args.subtomogram_box_size, len(extraction_shape)
532
433
  )
533
- == peaks.shape[1]
434
+
435
+ orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
436
+ target_shape=target.shape,
437
+ extraction_shape=extraction_shape,
438
+ drop_out_of_box=True,
439
+ return_orientations=True,
534
440
  )
535
441
 
536
- orientations = orientations[keep_peaks]
537
442
  working_directory = getcwd()
538
443
  if args.output_format == "relion":
539
444
  orientations.to_file(
@@ -542,62 +447,101 @@ def main():
542
447
  name_prefix=join(working_directory, args.output_prefix),
543
448
  ctf_image=args.wedge_mask,
544
449
  sampling_rate=target.sampling_rate.max(),
545
- subtomogram_size=template.shape[0],
450
+ subtomogram_size=extraction_shape[0],
546
451
  )
547
452
 
548
- peaks = peaks[keep_peaks,]
549
- starts = starts[keep_peaks,]
550
- stops = stops[keep_peaks,]
551
- candidate_starts = candidate_starts[keep_peaks,]
552
- candidate_stops = candidate_stops[keep_peaks,]
553
-
554
- if not len(peaks):
555
- print(
556
- "No peak remaining after filtering. Started with"
557
- f" {orientations.translations.shape[0]} filtered to {peaks.shape[0]}."
558
- " Consider reducing min_distance, increase num_peaks or use"
559
- " a different peak caller."
560
- )
561
- exit(-1)
562
-
563
- observation_starts = np.subtract(candidate_starts, starts).astype(int)
564
- observation_stops = np.subtract(np.add(max_shape, candidate_stops), stops)
565
- observation_stops = observation_stops.astype(int)
566
-
567
- candidate_slices = [
568
- tuple(slice(s, e) for s, e in zip(start_row, stop_row))
569
- for start_row, stop_row in zip(candidate_starts, candidate_stops)
570
- ]
571
-
572
- observation_slices = [
573
- tuple(slice(s, e) for s, e in zip(start_row, stop_row))
574
- for start_row, stop_row in zip(observation_starts, observation_stops)
575
- ]
576
- observations = np.zeros(
577
- (len(candidate_slices), max_shape, max_shape, max_shape)
578
- )
579
-
580
- slices = zip(candidate_slices, observation_slices)
453
+ observations = np.zeros((len(cand_slices), *extraction_shape))
454
+ slices = zip(cand_slices, obs_slices)
581
455
  for idx, (cand_slice, obs_slice) in enumerate(slices):
582
- observations[idx][:] = np.mean(target.data[cand_slice])
583
- observations[idx][obs_slice] = target.data[cand_slice]
456
+ observations[idx][:] = np.mean(target.data[obs_slice])
457
+ observations[idx][cand_slice] = target.data[obs_slice]
584
458
 
585
459
  for index in range(observations.shape[0]):
460
+ cand_start = [x.start for x in cand_slices[index]]
586
461
  out_density = Density(
587
462
  data=observations[index],
588
463
  sampling_rate=sampling_rate,
589
- origin=candidate_starts[index] * sampling_rate,
464
+ origin=np.multiply(cand_start, sampling_rate),
590
465
  )
591
- # out_density.data = out_density.data * template_mask.data
466
+ if args.mask_subtomograms:
467
+ rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
468
+ mask_transfomed = template_mask.rigid_transform(
469
+ rotation_matrix=rotation_matrix, order=1
470
+ )
471
+ out_density.data = out_density.data * mask_transfomed.data
592
472
  out_density.to_file(
593
473
  join(working_directory, f"{args.output_prefix}_{index}.mrc")
594
474
  )
595
475
 
596
476
  exit(0)
597
477
 
598
- for translation, angles, *_ in orientations:
599
- rotation_matrix = euler_to_rotationmatrix(angles)
478
+ if args.output_format == "backmapping":
479
+ orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
480
+ target_shape=target.shape,
481
+ extraction_shape=template.shape,
482
+ drop_out_of_box=True,
483
+ return_orientations=True,
484
+ )
485
+ ret, template_sum = target.empty, template.data.sum()
486
+ for index in range(len(cand_slices)):
487
+ rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
600
488
 
489
+ transformed_template = template.rigid_transform(
490
+ rotation_matrix=rotation_matrix
491
+ )
492
+ transformed_template.data = np.multiply(
493
+ transformed_template.data,
494
+ np.divide(template_sum, transformed_template.data.sum()),
495
+ )
496
+ cand_slice, obs_slice = cand_slices[index], obs_slices[index]
497
+ ret.data[obs_slice] += transformed_template.data[cand_slice]
498
+ ret.to_file(f"{args.output_prefix}_backmapped.mrc")
499
+ exit(0)
500
+
501
+ if args.output_format == "average":
502
+ orientations, cand_slices, obs_slices = orientations.get_extraction_slices(
503
+ target_shape=target.shape,
504
+ extraction_shape=np.multiply(template.shape, 2),
505
+ drop_out_of_box=True,
506
+ return_orientations=True,
507
+ )
508
+ out = np.zeros_like(template.data)
509
+ out = np.zeros(np.multiply(template.shape, 2).astype(int))
510
+ for index in range(len(cand_slices)):
511
+ from scipy.spatial.transform import Rotation
512
+
513
+ rotation = Rotation.from_euler(
514
+ angles=orientations.rotations[index], seq="zyx", degrees=True
515
+ )
516
+ rotation_matrix = rotation.inv().as_matrix()
517
+
518
+ # rotation_matrix = euler_to_rotationmatrix(orientations.rotations[index])
519
+ subset = Density(target.data[obs_slices[index]])
520
+ subset = subset.rigid_transform(rotation_matrix=rotation_matrix, order=1)
521
+
522
+ np.add(out, subset.data, out=out)
523
+ out /= len(cand_slices)
524
+ ret = Density(out, sampling_rate=template.sampling_rate, origin=0)
525
+ ret.pad(template.shape, center=True)
526
+ ret.to_file(f"{args.output_prefix}_average.mrc")
527
+ exit(0)
528
+
529
+ if args.peak_oversampling > 1:
530
+ peak_caller = peak_caller = PEAK_CALLERS[args.peak_caller]()
531
+ if data[0].ndim != data[2].ndim:
532
+ print(
533
+ "Input pickle does not contain template matching scores."
534
+ " Cannot oversample peaks."
535
+ )
536
+ exit(-1)
537
+ orientations.translations = peak_caller.oversample_peaks(
538
+ score_space=data[0],
539
+ translations=orientations.translations,
540
+ oversampling_factor=args.oversampling_factor,
541
+ )
542
+
543
+ for index, (translation, angles, *_) in enumerate(orientations):
544
+ rotation_matrix = euler_to_rotationmatrix(angles)
601
545
  if template_is_density:
602
546
  translation = np.subtract(translation, center_of_mass)
603
547
  transformed_template = template.rigid_transform(
@@ -606,6 +550,7 @@ def main():
606
550
  new_origin = np.add(target_origin / sampling_rate, translation)
607
551
  transformed_template.origin = np.multiply(new_origin, sampling_rate)
608
552
  else:
553
+ template = Structure.from_file(cli_args.template)
609
554
  new_center_of_mass = np.add(
610
555
  np.multiply(translation, sampling_rate), target_origin
611
556
  )
@@ -614,7 +559,7 @@ def main():
614
559
  translation=translation[::-1],
615
560
  rotation_matrix=rotation_matrix[::-1, ::-1],
616
561
  )
617
- # template_extension should contain the extension '.'
562
+ # template_extension should contain '.'
618
563
  transformed_template.to_file(
619
564
  f"{args.output_prefix}_{index}{template_extension}"
620
565
  )