pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
  2. pytme-0.1.5.data/scripts/match_template.py +744 -0
  3. pytme-0.1.5.data/scripts/postprocess.py +279 -0
  4. pytme-0.1.5.data/scripts/preprocess.py +93 -0
  5. pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
  6. pytme-0.1.5.dist-info/LICENSE +153 -0
  7. pytme-0.1.5.dist-info/METADATA +69 -0
  8. pytme-0.1.5.dist-info/RECORD +63 -0
  9. pytme-0.1.5.dist-info/WHEEL +5 -0
  10. pytme-0.1.5.dist-info/entry_points.txt +6 -0
  11. pytme-0.1.5.dist-info/top_level.txt +2 -0
  12. scripts/__init__.py +0 -0
  13. scripts/estimate_ram_usage.py +81 -0
  14. scripts/match_template.py +744 -0
  15. scripts/match_template_devel.py +788 -0
  16. scripts/postprocess.py +279 -0
  17. scripts/preprocess.py +93 -0
  18. scripts/preprocessor_gui.py +729 -0
  19. tme/__init__.py +6 -0
  20. tme/__version__.py +1 -0
  21. tme/analyzer.py +1144 -0
  22. tme/backends/__init__.py +134 -0
  23. tme/backends/cupy_backend.py +309 -0
  24. tme/backends/matching_backend.py +1154 -0
  25. tme/backends/npfftw_backend.py +763 -0
  26. tme/backends/pytorch_backend.py +526 -0
  27. tme/data/__init__.py +0 -0
  28. tme/data/c48n309.npy +0 -0
  29. tme/data/c48n527.npy +0 -0
  30. tme/data/c48n9.npy +0 -0
  31. tme/data/c48u1.npy +0 -0
  32. tme/data/c48u1153.npy +0 -0
  33. tme/data/c48u1201.npy +0 -0
  34. tme/data/c48u1641.npy +0 -0
  35. tme/data/c48u181.npy +0 -0
  36. tme/data/c48u2219.npy +0 -0
  37. tme/data/c48u27.npy +0 -0
  38. tme/data/c48u2947.npy +0 -0
  39. tme/data/c48u3733.npy +0 -0
  40. tme/data/c48u4749.npy +0 -0
  41. tme/data/c48u5879.npy +0 -0
  42. tme/data/c48u7111.npy +0 -0
  43. tme/data/c48u815.npy +0 -0
  44. tme/data/c48u83.npy +0 -0
  45. tme/data/c48u8649.npy +0 -0
  46. tme/data/c600v.npy +0 -0
  47. tme/data/c600vc.npy +0 -0
  48. tme/data/metadata.yaml +80 -0
  49. tme/data/quat_to_numpy.py +42 -0
  50. tme/data/scattering_factors.pickle +0 -0
  51. tme/density.py +2314 -0
  52. tme/extensions.cpython-311-darwin.so +0 -0
  53. tme/helpers.py +881 -0
  54. tme/matching_data.py +377 -0
  55. tme/matching_exhaustive.py +1553 -0
  56. tme/matching_memory.py +382 -0
  57. tme/matching_optimization.py +1123 -0
  58. tme/matching_utils.py +1180 -0
  59. tme/parser.py +429 -0
  60. tme/preprocessor.py +1291 -0
  61. tme/scoring.py +866 -0
  62. tme/structure.py +1428 -0
  63. tme/types.py +10 -0
@@ -0,0 +1,279 @@
1
+ #!python
2
+ """ CLI to simplify analysing the output of match_template.py.
3
+
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
5
+
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
7
+ """
8
+ import argparse
9
+ from sys import exit
10
+ from os.path import splitext
11
+
12
+ import numpy as np
13
+
14
+ from tme import Density, Structure
15
+ from tme.analyzer import (
16
+ PeakCallerSort,
17
+ PeakCallerMaximumFilter,
18
+ PeakCallerFast,
19
+ PeakCallerRecursiveMasking,
20
+ PeakCallerScipy,
21
+ )
22
+ from tme.matching_utils import (
23
+ load_pickle,
24
+ euler_to_rotationmatrix,
25
+ euler_from_rotationmatrix,
26
+ )
27
+
28
# Registry mapping CLI-facing names to peak caller implementations from
# tme.analyzer. Used both for the --peak_caller argparse choices and to
# instantiate the selected caller in main().
PEAK_CALLERS = {
    "PeakCallerSort": PeakCallerSort,
    "PeakCallerMaximumFilter": PeakCallerMaximumFilter,
    "PeakCallerFast": PeakCallerFast,
    "PeakCallerRecursiveMasking": PeakCallerRecursiveMasking,
    "PeakCallerScipy": PeakCallerScipy,
}
35
+
36
+
37
def parse_args():
    """Define and evaluate the command line interface of this script.

    Returns
    -------
    argparse.Namespace
        Parsed arguments of the postprocessing CLI.
    """
    cli = argparse.ArgumentParser(
        description="Peak Calling for Template Matching Outputs"
    )
    cli.add_argument(
        "--input_file",
        required=True,
        help="Path to the output of match_template.py.",
    )
    cli.add_argument(
        "--output_prefix",
        required=True,
        help="Prefix for the output file name. Extension depends on output_format.",
    )
    cli.add_argument(
        "--number_of_peaks",
        type=int,
        default=1000,
        help="Number of peaks to consider.",
    )
    cli.add_argument(
        "--min_distance",
        type=int,
        default=5,
        help="Minimum distance between peaks.",
    )
    cli.add_argument(
        "--peak_caller",
        choices=list(PEAK_CALLERS.keys()),
        default="PeakCallerScipy",
        help="Peak caller to use for analysis. Ignored if input_file contains peaks.",
    )
    cli.add_argument(
        "--orientations",
        default=None,
        help="Path to orientations file to overwrite orientations computed from"
        " match_template.py output.",
    )
    cli.add_argument(
        "--output_format",
        choices=["orientations", "alignment", "extraction"],
        default="orientations",
        help="Choose the output format. Available formats are: "
        "orientations (translation, rotation, and score), "
        "alignment (aligned template to target based on orientations), "
        "extraction (extract regions around peaks from targets, i.e. subtomograms).",
    )
    return cli.parse_args()
81
+
82
+
83
def main():
    """Convert match_template.py output into one of three products:
    a TSV of peak orientations, aligned templates, or extracted subvolumes.
    """
    args = parse_args()
    data = load_pickle(args.input_file)

    # The pickle's last element carries metadata; unpacked further below as
    # (target_origin, _, sampling_rate, cli_args).
    meta = data[-1]

    orientations = []
    if args.orientations is None:
        # NOTE(review): matching ndim between the first and third element is
        # taken to mean the pickle holds a dense score volume (peaks still to
        # be called) rather than a ready-made peak list — confirm against the
        # writer in match_template.py.
        if data[0].ndim == data[2].ndim:
            scores, offset, rotations, rotation_mapping, meta = data
            peak_caller = PEAK_CALLERS[args.peak_caller](
                number_of_peaks=args.number_of_peaks, min_distance=args.min_distance
            )
            # Call peaks on the score volume; identity rotation because the
            # scores are already in target space.
            peak_caller(scores, rotation_matrix=np.eye(3))
            candidates = peak_caller.merge([tuple(peak_caller)])
            for translation, _, score, detail in zip(*candidates):
                # Euler angles of the best-scoring rotation at this voxel.
                angles = rotation_mapping[rotations[tuple(translation)]]
                orientations.append((translation, angles, score, detail))
        else:
            # Pickle already contains peaks:
            # (translations, rotation matrices, scores, details, ...).
            candidates = data
            translation, rotation, score, detail, *_ = data
            for i in range(translation.shape[0]):
                angles = euler_from_rotationmatrix(rotation[i])
                orientations.append(
                    (np.array(translation[i]), angles, score[i], detail[i])
                )
    else:
        # User-supplied orientations in the same TSV layout written below:
        # z, y, x, euler_z, euler_y, euler_x, score, detail.
        with open(args.orientations, mode="r", encoding="utf-8") as infile:
            data = [x.strip().split("\t") for x in infile.read().split("\n")]
        _ = data.pop(0)  # drop the header row
        translation, rotation, score, detail = [], [], [], []
        for candidate in data:
            if len(candidate) <= 1:
                # Blank/whitespace-only line (e.g. trailing newline).
                continue
            if len(candidate) != 8:
                # Pad a missing trailing 'detail' column with -1.
                candidate.append(-1)

            candidate = [float(x) for x in candidate]
            translation.append((candidate[0], candidate[1], candidate[2]))
            rotation.append(
                euler_to_rotationmatrix((candidate[3], candidate[4], candidate[5]))
            )
            score.append(candidate[6])
            detail.append(candidate[7])
            orientations.append(
                (
                    translation[-1],
                    (candidate[3], candidate[4], candidate[5]),
                    score[-1],
                    detail[-1],
                )
            )

        # Assemble arrays mirroring the peak-caller candidate layout so the
        # extraction branch below works for both input paths.
        candidates = (
            np.vstack(translation).astype(int),
            np.vstack(rotation).astype(float),
            np.array(score).astype(float),
            np.array(detail).astype(float),
        )

    if args.output_format == "orientations":
        # Plain TSV export — requires neither template nor target; exit early.
        header = "\t".join(
            ["z", "y", "x", "euler_z", "euler_y", "euler_x", "score", "detail"]
        )
        output_file = f"{args.output_prefix}.tsv"
        with open(output_file, mode="w", encoding="utf-8") as ofile:
            _ = ofile.write(f"{header}\n")
            for translation, angles, score, detail in orientations:
                translation_string = "\t".join([str(x) for x in translation])
                angle_string = "\t".join([str(x) for x in angles])
                _ = ofile.write(
                    f"{translation_string}\t{angle_string}\t{score}\t{detail}\n"
                )
        exit(0)

    target_origin, _, sampling_rate, cli_args = meta

    # Multiplicative identity mask, replaced below if a real mask was given.
    template_mask = Density(np.ones(1))
    template_is_density, index = True, 0
    _, template_extension = splitext(cli_args.template)
    try:
        # Prefer reading the template as a density map.
        template = Density.from_file(cli_args.template)
        template, _ = template.centered(0)
        center_of_mass = template.center_of_mass(template.data)
    except ValueError:
        # Not a density map — fall back to an atomic structure file.
        template_is_density = False
        template = Structure.from_file(cli_args.template)
        # NOTE(review): the [::-1] presumably converts xyz structure
        # coordinates to the zyx axis order used elsewhere — confirm.
        center_of_mass = template.center_of_mass()[::-1]

    if cli_args.template_mask is not None:
        template_mask = Density.from_file(cli_args.template_mask)

    if args.output_format == "extraction":
        target = Density.from_file(cli_args.target)
        if type(template) == Structure:
            template = Density.from_structure(
                template, sampling_rate = target.sampling_rate
            )

        # Warn (but proceed) when the target is small relative to the
        # template; extracted particles may then be dominated by padding.
        if not np.all(np.divide(target.shape, template.shape) > 2):
            print(
                "Target might be too small relative to template to extract"
                " meaningful particles."
                f" Target : {target.shape}, template : {template.shape}."
            )

        # Cubic extraction box with edge length equal to the template's
        # largest dimension, centered on each peak.
        peaks = candidates[0].astype(int)
        max_shape = np.max(template.shape).astype(int)
        half_shape = max_shape // 2

        left_pad = half_shape
        right_pad = np.add(half_shape, max_shape % 2)  # +1 on odd edge lengths
        starts = np.subtract(peaks, left_pad)
        stops = np.add(peaks, right_pad)

        # Clip boxes to the target bounds; keep only peaks whose box was not
        # clipped in any dimension (i.e. fully inside the target).
        candidate_starts = np.maximum(starts, 0).astype(int)
        candidate_stops = np.minimum(stops, target.shape).astype(int)
        keep_peaks = (
            np.sum(
                np.multiply(starts == candidate_starts, stops == candidate_stops),
                axis=1,
            )
            == peaks.shape[1]
        )

        peaks = peaks[keep_peaks,]
        starts = starts[keep_peaks,]
        stops = stops[keep_peaks,]
        candidate_starts = candidate_starts[keep_peaks,]
        candidate_stops = candidate_stops[keep_peaks,]

        if not len(peaks):
            print(
                "No peak remaining after filtering. Started with"
                f" {candidates[0].shape[0]} filtered to {peaks.shape[0]}."
                " Consider reducing min_distance, increase num_peaks or use"
                " a different peak caller."
            )
            exit(-1)

        # Offsets of the (clipped) target crop inside the fixed-size
        # observation cube.
        observation_starts = np.subtract(candidate_starts, starts).astype(int)
        observation_stops = np.subtract(np.add(max_shape, candidate_stops), stops)
        observation_stops = observation_stops.astype(int)

        candidate_slices = [
            tuple(slice(s, e) for s, e in zip(start_row, stop_row))
            for start_row, stop_row in zip(candidate_starts, candidate_stops)
        ]

        observation_slices = [
            tuple(slice(s, e) for s, e in zip(start_row, stop_row))
            for start_row, stop_row in zip(observation_starts, observation_stops)
        ]
        observations = np.zeros(
            (len(candidate_slices), max_shape, max_shape, max_shape)
        )

        # Fill each cube with the crop's mean (background for any clipped
        # region), then paste the actual target data.
        slices = zip(candidate_slices, observation_slices)
        for idx, (cand_slice, obs_slice) in enumerate(slices):
            observations[idx][:] = np.mean(target.data[cand_slice])
            observations[idx][obs_slice] = target.data[cand_slice]

        for index in range(observations.shape[0]):
            out_density = Density(
                data=observations[index],
                sampling_rate=sampling_rate,
                origin=candidate_starts[index] * sampling_rate,
            )
            # Apply the template mask (identity of ones when none supplied).
            out_density.data = out_density.data * template_mask.data
            out_density.to_file(f"{args.output_prefix}{index}.mrc")
        exit(0)

    # Remaining format: "alignment" — write one rigidly transformed copy of
    # the template per orientation.
    for translation, angles, *_ in orientations:
        rotation_matrix = euler_to_rotationmatrix(angles)

        if template_is_density:
            # Densities are rotated about their center; position is encoded
            # via the map origin rather than an explicit translation.
            translation = np.subtract(translation, center_of_mass)
            transformed_template = template.rigid_transform(
                rotation_matrix=rotation_matrix
            )
            new_origin = np.add(target_origin / sampling_rate, translation)
            transformed_template.origin = np.multiply(new_origin, sampling_rate)
        else:
            # Structures carry absolute coordinates: convert the voxel
            # translation to physical units and shift the center of mass.
            new_center_of_mass = np.add(
                np.multiply(translation, sampling_rate), target_origin
            )
            translation = np.subtract(new_center_of_mass, center_of_mass)
            # NOTE(review): the [::-1] flips presumably reconcile zyx matrix
            # convention with the structure's xyz coordinates — confirm.
            transformed_template = template.rigid_transform(
                translation=translation[::-1],
                rotation_matrix=rotation_matrix[::-1, ::-1],
            )
        transformed_template.to_file(f"{args.output_prefix}{index}{template_extension}")
        index += 1
276
+
277
+
278
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()
@@ -0,0 +1,93 @@
1
+ #!python
2
+ """ Apply tme.preprocessor.Preprocessor methods to an input file based
3
+ on a provided yaml configuration obtained from preprocessor_gui.py.
4
+
5
+ Copyright (c) 2023 European Molecular Biology Laboratory
6
+
7
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
8
+ """
9
+ import yaml
10
+ import argparse
11
+ import textwrap
12
+ from tme import Preprocessor, Density
13
+
14
+
15
def parse_args():
    """Parse command line arguments for the preprocessing CLI.

    Returns
    -------
    argparse.Namespace
        Parsed arguments with attributes ``input_file``, ``yaml_file``,
        ``output_file`` and ``compress``.
    """
    parser = argparse.ArgumentParser(
        description=textwrap.dedent(
            """
            Apply preprocessing to an input file based on a provided YAML configuration.

            Expected YAML file format:
            ```yaml
            <method_name>:
              <parameter1>: <value1>
              <parameter2>: <value2>
              ...
            ```
            """
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-i",
        "--input_file",
        type=str,
        required=True,
        help="Path to the input data file in CCP4/MRC format.",
    )
    parser.add_argument(
        "-y",
        "--yaml_file",
        type=str,
        required=True,
        help="Path to the YAML configuration file.",
    )
    parser.add_argument(
        "-o",
        "--output_file",
        type=str,
        required=True,
        # Fixed user-facing typo: was "CPP4/MRC format.." (wrong acronym,
        # doubled period).
        help="Path to output file in CCP4/MRC format.",
    )
    parser.add_argument(
        "--compress", action="store_true", help="Compress the output file using gzip."
    )

    return parser.parse_args()
60
+
61
+
62
def main():
    """Apply a single Preprocessor method, described in a YAML file, to an
    input density map and write the filtered result.

    Raises
    ------
    ValueError
        If the YAML file is empty, or names a method that does not exist on
        tme.preprocessor.Preprocessor.
    NotImplementedError
        If more than one method is specified in the YAML file.
    """
    args = parse_args()
    with open(args.yaml_file, "r", encoding="utf-8") as f:
        preprocess_settings = yaml.safe_load(f)

    # Guard the empty/blank YAML case explicitly instead of failing with an
    # opaque IndexError further down.
    if not preprocess_settings:
        raise ValueError(f"No preprocessing method specified in {args.yaml_file}.")

    # The script deliberately supports exactly one method per invocation.
    if len(preprocess_settings) > 1:
        raise NotImplementedError(
            "Multiple preprocessing methods specified. "
            "The script currently supports one method at a time."
        )

    method_name = next(iter(preprocess_settings))
    if not hasattr(Preprocessor, method_name):
        raise ValueError(f"Method {method_name} does not exist in Preprocessor.")

    density = Density.from_file(args.input_file)
    # presumably an empty Density sharing the input's metadata — TODO confirm
    # against tme.density.Density.empty.
    output = density.empty

    method_params = preprocess_settings[method_name]
    preprocessor = Preprocessor()
    method = getattr(preprocessor, method_name, None)
    if not callable(method):
        # Previously interpolated `method` (None here) and referenced a wrong
        # module path ("dge.preprocessor"); report the offending name instead.
        raise ValueError(
            f"{method_name} is not a callable method of"
            " tme.preprocessor.Preprocessor."
        )

    # Every Preprocessor method takes the raw array via `template` plus its
    # YAML-supplied keyword parameters.
    output.data = method(template=density.data, **method_params)
    output.to_file(args.output_file, gzip=args.compress)
90
+
91
+
92
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()