pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
- pytme-0.1.5.data/scripts/match_template.py +744 -0
- pytme-0.1.5.data/scripts/postprocess.py +279 -0
- pytme-0.1.5.data/scripts/preprocess.py +93 -0
- pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
- pytme-0.1.5.dist-info/LICENSE +153 -0
- pytme-0.1.5.dist-info/METADATA +69 -0
- pytme-0.1.5.dist-info/RECORD +63 -0
- pytme-0.1.5.dist-info/WHEEL +5 -0
- pytme-0.1.5.dist-info/entry_points.txt +6 -0
- pytme-0.1.5.dist-info/top_level.txt +2 -0
- scripts/__init__.py +0 -0
- scripts/estimate_ram_usage.py +81 -0
- scripts/match_template.py +744 -0
- scripts/match_template_devel.py +788 -0
- scripts/postprocess.py +279 -0
- scripts/preprocess.py +93 -0
- scripts/preprocessor_gui.py +729 -0
- tme/__init__.py +6 -0
- tme/__version__.py +1 -0
- tme/analyzer.py +1144 -0
- tme/backends/__init__.py +134 -0
- tme/backends/cupy_backend.py +309 -0
- tme/backends/matching_backend.py +1154 -0
- tme/backends/npfftw_backend.py +763 -0
- tme/backends/pytorch_backend.py +526 -0
- tme/data/__init__.py +0 -0
- tme/data/c48n309.npy +0 -0
- tme/data/c48n527.npy +0 -0
- tme/data/c48n9.npy +0 -0
- tme/data/c48u1.npy +0 -0
- tme/data/c48u1153.npy +0 -0
- tme/data/c48u1201.npy +0 -0
- tme/data/c48u1641.npy +0 -0
- tme/data/c48u181.npy +0 -0
- tme/data/c48u2219.npy +0 -0
- tme/data/c48u27.npy +0 -0
- tme/data/c48u2947.npy +0 -0
- tme/data/c48u3733.npy +0 -0
- tme/data/c48u4749.npy +0 -0
- tme/data/c48u5879.npy +0 -0
- tme/data/c48u7111.npy +0 -0
- tme/data/c48u815.npy +0 -0
- tme/data/c48u83.npy +0 -0
- tme/data/c48u8649.npy +0 -0
- tme/data/c600v.npy +0 -0
- tme/data/c600vc.npy +0 -0
- tme/data/metadata.yaml +80 -0
- tme/data/quat_to_numpy.py +42 -0
- tme/data/scattering_factors.pickle +0 -0
- tme/density.py +2314 -0
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/helpers.py +881 -0
- tme/matching_data.py +377 -0
- tme/matching_exhaustive.py +1553 -0
- tme/matching_memory.py +382 -0
- tme/matching_optimization.py +1123 -0
- tme/matching_utils.py +1180 -0
- tme/parser.py +429 -0
- tme/preprocessor.py +1291 -0
- tme/scoring.py +866 -0
- tme/structure.py +1428 -0
- tme/types.py +10 -0
scripts/postprocess.py
ADDED
@@ -0,0 +1,279 @@
|
|
1
|
+
#!python3
|
2
|
+
""" CLI to simplify analysing the output of match_template.py.
|
3
|
+
|
4
|
+
Copyright (c) 2023 European Molecular Biology Laboratory
|
5
|
+
|
6
|
+
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
|
7
|
+
"""
|
8
|
+
import argparse
|
9
|
+
from sys import exit
|
10
|
+
from os.path import splitext
|
11
|
+
|
12
|
+
import numpy as np
|
13
|
+
|
14
|
+
from tme import Density, Structure
|
15
|
+
from tme.analyzer import (
|
16
|
+
PeakCallerSort,
|
17
|
+
PeakCallerMaximumFilter,
|
18
|
+
PeakCallerFast,
|
19
|
+
PeakCallerRecursiveMasking,
|
20
|
+
PeakCallerScipy,
|
21
|
+
)
|
22
|
+
from tme.matching_utils import (
|
23
|
+
load_pickle,
|
24
|
+
euler_to_rotationmatrix,
|
25
|
+
euler_from_rotationmatrix,
|
26
|
+
)
|
27
|
+
|
28
|
+
# Registry mapping CLI-facing names to the peak caller classes imported from
# tme.analyzer; used both for --peak_caller choices and for instantiation.
PEAK_CALLERS = {
    "PeakCallerSort": PeakCallerSort,
    "PeakCallerMaximumFilter": PeakCallerMaximumFilter,
    "PeakCallerFast": PeakCallerFast,
    "PeakCallerRecursiveMasking": PeakCallerRecursiveMasking,
    "PeakCallerScipy": PeakCallerScipy,
}
|
35
|
+
|
36
|
+
|
37
|
+
def parse_args():
    """Define and evaluate the command line interface of this script.

    Returns
    -------
    argparse.Namespace
        Parsed arguments: input_file, output_prefix, number_of_peaks,
        min_distance, peak_caller, orientations, output_format.
    """
    argument_parser = argparse.ArgumentParser(
        description="Peak Calling for Template Matching Outputs"
    )
    add_option = argument_parser.add_argument

    add_option(
        "--input_file",
        required=True,
        help="Path to the output of match_template.py.",
    )
    add_option(
        "--output_prefix",
        required=True,
        help="Prefix for the output file name. Extension depends on output_format.",
    )
    add_option(
        "--number_of_peaks", type=int, default=1000, help="Number of peaks to consider."
    )
    add_option(
        "--min_distance", type=int, default=5, help="Minimum distance between peaks."
    )
    add_option(
        "--peak_caller",
        choices=list(PEAK_CALLERS.keys()),
        default="PeakCallerScipy",
        help="Peak caller to use for analysis. Ignored if input_file contains peaks.",
    )
    add_option(
        "--orientations",
        default=None,
        help="Path to orientations file to overwrite orientations computed from"
        " match_template.py output.",
    )
    add_option(
        "--output_format",
        choices=["orientations", "alignment", "extraction"],
        default="orientations",
        help="Choose the output format. Available formats are: "
        "orientations (translation, rotation, and score), "
        "alignment (aligned template to target based on orientations), "
        "extraction (extract regions around peaks from targets, i.e. subtomograms).",
    )

    return argument_parser.parse_args()
|
81
|
+
|
82
|
+
|
83
|
+
def main():
    """Post-process a match_template.py run.

    Depending on ``--output_format`` this either writes a TSV of peak
    orientations, extracts cubic regions (subtomograms) around each peak,
    or writes the template rigidly transformed to each peak position.

    Raises
    ------
    SystemExit
        After writing orientations/extractions, or when no peak survives
        boundary filtering during extraction.
    """
    args = parse_args()
    data = load_pickle(args.input_file)

    # The pickle's last element always carries run metadata.
    meta = data[-1]

    orientations = []
    if args.orientations is None:
        if data[0].ndim == data[2].ndim:
            # Raw score volume: run a peak caller to obtain candidates.
            scores, offset, rotations, rotation_mapping, meta = data
            peak_caller = PEAK_CALLERS[args.peak_caller](
                number_of_peaks=args.number_of_peaks, min_distance=args.min_distance
            )
            peak_caller(scores, rotation_matrix=np.eye(3))
            candidates = peak_caller.merge([tuple(peak_caller)])
            for translation, _, score, detail in zip(*candidates):
                # Look up the Euler angles of the best rotation at this voxel.
                angles = rotation_mapping[rotations[tuple(translation)]]
                orientations.append((translation, angles, score, detail))
        else:
            # Input already contains called peaks.
            candidates = data
            translation, rotation, score, detail, *_ = data
            for i in range(translation.shape[0]):
                angles = euler_from_rotationmatrix(rotation[i])
                orientations.append(
                    (np.array(translation[i]), angles, score[i], detail[i])
                )
    else:
        # User-supplied TSV with header:
        # z y x euler_z euler_y euler_x score detail
        with open(args.orientations, mode="r", encoding="utf-8") as infile:
            data = [x.strip().split("\t") for x in infile.read().split("\n")]
        _ = data.pop(0)  # drop header row
        translation, rotation, score, detail = [], [], [], []
        for candidate in data:
            if len(candidate) <= 1:
                continue
            if len(candidate) != 8:
                # Pad a missing detail column. NOTE(review): assumes at most
                # one trailing column is absent — shorter rows still fail below.
                candidate.append(-1)

            candidate = [float(x) for x in candidate]
            translation.append((candidate[0], candidate[1], candidate[2]))
            rotation.append(
                euler_to_rotationmatrix((candidate[3], candidate[4], candidate[5]))
            )
            score.append(candidate[6])
            detail.append(candidate[7])
            orientations.append(
                (
                    translation[-1],
                    (candidate[3], candidate[4], candidate[5]),
                    score[-1],
                    detail[-1],
                )
            )

        candidates = (
            np.vstack(translation).astype(int),
            np.vstack(rotation).astype(float),
            np.array(score).astype(float),
            np.array(detail).astype(float),
        )

    if args.output_format == "orientations":
        header = "\t".join(
            ["z", "y", "x", "euler_z", "euler_y", "euler_x", "score", "detail"]
        )
        output_file = f"{args.output_prefix}.tsv"
        with open(output_file, mode="w", encoding="utf-8") as ofile:
            _ = ofile.write(f"{header}\n")
            for translation, angles, score, detail in orientations:
                translation_string = "\t".join([str(x) for x in translation])
                angle_string = "\t".join([str(x) for x in angles])
                _ = ofile.write(
                    f"{translation_string}\t{angle_string}\t{score}\t{detail}\n"
                )
        exit(0)

    target_origin, _, sampling_rate, cli_args = meta

    # Shape-(1,) placeholder mask broadcasts as a no-op multiplication.
    template_mask = Density(np.ones(1))
    template_is_density, index = True, 0
    _, template_extension = splitext(cli_args.template)
    try:
        template = Density.from_file(cli_args.template)
        template, _ = template.centered(0)
        center_of_mass = template.center_of_mass(template.data)
    except ValueError:
        # Not a density map — treat the template as an atomic structure.
        template_is_density = False
        template = Structure.from_file(cli_args.template)
        center_of_mass = template.center_of_mass()[::-1]

    if cli_args.template_mask is not None:
        template_mask = Density.from_file(cli_args.template_mask)

    if args.output_format == "extraction":
        target = Density.from_file(cli_args.target)
        # isinstance instead of type(...) == ... — also covers subclasses.
        if isinstance(template, Structure):
            template = Density.from_structure(
                template, sampling_rate=target.sampling_rate
            )

        if not np.all(np.divide(target.shape, template.shape) > 2):
            print(
                "Target might be too small relative to template to extract"
                " meaningful particles."
                f" Target : {target.shape}, template : {template.shape}."
            )

        peaks = candidates[0].astype(int)
        max_shape = np.max(template.shape).astype(int)
        half_shape = max_shape // 2

        left_pad = half_shape
        right_pad = np.add(half_shape, max_shape % 2)
        starts = np.subtract(peaks, left_pad)
        stops = np.add(peaks, right_pad)

        candidate_starts = np.maximum(starts, 0).astype(int)
        candidate_stops = np.minimum(stops, target.shape).astype(int)
        # Keep only peaks whose full extraction box lies inside the target.
        keep_peaks = (
            np.sum(
                np.multiply(starts == candidate_starts, stops == candidate_stops),
                axis=1,
            )
            == peaks.shape[1]
        )

        peaks = peaks[keep_peaks,]
        starts = starts[keep_peaks,]
        stops = stops[keep_peaks,]
        candidate_starts = candidate_starts[keep_peaks,]
        candidate_stops = candidate_stops[keep_peaks,]

        if not len(peaks):
            print(
                "No peak remaining after filtering. Started with"
                f" {candidates[0].shape[0]} filtered to {peaks.shape[0]}."
                " Consider reducing min_distance, increase num_peaks or use"
                " a different peak caller."
            )
            exit(-1)

        observation_starts = np.subtract(candidate_starts, starts).astype(int)
        observation_stops = np.subtract(np.add(max_shape, candidate_stops), stops)
        observation_stops = observation_stops.astype(int)

        candidate_slices = [
            tuple(slice(s, e) for s, e in zip(start_row, stop_row))
            for start_row, stop_row in zip(candidate_starts, candidate_stops)
        ]

        observation_slices = [
            tuple(slice(s, e) for s, e in zip(start_row, stop_row))
            for start_row, stop_row in zip(observation_starts, observation_stops)
        ]
        # NOTE(review): cube shape assumes a 3D target — confirm upstream.
        observations = np.zeros(
            (len(candidate_slices), max_shape, max_shape, max_shape)
        )

        slices = zip(candidate_slices, observation_slices)
        for idx, (cand_slice, obs_slice) in enumerate(slices):
            # Fill with the local mean so out-of-bounds padding is neutral.
            observations[idx][:] = np.mean(target.data[cand_slice])
            observations[idx][obs_slice] = target.data[cand_slice]

        for index in range(observations.shape[0]):
            out_density = Density(
                data=observations[index],
                sampling_rate=sampling_rate,
                origin=candidate_starts[index] * sampling_rate,
            )
            out_density.data = out_density.data * template_mask.data
            out_density.to_file(f"{args.output_prefix}{index}.mrc")
        exit(0)

    # output_format == "alignment": write one transformed template per peak.
    for translation, angles, *_ in orientations:
        rotation_matrix = euler_to_rotationmatrix(angles)

        if template_is_density:
            translation = np.subtract(translation, center_of_mass)
            transformed_template = template.rigid_transform(
                rotation_matrix=rotation_matrix
            )
            new_origin = np.add(target_origin / sampling_rate, translation)
            transformed_template.origin = np.multiply(new_origin, sampling_rate)
        else:
            new_center_of_mass = np.add(
                np.multiply(translation, sampling_rate), target_origin
            )
            translation = np.subtract(new_center_of_mass, center_of_mass)
            # Structures use xyz order, hence the axis reversals.
            transformed_template = template.rigid_transform(
                translation=translation[::-1],
                rotation_matrix=rotation_matrix[::-1, ::-1],
            )
        transformed_template.to_file(f"{args.output_prefix}{index}{template_extension}")
        index += 1
|
276
|
+
|
277
|
+
|
278
|
+
# Script entry point.
if __name__ == "__main__":
    main()
|
scripts/preprocess.py
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!python3
|
2
|
+
""" Apply tme.preprocessor.Preprocessor methods to an input file based
|
3
|
+
on a provided yaml configuration obtained from preprocessor_gui.py.
|
4
|
+
|
5
|
+
Copyright (c) 2023 European Molecular Biology Laboratory
|
6
|
+
|
7
|
+
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
|
8
|
+
"""
|
9
|
+
import yaml
|
10
|
+
import argparse
|
11
|
+
import textwrap
|
12
|
+
from tme import Preprocessor, Density
|
13
|
+
|
14
|
+
|
15
|
+
def parse_args():
    """Define and evaluate the command line interface of this script.

    Returns
    -------
    argparse.Namespace
        Parsed arguments: input_file, yaml_file, output_file, compress.
    """
    parser = argparse.ArgumentParser(
        description=textwrap.dedent(
            """
            Apply preprocessing to an input file based on a provided YAML configuration.

            Expected YAML file format:
            ```yaml
            <method_name>:
              <parameter1>: <value1>
              <parameter2>: <value2>
              ...
            ```
            """
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-i",
        "--input_file",
        type=str,
        required=True,
        help="Path to the input data file in CCP4/MRC format.",
    )
    parser.add_argument(
        "-y",
        "--yaml_file",
        type=str,
        required=True,
        help="Path to the YAML configuration file.",
    )
    parser.add_argument(
        "-o",
        "--output_file",
        type=str,
        required=True,
        # Fixed typo: was "CPP4/MRC format.." (wrong acronym, double period).
        help="Path to output file in CCP4/MRC format.",
    )
    parser.add_argument(
        "--compress", action="store_true", help="Compress the output file using gzip."
    )

    args = parser.parse_args()

    return args
|
60
|
+
|
61
|
+
|
62
|
+
def main():
    """Apply a single Preprocessor method, chosen via YAML config, to a file.

    Reads the density from ``--input_file``, applies the configured
    ``tme.preprocessor.Preprocessor`` method to its data, and writes the
    result to ``--output_file`` (optionally gzip-compressed).

    Raises
    ------
    ValueError
        If the YAML config is empty or names a method Preprocessor lacks.
    NotImplementedError
        If more than one method is specified.
    """
    args = parse_args()
    with open(args.yaml_file, "r", encoding="utf-8") as f:
        preprocess_settings = yaml.safe_load(f)

    # yaml.safe_load returns None for an empty file; fail with a clear error
    # instead of a TypeError from len(None).
    if not preprocess_settings:
        raise ValueError(f"No preprocessing method specified in {args.yaml_file}.")

    if len(preprocess_settings) > 1:
        raise NotImplementedError(
            "Multiple preprocessing methods specified. "
            "The script currently supports one method at a time."
        )

    method_name = next(iter(preprocess_settings))
    method_params = preprocess_settings[method_name]

    # Single existence check (the original checked twice, once via hasattr and
    # once via getattr, with an error message naming the wrong package).
    preprocessor = Preprocessor()
    method = getattr(preprocessor, method_name, None)
    if method is None:
        raise ValueError(
            f"Method {method_name} does not exist in tme.preprocessor.Preprocessor."
        )

    density = Density.from_file(args.input_file)
    output = density.empty

    output.data = method(template=density.data, **method_params)
    output.to_file(args.output_file, gzip=args.compress)
|
90
|
+
|
91
|
+
|
92
|
+
# Script entry point.
if __name__ == "__main__":
    main()
|