pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
- pytme-0.1.5.data/scripts/match_template.py +744 -0
- pytme-0.1.5.data/scripts/postprocess.py +279 -0
- pytme-0.1.5.data/scripts/preprocess.py +93 -0
- pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
- pytme-0.1.5.dist-info/LICENSE +153 -0
- pytme-0.1.5.dist-info/METADATA +69 -0
- pytme-0.1.5.dist-info/RECORD +63 -0
- pytme-0.1.5.dist-info/WHEEL +5 -0
- pytme-0.1.5.dist-info/entry_points.txt +6 -0
- pytme-0.1.5.dist-info/top_level.txt +2 -0
- scripts/__init__.py +0 -0
- scripts/estimate_ram_usage.py +81 -0
- scripts/match_template.py +744 -0
- scripts/match_template_devel.py +788 -0
- scripts/postprocess.py +279 -0
- scripts/preprocess.py +93 -0
- scripts/preprocessor_gui.py +729 -0
- tme/__init__.py +6 -0
- tme/__version__.py +1 -0
- tme/analyzer.py +1144 -0
- tme/backends/__init__.py +134 -0
- tme/backends/cupy_backend.py +309 -0
- tme/backends/matching_backend.py +1154 -0
- tme/backends/npfftw_backend.py +763 -0
- tme/backends/pytorch_backend.py +526 -0
- tme/data/__init__.py +0 -0
- tme/data/c48n309.npy +0 -0
- tme/data/c48n527.npy +0 -0
- tme/data/c48n9.npy +0 -0
- tme/data/c48u1.npy +0 -0
- tme/data/c48u1153.npy +0 -0
- tme/data/c48u1201.npy +0 -0
- tme/data/c48u1641.npy +0 -0
- tme/data/c48u181.npy +0 -0
- tme/data/c48u2219.npy +0 -0
- tme/data/c48u27.npy +0 -0
- tme/data/c48u2947.npy +0 -0
- tme/data/c48u3733.npy +0 -0
- tme/data/c48u4749.npy +0 -0
- tme/data/c48u5879.npy +0 -0
- tme/data/c48u7111.npy +0 -0
- tme/data/c48u815.npy +0 -0
- tme/data/c48u83.npy +0 -0
- tme/data/c48u8649.npy +0 -0
- tme/data/c600v.npy +0 -0
- tme/data/c600vc.npy +0 -0
- tme/data/metadata.yaml +80 -0
- tme/data/quat_to_numpy.py +42 -0
- tme/data/scattering_factors.pickle +0 -0
- tme/density.py +2314 -0
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/helpers.py +881 -0
- tme/matching_data.py +377 -0
- tme/matching_exhaustive.py +1553 -0
- tme/matching_memory.py +382 -0
- tme/matching_optimization.py +1123 -0
- tme/matching_utils.py +1180 -0
- tme/parser.py +429 -0
- tme/preprocessor.py +1291 -0
- tme/scoring.py +866 -0
- tme/structure.py +1428 -0
- tme/types.py +10 -0
tme/parser.py
ADDED
@@ -0,0 +1,429 @@
""" Implements parsers for atomic structure file formats.

    Copyright (c) 2023 European Molecular Biology Laboratory

    Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
"""
import re
from collections import deque
from typing import List, Dict
from abc import ABC, abstractmethod

import numpy as np


class Parser(ABC):
    """
    Base class for structure file parsers.

    Classes inheriting from :py:class:`Parser` need to define
    a ``parse_input`` method that accepts a list of lines and returns a
    dictionary representation of the data.
    """

    def __init__(self, filename: str, mode: str = "r") -> None:
        """
        Initialize a Parser object.

        Parameters
        ----------
        filename : str
            File name to parse data from.

        mode : str, optional
            Mode to open the file. Default is 'r' for read.
        """
        with open(filename, "r") as infile:
            data = infile.read()

        data = deque(filter(lambda line: line and line[0] != "#", data.split("\n")))
        self._data = self.parse_input(data)

    def __getitem__(self, key: str):
        """
        Retrieve a value from the internal data using a given key.

        Parameters
        ----------
        key : str
            The key to use for retrieving the corresponding value from
            the internal data.

        Returns
        -------
        value
            The value associated with the provided key in the internal data.
        """
        return self._data[key]

    def __contains__(self, key) -> bool:
        """
        Check if a given key exists in the internal data.

        Parameters
        ----------
        key : str
            The key to check for in the internal data.

        Returns
        -------
        bool
            True if the key exists in the internal data, False otherwise.
        """
        return key in self._data

    def get(self, key, default):
        """
        Retrieve a value from the internal data using a given key. If the
        key does not exist, return a default value.

        Parameters
        ----------
        key : str
            The key to use for retrieving the corresponding value from
            the internal data.

        default : Any
            The value to return if the key does not exist in the internal data.

        Returns
        -------
        value
            The value associated with the provided key in the internal data,
            or the default value if the key does not exist.
        """
        if key in self._data:
            return self[key]
        return default

    def keys(self):
        """
        List keys available in internal dictionary.
        """
        return self._data.keys()

    def values(self):
        """
        List values available in internal dictionary.
        """
        return self._data.values()

    def items(self):
        """
        List items available in internal dictionary.
        """
        return self._data.items()

    @abstractmethod
    def parse_input(self, lines: List[str]) -> Dict:
        """
        Parse a list of lines from a file and convert the data into a dictionary.

        This function is not intended to be called directly, but should rather be
        defined by classes inheriting from :py:class:`Parser` to parse a given
        file format.

        Parameters
        ----------
        lines : list of str
            The lines of a structure file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data.
        """

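# Illustrative sketch of the subclassing contract described above. The class
# below is a hypothetical example (it is not defined in tme.parser): a subclass
# only needs to implement ``parse_input``; file reading, comment filtering and
# the dictionary-style interface are inherited from :py:class:`Parser`.
class _KeyValueParser(Parser):
    """Hypothetical parser for whitespace-delimited ``key value`` lines."""

    def parse_input(self, lines: List[str]) -> Dict:
        data = {}
        for line in lines:
            fields = line.split(maxsplit=1)
            if len(fields) == 2:
                data[fields[0]] = fields[1]
        return data
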
class PDBParser(Parser):
    """
    A Parser subclass for converting PDB file data into a dictionary representation.
    This class is specifically designed to work with PDB file format.

    References
    ----------
    .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
    """

    def parse_input(self, lines: List[str]) -> Dict:
        """
        Parse a list of lines from a PDB file and convert the data into a dictionary.

        Parameters
        ----------
        lines : list of str
            The lines of a PDB file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data from the PDB file.
        """
        metadata = {
            "resolution": re.compile(
                r"(.)+?(EFFECTIVE RESOLUTION\s+\(ANGSTROMS\)){1}(.)+?(\d+\.\d+)(\s)*$"
            ),
            "reconstruction_method": re.compile(
                r"(.)+?(RECONSTRUCTION METHOD)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "electron_source": re.compile(r"(.)+?(SOURCE)+(.)+?(\w+\s*\w+)(\s)*$"),
            "illumination_mode": re.compile(
                r"(.)+?(ILLUMINATION MODE)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "microscope_mode": re.compile(
                r"(.)+?(IMAGING MODE)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "microscope_model": re.compile(
                r"(.)+?(MICROSCOPE MODEL)+(.+?:\s+)+?(.+)(\s)*$"
            ),
        }

        data = {
            "record_type": [],
            "atom_serial_number": [],
            "atom_name": [],
            "alternate_location_indicator": [],
            "residue_name": [],
            "chain_identifier": [],
            "residue_sequence_number": [],
            "code_for_residue_insertion": [],
            "atom_coordinate": [],
            "occupancy": [],
            "temperature_factor": [],
            "segment_identifier": [],
            "element_symbol": [],
            "charge": [],
            "details": {},
        }
        data["details"]["resolution"] = np.nan

        for line in lines:
            if line.startswith("REMARK"):
                matches = [(key, metadata[key].match(line)) for key in metadata]
                matches = [match for match in matches if match[1]]
                for key, match in matches:
                    data["details"][key] = match.group(4)
                    _ = metadata.pop(key)
            elif line.startswith("ATOM") or line.startswith("HETATM"):
                data["record_type"].append(line[0:6])
                data["atom_serial_number"].append(line[6:11])
                data["atom_name"].append(line[12:16])
                data["alternate_location_indicator"].append(line[16])
                data["residue_name"].append(line[17:20])

                data["chain_identifier"].append(line[21])
                data["residue_sequence_number"].append(line[22:26])
                data["code_for_residue_insertion"].append(line[26])
                data["atom_coordinate"].append((line[30:38], line[38:46], line[46:54]))
                data["occupancy"].append(line[54:60])
                data["temperature_factor"].append(line[60:66])
                data["segment_identifier"].append(line[74:76])
                data["element_symbol"].append(line[76:78])
                data["charge"].append(line[78:80])

        data["details"]["resolution"] = float(data["details"]["resolution"])

        return data

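# Illustrative check of the REMARK regexes used above. The line below is a
# hypothetical example rather than a record from a specific PDB entry; group 4
# of each pattern carries the value that parse_input stores under
# data["details"].
_EXAMPLE_REMARK = "REMARK 245  EFFECTIVE RESOLUTION (ANGSTROMS) : 3.20"
_EXAMPLE_PATTERN = re.compile(
    r"(.)+?(EFFECTIVE RESOLUTION\s+\(ANGSTROMS\)){1}(.)+?(\d+\.\d+)(\s)*$"
)
# _EXAMPLE_PATTERN.match(_EXAMPLE_REMARK).group(4) == "3.20"
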
class MMCIFParser(Parser):
    """
    A Parser subclass for converting MMCIF file data into a dictionary representation.
    This implementation heavily relies on the atomium library:

    References
    ----------
    .. [1] Ireland, S. M., & Martin, A. C. R. (2020). atomium (Version 1.0.0)
        [Computer software]. https://doi.org/10.1093/bioinformatics/btaa072
    """

    def parse_input(self, lines: List[str]) -> Dict:
        """
        Parse a list of lines from an MMCIF file and convert the data into a dictionary.

        Parameters
        ----------
        lines : list of str
            The lines of an MMCIF file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data from the MMCIF file.
        """
        lines = self._consolidate_strings(lines)
        blocks = self._split_in_blocks(lines)
        mmcif_dict = {}
        for block in blocks:
            if block["lines"][0] == "loop_":
                mmcif_dict[block["category"]] = self._loop_block_to_dict(block)
            else:
                mmcif_dict[block["category"]] = self._non_loop_block_to_dict(block)
        return mmcif_dict

    @staticmethod
    def _consolidate_strings(lines: List[str]) -> List[str]:
        """
        Consolidate multi-line strings that have been separated by semicolons in a
        list of strings.

        Parameters
        ----------
        lines : deque of str
            Deque of strings where each string is a line from an MMCIF file.

        Returns
        -------
        deque of str
            A deque of consolidated strings from the given input.
        """
        new_lines = deque()
        while lines:
            line = lines.popleft()
            if line.startswith(";"):
                string = [line[1:].strip()]
                while not lines[0].startswith(";"):
                    string.append(lines.popleft())
                lines.popleft()
                new_lines[-1] += ' "{}"'.format(
                    " ".join(string).replace('"', "").replace("'", "'")
                )
            else:
                new_lines.append(line.replace('"', "").replace("'", "'"))
        return new_lines

    @staticmethod
    def _split_in_blocks(lines: List[str]) -> List[Dict]:
        """
        Split a deque of consolidated strings into a list of dictionaries,
        each representing a block of data.

        Parameters
        ----------
        lines : deque of str
            Deque of consolidated strings where each string is a line from
            an MMCIF file.

        Returns
        -------
        list of dict
            A list of dictionaries where each dictionary represents a block
            of data from the MMCIF file.
        """
        category = None
        block, blocks = [], []
        while lines:
            line = lines.popleft()
            if line.startswith("data_"):
                continue
            if line.startswith("_"):
                line_category = line.split(".")[0]
                if line_category != category:
                    if category:
                        blocks.append({"category": category[1:], "lines": block})
                    category = line_category
                    block = []
            if line.startswith("loop_"):
                if category:
                    blocks.append({"category": category[1:], "lines": block})
                category = lines[0].split(".")[0]
                block = []
            block.append(line)
        if block:
            blocks.append({"category": category[1:], "lines": block})
        return blocks

    @staticmethod
    def _non_loop_block_to_dict(block: Dict) -> Dict:
        """
        Convert a non-loop block of data into a dictionary.

        Parameters
        ----------
        block : dict
            A dictionary representing a non-loop block of data from an MMCIF file.

        Returns
        -------
        dict
            A dictionary representing the parsed data from the given non-loop block.
        """
        d = {}
        # category = block["lines"][0].split(".")[0]
        for index in range(len(block["lines"]) - 1):
            if block["lines"][index + 1][0] != "_":
                block["lines"][index] += " " + block["lines"][index + 1]
        block["lines"] = [line for line in block["lines"] if line[0] == "_"]
        for line in block["lines"]:
            name = line.split(".")[1].split()[0]
            value = " ".join(line.split()[1:])
            d[name] = value
        return d

    def _loop_block_to_dict(self, block: Dict) -> Dict:
        """
        Convert a loop block of data into a dictionary.

        Parameters
        ----------
        block : dict
            A dictionary representing a loop block of data from an MMCIF file.

        Returns
        -------
        dict
            A dictionary representing the parsed data from the given loop block.
        """
        names, lines = [], []
        body_start = 0
        for index, line in enumerate(block["lines"][1:], start=1):
            if not line.startswith("_" + block["category"]):
                body_start = index
                break
        names = [line.split(".")[1].rstrip() for line in block["lines"][1:body_start]]
        lines = [self._split_line(line) for line in block["lines"][body_start:]]
        # reunites broken lines
        for n in range(len(lines) - 1):
            while n < len(lines) - 1 and len(lines[n]) + len(lines[n + 1]) <= len(
                names
            ):
                lines[n] += lines.pop(n + 1)
        res = {name: [] for name in names}
        for line in lines:
            for name, value in zip(names, line):
                res[name].append(value)
        return res

    @staticmethod
    def _split_line(line: str) -> List[str]:
        """
        Split a string into substrings, ignoring quotation marks within the string.

        Parameters
        ----------
        line : str
            The string to be split.

        Returns
        -------
        list of str
            A list of substrings resulting from the split operation on the given string.
        """
        if not re.search("['\"]", line):
            return line.split()

        chars = deque(line.strip())
        values, value, in_string = [], [], False
        while chars:
            char = chars.popleft()
            if char == " " and not in_string:
                values.append("".join(value))
                value = []
            elif char == '"':
                in_string = not in_string
                value.append(char)
            else:
                value.append(char)

        values.append(value)
        return ["".join(v) for v in values if v]
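

# Usage sketch covering both parsers. The file names are hypothetical and this
# guarded block is an illustrative example, not part of the tme command line
# entry points.
if __name__ == "__main__":
    import sys

    path = sys.argv[1] if len(sys.argv) > 1 else "structure.pdb"
    parser = MMCIFParser(path) if path.endswith(".cif") else PDBParser(path)
    # Both subclasses expose the dictionary-style interface defined by Parser.
    print(sorted(parser.keys()))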