pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
  2. pytme-0.1.5.data/scripts/match_template.py +744 -0
  3. pytme-0.1.5.data/scripts/postprocess.py +279 -0
  4. pytme-0.1.5.data/scripts/preprocess.py +93 -0
  5. pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
  6. pytme-0.1.5.dist-info/LICENSE +153 -0
  7. pytme-0.1.5.dist-info/METADATA +69 -0
  8. pytme-0.1.5.dist-info/RECORD +63 -0
  9. pytme-0.1.5.dist-info/WHEEL +5 -0
  10. pytme-0.1.5.dist-info/entry_points.txt +6 -0
  11. pytme-0.1.5.dist-info/top_level.txt +2 -0
  12. scripts/__init__.py +0 -0
  13. scripts/estimate_ram_usage.py +81 -0
  14. scripts/match_template.py +744 -0
  15. scripts/match_template_devel.py +788 -0
  16. scripts/postprocess.py +279 -0
  17. scripts/preprocess.py +93 -0
  18. scripts/preprocessor_gui.py +729 -0
  19. tme/__init__.py +6 -0
  20. tme/__version__.py +1 -0
  21. tme/analyzer.py +1144 -0
  22. tme/backends/__init__.py +134 -0
  23. tme/backends/cupy_backend.py +309 -0
  24. tme/backends/matching_backend.py +1154 -0
  25. tme/backends/npfftw_backend.py +763 -0
  26. tme/backends/pytorch_backend.py +526 -0
  27. tme/data/__init__.py +0 -0
  28. tme/data/c48n309.npy +0 -0
  29. tme/data/c48n527.npy +0 -0
  30. tme/data/c48n9.npy +0 -0
  31. tme/data/c48u1.npy +0 -0
  32. tme/data/c48u1153.npy +0 -0
  33. tme/data/c48u1201.npy +0 -0
  34. tme/data/c48u1641.npy +0 -0
  35. tme/data/c48u181.npy +0 -0
  36. tme/data/c48u2219.npy +0 -0
  37. tme/data/c48u27.npy +0 -0
  38. tme/data/c48u2947.npy +0 -0
  39. tme/data/c48u3733.npy +0 -0
  40. tme/data/c48u4749.npy +0 -0
  41. tme/data/c48u5879.npy +0 -0
  42. tme/data/c48u7111.npy +0 -0
  43. tme/data/c48u815.npy +0 -0
  44. tme/data/c48u83.npy +0 -0
  45. tme/data/c48u8649.npy +0 -0
  46. tme/data/c600v.npy +0 -0
  47. tme/data/c600vc.npy +0 -0
  48. tme/data/metadata.yaml +80 -0
  49. tme/data/quat_to_numpy.py +42 -0
  50. tme/data/scattering_factors.pickle +0 -0
  51. tme/density.py +2314 -0
  52. tme/extensions.cpython-311-darwin.so +0 -0
  53. tme/helpers.py +881 -0
  54. tme/matching_data.py +377 -0
  55. tme/matching_exhaustive.py +1553 -0
  56. tme/matching_memory.py +382 -0
  57. tme/matching_optimization.py +1123 -0
  58. tme/matching_utils.py +1180 -0
  59. tme/parser.py +429 -0
  60. tme/preprocessor.py +1291 -0
  61. tme/scoring.py +866 -0
  62. tme/structure.py +1428 -0
  63. tme/types.py +10 -0
tme/parser.py ADDED
@@ -0,0 +1,429 @@
""" Implements parsers for atomic structure file formats.

Copyright (c) 2023 European Molecular Biology Laboratory

Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
"""
import re
from collections import deque
from typing import List, Dict
from abc import ABC, abstractmethod

import numpy as np


class Parser(ABC):
    """
    Base class for structure file parsers.

    Classes inheriting from :py:class:`Parser` need to define
    a ``parse_input`` method that accepts a list of lines and returns a
    dictionary representation of the data.
    """

    def __init__(self, filename: str, mode: str = "r") -> None:
        """
        Initialize a Parser object.

        Parameters
        ----------
        filename : str
            File name to parse data from.

        mode : str, optional
            Mode to open the file. Default is 'r' for read.
        """
        with open(filename, mode) as infile:
            data = infile.read()

        data = deque(filter(lambda line: line and line[0] != "#", data.split("\n")))
        self._data = self.parse_input(data)

    def __getitem__(self, key: str):
        """
        Retrieve a value from the internal data using a given key.

        Parameters
        ----------
        key : str
            The key to use for retrieving the corresponding value from
            the internal data.

        Returns
        -------
        value
            The value associated with the provided key in the internal data.
        """
        return self._data[key]

    def __contains__(self, key) -> bool:
        """
        Check if a given key exists in the internal data.

        Parameters
        ----------
        key : str
            The key to check for in the internal data.

        Returns
        -------
        bool
            True if the key exists in the internal data, False otherwise.
        """
        return key in self._data

    def get(self, key, default):
        """
        Retrieve a value from the internal data using a given key. If the
        key does not exist, return a default value.

        Parameters
        ----------
        key : str
            The key to use for retrieving the corresponding value from
            the internal data.

        default : Any
            The value to return if the key does not exist in the internal data.

        Returns
        -------
        value
            The value associated with the provided key in the internal data,
            or the default value if the key does not exist.
        """
        if key in self._data:
            return self[key]
        return default

    def keys(self):
        """
        List keys available in internal dictionary.
        """
        return self._data.keys()

    def values(self):
        """
        List values available in internal dictionary.
        """
        return self._data.values()

    def items(self):
        """
        List items available in internal dictionary.
        """
        return self._data.items()

    @abstractmethod
    def parse_input(self, lines: List[str]) -> Dict:
        """
        Parse a list of lines from a file and convert the data into a dictionary.

        This function is not intended to be called directly, but should rather be
        defined by classes inheriting from :py:class:`Parser` to parse a given
        file format.

        Parameters
        ----------
        lines : list of str
            The lines of a structure file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data.
        """
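

# A minimal sketch of a custom subclass (illustrative only; the two-column
# key/value format it parses is hypothetical and not a pytme file format).
# Parser.__init__ reads the file, drops empty lines and lines starting with "#",
# and hands the remaining lines to parse_input.
class ExampleKeyValueParser(Parser):
    """Parse whitespace-separated 'key value' lines into a dictionary (sketch)."""

    def parse_input(self, lines: List[str]) -> Dict:
        data = {}
        for line in lines:
            fields = line.split()
            if len(fields) >= 2:
                data[fields[0]] = " ".join(fields[1:])
        return data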


class PDBParser(Parser):
    """
    A Parser subclass for converting PDB file data into a dictionary representation.
    This class is specifically designed to work with PDB file format.

    References
    ----------
    .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
    """

    def parse_input(self, lines: List[str]) -> Dict:
        """
        Parse a list of lines from a PDB file and convert the data into a dictionary.

        Parameters
        ----------
        lines : list of str
            The lines of a PDB file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data from the PDB file.
        """
        metadata = {
            "resolution": re.compile(
                r"(.)+?(EFFECTIVE RESOLUTION\s+\(ANGSTROMS\)){1}(.)+?(\d+\.\d+)(\s)*$"
            ),
            "reconstruction_method": re.compile(
                r"(.)+?(RECONSTRUCTION METHOD)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "electron_source": re.compile(r"(.)+?(SOURCE)+(.)+?(\w+\s*\w+)(\s)*$"),
            "illumination_mode": re.compile(
                r"(.)+?(ILLUMINATION MODE)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "microscope_mode": re.compile(
                r"(.)+?(IMAGING MODE)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "microscope_model": re.compile(
                r"(.)+?(MICROSCOPE MODEL)+(.+?:\s+)+?(.+)(\s)*$"
            ),
        }

        data = {
            "record_type": [],
            "atom_serial_number": [],
            "atom_name": [],
            "alternate_location_indicator": [],
            "residue_name": [],
            "chain_identifier": [],
            "residue_sequence_number": [],
            "code_for_residue_insertion": [],
            "atom_coordinate": [],
            "occupancy": [],
            "temperature_factor": [],
            "segment_identifier": [],
            "element_symbol": [],
            "charge": [],
            "details": {},
        }
        data["details"]["resolution"] = np.nan

        for line in lines:
            if line.startswith("REMARK"):
                matches = [(key, metadata[key].match(line)) for key in metadata]
                matches = [match for match in matches if match[1]]
                for key, match in matches:
                    data["details"][key] = match.group(4)
                    _ = metadata.pop(key)
            elif line.startswith("ATOM") or line.startswith("HETATM"):
                data["record_type"].append(line[0:6])
                data["atom_serial_number"].append(line[6:11])
                data["atom_name"].append(line[12:16])
                data["alternate_location_indicator"].append(line[16])
                data["residue_name"].append(line[17:20])

                data["chain_identifier"].append(line[21])
                data["residue_sequence_number"].append(line[22:26])
                data["code_for_residue_insertion"].append(line[26])
                data["atom_coordinate"].append((line[30:38], line[38:46], line[46:54]))
                data["occupancy"].append(line[54:60])
                data["temperature_factor"].append(line[60:66])
                data["segment_identifier"].append(line[74:76])
                data["element_symbol"].append(line[76:78])
                data["charge"].append(line[78:80])

        data["details"]["resolution"] = float(data["details"]["resolution"])

        return data
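

# A minimal usage sketch for PDBParser, assuming a local file "structure.pdb"
# exists; the helper name below is illustrative, not part of the pytme API.
# Coordinates are stored as fixed-column string slices and converted to floats here.
def _example_pdb_usage(path: str = "structure.pdb"):
    parser = PDBParser(path)
    coordinates = np.array(parser["atom_coordinate"], dtype=float)
    resolution = parser["details"].get("resolution", np.nan)
    return coordinates, resolution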


class MMCIFParser(Parser):
    """
    A Parser subclass for converting MMCIF file data into a dictionary representation.
    This implementation heavily relies on the atomium library:

    References
    ----------
    .. [1] Ireland, S. M., & Martin, A. C. R. (2020). atomium (Version 1.0.0)
           [Computer software]. https://doi.org/10.1093/bioinformatics/btaa072
    """

    def parse_input(self, lines: List[str]) -> Dict:
        """
        Parse a list of lines from an MMCIF file and convert the data into a dictionary.

        Parameters
        ----------
        lines : list of str
            The lines of an MMCIF file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data from the MMCIF file.
        """
        lines = self._consolidate_strings(lines)
        blocks = self._split_in_blocks(lines)
        mmcif_dict = {}
        for block in blocks:
            if block["lines"][0] == "loop_":
                mmcif_dict[block["category"]] = self._loop_block_to_dict(block)
            else:
                mmcif_dict[block["category"]] = self._non_loop_block_to_dict(block)
        return mmcif_dict
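
    # For orientation, a hedged sketch of the mapping performed above. Given a
    # simplified, hypothetical mmCIF fragment
    #
    #     _entry.id   1ABC
    #     loop_
    #     _atom_site.group_PDB
    #     _atom_site.id
    #     ATOM 1
    #     ATOM 2
    #
    # parse_input returns roughly
    #
    #     {"entry": {"id": "1ABC"},
    #      "atom_site": {"group_PDB": ["ATOM", "ATOM"], "id": ["1", "2"]}}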

    @staticmethod
    def _consolidate_strings(lines: List[str]) -> List[str]:
        """
        Consolidate multi-line strings that have been separated by semicolons in a
        list of strings.

        Parameters
        ----------
        lines : deque of str
            Deque of strings where each string is a line from an MMCIF file.

        Returns
        -------
        deque of str
            A deque of consolidated strings from the given input.
        """
        new_lines = deque()
        while lines:
            line = lines.popleft()
            if line.startswith(";"):
                string = [line[1:].strip()]
                while not lines[0].startswith(";"):
                    string.append(lines.popleft())
                lines.popleft()
                new_lines[-1] += ' "{}"'.format(
                    " ".join(string).replace('"', "").replace("'", "'")
                )
            else:
                new_lines.append(line.replace('"', "").replace("'", "'"))
        return new_lines
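
    # A hedged worked example of the consolidation above on a hypothetical fragment:
    #
    #     _struct.title
    #     ;A multi
    #     line title
    #     ;
    #
    # becomes the single line
    #
    #     _struct.title "A multi line title"
    #
    # i.e. the semicolon-delimited block is joined, quoted, and appended to the
    # preceding line.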

    @staticmethod
    def _split_in_blocks(lines: List[str]) -> List[Dict]:
        """
        Split a deque of consolidated strings into a list of dictionaries,
        each representing a block of data.

        Parameters
        ----------
        lines : deque of str
            Deque of consolidated strings where each string is a line from
            an MMCIF file.

        Returns
        -------
        list of dict
            A list of dictionaries where each dictionary represents a block
            of data from the MMCIF file.
        """
        category = None
        block, blocks = [], []
        while lines:
            line = lines.popleft()
            if line.startswith("data_"):
                continue
            if line.startswith("_"):
                line_category = line.split(".")[0]
                if line_category != category:
                    if category:
                        blocks.append({"category": category[1:], "lines": block})
                    category = line_category
                    block = []
            if line.startswith("loop_"):
                if category:
                    blocks.append({"category": category[1:], "lines": block})
                category = lines[0].split(".")[0]
                block = []
            block.append(line)
        if block:
            blocks.append({"category": category[1:], "lines": block})
        return blocks
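
    # Hedged sketch: for the consolidated lines of the hypothetical fragment shown
    # after parse_input above, this yields
    #
    #     [{"category": "entry", "lines": ["_entry.id   1ABC"]},
    #      {"category": "atom_site",
    #       "lines": ["loop_", "_atom_site.group_PDB", "_atom_site.id",
    #                 "ATOM 1", "ATOM 2"]}]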

    @staticmethod
    def _non_loop_block_to_dict(block: Dict) -> Dict:
        """
        Convert a non-loop block of data into a dictionary.

        Parameters
        ----------
        block : dict
            A dictionary representing a non-loop block of data from an MMCIF file.

        Returns
        -------
        dict
            A dictionary representing the parsed data from the given non-loop block.
        """
        d = {}
        # category = block["lines"][0].split(".")[0]
        for index in range(len(block["lines"]) - 1):
            if block["lines"][index + 1][0] != "_":
                block["lines"][index] += " " + block["lines"][index + 1]
        block["lines"] = [line for line in block["lines"] if line[0] == "_"]
        for line in block["lines"]:
            name = line.split(".")[1].split()[0]
            value = " ".join(line.split()[1:])
            d[name] = value
        return d

    def _loop_block_to_dict(self, block: Dict) -> Dict:
        """
        Convert a loop block of data into a dictionary.

        Parameters
        ----------
        block : dict
            A dictionary representing a loop block of data from an MMCIF file.

        Returns
        -------
        dict
            A dictionary representing the parsed data from the given loop block.
        """
        names, lines = [], []
        body_start = 0
        for index, line in enumerate(block["lines"][1:], start=1):
            if not line.startswith("_" + block["category"]):
                body_start = index
                break
        names = [line.split(".")[1].rstrip() for line in block["lines"][1:body_start]]
        lines = [self._split_line(line) for line in block["lines"][body_start:]]
        # reunites broken lines
        for n in range(len(lines) - 1):
            while n < len(lines) - 1 and len(lines[n]) + len(lines[n + 1]) <= len(
                names
            ):
                lines[n] += lines.pop(n + 1)
        res = {name: [] for name in names}
        for line in lines:
            for name, value in zip(names, line):
                res[name].append(value)
        return res

    @staticmethod
    def _split_line(line: str) -> List[str]:
        """
        Split a string into substrings, ignoring quotation marks within the string.

        Parameters
        ----------
        line : str
            The string to be split.

        Returns
        -------
        list of str
            A list of substrings resulting from the split operation on the given string.
        """
        if not re.search("['\"]", line):
            return line.split()

        chars = deque(line.strip())
        values, value, in_string = [], [], False
        while chars:
            char = chars.popleft()
            if char == " " and not in_string:
                values.append("".join(value))
                value = []
            elif char == '"':
                in_string = not in_string
                value.append(char)
            else:
                value.append(char)

        values.append(value)
        return ["".join(v) for v in values if v]
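

# A minimal end-to-end sketch, assuming a local file "structure.cif" exists;
# the _atom_site column names are the standard mmCIF ones and are used here for
# illustration rather than guaranteed to be present in every file.
if __name__ == "__main__":
    parser = MMCIFParser("structure.cif")
    atom_site = parser.get("atom_site", {})
    if atom_site:
        coordinates = np.array(
            [atom_site["Cartn_x"], atom_site["Cartn_y"], atom_site["Cartn_z"]],
            dtype=float,
        ).T
        print("Parsed", coordinates.shape[0], "atom coordinates")
    print("Categories:", sorted(parser.keys()))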