pytme 0.2.9__cp311-cp311-macosx_15_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytme-0.2.9.data/scripts/estimate_ram_usage.py +97 -0
- pytme-0.2.9.data/scripts/match_template.py +1135 -0
- pytme-0.2.9.data/scripts/postprocess.py +622 -0
- pytme-0.2.9.data/scripts/preprocess.py +209 -0
- pytme-0.2.9.data/scripts/preprocessor_gui.py +1227 -0
- pytme-0.2.9.dist-info/METADATA +95 -0
- pytme-0.2.9.dist-info/RECORD +119 -0
- pytme-0.2.9.dist-info/WHEEL +5 -0
- pytme-0.2.9.dist-info/entry_points.txt +6 -0
- pytme-0.2.9.dist-info/licenses/LICENSE +153 -0
- pytme-0.2.9.dist-info/top_level.txt +3 -0
- scripts/__init__.py +0 -0
- scripts/estimate_ram_usage.py +97 -0
- scripts/match_template.py +1135 -0
- scripts/postprocess.py +622 -0
- scripts/preprocess.py +209 -0
- scripts/preprocessor_gui.py +1227 -0
- tests/__init__.py +0 -0
- tests/data/Blurring/blob_width18.npy +0 -0
- tests/data/Blurring/edgegaussian_sigma3.npy +0 -0
- tests/data/Blurring/gaussian_sigma2.npy +0 -0
- tests/data/Blurring/hamming_width6.npy +0 -0
- tests/data/Blurring/kaiserb_width18.npy +0 -0
- tests/data/Blurring/localgaussian_sigma0510.npy +0 -0
- tests/data/Blurring/mean_size5.npy +0 -0
- tests/data/Blurring/ntree_sigma0510.npy +0 -0
- tests/data/Blurring/rank_rank3.npy +0 -0
- tests/data/Maps/.DS_Store +0 -0
- tests/data/Maps/emd_8621.mrc.gz +0 -0
- tests/data/README.md +2 -0
- tests/data/Raw/em_map.map +0 -0
- tests/data/Structures/.DS_Store +0 -0
- tests/data/Structures/1pdj.cif +3339 -0
- tests/data/Structures/1pdj.pdb +1429 -0
- tests/data/Structures/5khe.cif +3685 -0
- tests/data/Structures/5khe.ent +2210 -0
- tests/data/Structures/5khe.pdb +2210 -0
- tests/data/Structures/5uz4.cif +70548 -0
- tests/preprocessing/__init__.py +0 -0
- tests/preprocessing/test_compose.py +76 -0
- tests/preprocessing/test_frequency_filters.py +178 -0
- tests/preprocessing/test_preprocessor.py +136 -0
- tests/preprocessing/test_utils.py +79 -0
- tests/test_analyzer.py +216 -0
- tests/test_backends.py +446 -0
- tests/test_density.py +503 -0
- tests/test_extensions.py +130 -0
- tests/test_matching_cli.py +283 -0
- tests/test_matching_data.py +162 -0
- tests/test_matching_exhaustive.py +124 -0
- tests/test_matching_memory.py +30 -0
- tests/test_matching_optimization.py +226 -0
- tests/test_matching_utils.py +189 -0
- tests/test_orientations.py +175 -0
- tests/test_parser.py +33 -0
- tests/test_rotations.py +153 -0
- tests/test_structure.py +247 -0
- tme/__init__.py +6 -0
- tme/__version__.py +1 -0
- tme/analyzer/__init__.py +2 -0
- tme/analyzer/_utils.py +186 -0
- tme/analyzer/aggregation.py +577 -0
- tme/analyzer/peaks.py +953 -0
- tme/backends/__init__.py +171 -0
- tme/backends/_cupy_utils.py +734 -0
- tme/backends/_jax_utils.py +188 -0
- tme/backends/cupy_backend.py +294 -0
- tme/backends/jax_backend.py +314 -0
- tme/backends/matching_backend.py +1270 -0
- tme/backends/mlx_backend.py +241 -0
- tme/backends/npfftw_backend.py +583 -0
- tme/backends/pytorch_backend.py +430 -0
- tme/data/__init__.py +0 -0
- tme/data/c48n309.npy +0 -0
- tme/data/c48n527.npy +0 -0
- tme/data/c48n9.npy +0 -0
- tme/data/c48u1.npy +0 -0
- tme/data/c48u1153.npy +0 -0
- tme/data/c48u1201.npy +0 -0
- tme/data/c48u1641.npy +0 -0
- tme/data/c48u181.npy +0 -0
- tme/data/c48u2219.npy +0 -0
- tme/data/c48u27.npy +0 -0
- tme/data/c48u2947.npy +0 -0
- tme/data/c48u3733.npy +0 -0
- tme/data/c48u4749.npy +0 -0
- tme/data/c48u5879.npy +0 -0
- tme/data/c48u7111.npy +0 -0
- tme/data/c48u815.npy +0 -0
- tme/data/c48u83.npy +0 -0
- tme/data/c48u8649.npy +0 -0
- tme/data/c600v.npy +0 -0
- tme/data/c600vc.npy +0 -0
- tme/data/metadata.yaml +80 -0
- tme/data/quat_to_numpy.py +42 -0
- tme/data/scattering_factors.pickle +0 -0
- tme/density.py +2263 -0
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/external/bindings.cpp +332 -0
- tme/filters/__init__.py +6 -0
- tme/filters/_utils.py +311 -0
- tme/filters/bandpass.py +230 -0
- tme/filters/compose.py +81 -0
- tme/filters/ctf.py +393 -0
- tme/filters/reconstruction.py +160 -0
- tme/filters/wedge.py +542 -0
- tme/filters/whitening.py +191 -0
- tme/matching_data.py +863 -0
- tme/matching_exhaustive.py +497 -0
- tme/matching_optimization.py +1311 -0
- tme/matching_scores.py +1183 -0
- tme/matching_utils.py +1188 -0
- tme/memory.py +337 -0
- tme/orientations.py +598 -0
- tme/parser.py +685 -0
- tme/preprocessor.py +1329 -0
- tme/rotations.py +350 -0
- tme/structure.py +1864 -0
- tme/types.py +13 -0
tme/parser.py
ADDED
@@ -0,0 +1,685 @@
|
|
1
|
+
""" Implements parsers for atomic structure file formats.
|
2
|
+
|
3
|
+
Copyright (c) 2023 European Molecular Biology Laboratory
|
4
|
+
|
5
|
+
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
|
6
|
+
"""
|
7
|
+
|
8
|
+
import re
|
9
|
+
import xml.etree.ElementTree as ET
|
10
|
+
|
11
|
+
from collections import deque
|
12
|
+
from abc import ABC, abstractmethod
|
13
|
+
from typing import List, Dict, Union
|
14
|
+
|
15
|
+
|
16
|
+
import numpy as np
|
17
|
+
|
18
|
+
__all__ = ["PDBParser", "MMCIFParser", "GROParser", "StarParser", "XMLParser"]
|
19
|
+
|
20
|
+
|
21
|
+
class Parser(ABC):
    """
    Abstract base class for structure file parsers.

    Subclasses implement :py:meth:`Parser.parse_input`, which receives a
    deque of the file's lines (blank lines and lines starting with ``#``
    already removed) and returns a dictionary representation of the file.
    """

    def __init__(self, filename: str, mode: str = "r", **kwargs) -> None:
        """
        Read ``filename`` and populate the internal data dictionary.

        Parameters
        ----------
        filename : str
            Path of the file to parse.
        mode : str, optional
            Mode used to open the file, defaults to 'r'.
        kwargs : Dict, optional
            Optional keyword arguments forwarded to the child's
            ``parse_input`` method.
        """
        try:
            with open(filename, mode) as handle:
                raw = handle.read()
        except UnicodeDecodeError:
            # Some files ship as UTF-16; retry with an explicit encoding.
            with open(filename, mode, encoding="utf-16") as handle:
                raw = handle.read()

        # Drop empty lines and comment lines before handing off to the parser.
        relevant = deque(
            ln for ln in raw.split("\n") if ln and not ln.startswith("#")
        )
        self._data = self.parse_input(relevant, **kwargs)

    def __getitem__(self, key: str):
        """
        Return the value stored under ``key``.

        Parameters
        ----------
        key : str
            Key to look up in the internal data.

        Returns
        -------
        value
            The value associated with ``key``; raises ``KeyError`` if absent.
        """
        return self._data[key]

    def __contains__(self, key) -> bool:
        """
        Check whether ``key`` exists in the internal data.

        Parameters
        ----------
        key : str
            Key to test for.

        Returns
        -------
        bool
            True if ``key`` is present, False otherwise.
        """
        return key in self._data

    def get(self, key, default=None):
        """
        Return the value stored under ``key``, or ``default`` when missing.

        Parameters
        ----------
        key : str
            Key to look up in the internal data.
        default : Any
            Value to return when ``key`` is absent, defaults to None.

        Returns
        -------
        value
            The stored value, or ``default`` if ``key`` does not exist.
        """
        return self._data.get(key, default)

    def keys(self):
        """Return the keys of the internal dictionary."""
        return self._data.keys()

    def values(self):
        """Return the values of the internal dictionary."""
        return self._data.values()

    def items(self):
        """Return the items of the internal dictionary."""
        return self._data.items()

    @abstractmethod
    def parse_input(self, lines: List[str]) -> Dict:
        """
        Convert the lines of a structure file into a dictionary.

        Not intended to be called directly; concrete subclasses implement
        this for their particular file format.

        Parameters
        ----------
        lines : list of str
            The lines of a structure file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data.
        """
|
146
|
+
|
147
|
+
|
148
|
+
class PDBParser(Parser):
    """
    Convert PDB file data into a dictionary representation [1]_.

    References
    ----------
    .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
    """

    def parse_input(self, lines: deque) -> Dict:
        """
        Parse the lines of a PDB file into a dictionary.

        Parameters
        ----------
        lines : deque of str
            The lines of a PDB file to parse.

        Returns
        -------
        dict
            Fixed-column atom record fields plus a 'details' entry with
            metadata scraped from REMARK lines.
        """
        # Each pattern captures the value of interest in group(4).
        remark_patterns = {
            "resolution": re.compile(
                r"(.)+?(EFFECTIVE RESOLUTION\s+\(ANGSTROMS\)){1}(.)+?(\d+\.\d+)(\s)*$"
            ),
            "reconstruction_method": re.compile(
                r"(.)+?(RECONSTRUCTION METHOD)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "electron_source": re.compile(r"(.)+?(SOURCE)+(.)+?(\w+\s*\w+)(\s)*$"),
            "illumination_mode": re.compile(
                r"(.)+?(ILLUMINATION MODE)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "microscope_mode": re.compile(
                r"(.)+?(IMAGING MODE)+(.)+?(\w+\s*\w+)(\s)*$"
            ),
            "microscope_model": re.compile(
                r"(.)+?(MICROSCOPE MODEL)+(.+?:\s+)+?(.+)(\s)*$"
            ),
        }

        out = {
            "record_type": [],
            "atom_serial_number": [],
            "atom_name": [],
            "alternate_location_indicator": [],
            "residue_name": [],
            "chain_identifier": [],
            "residue_sequence_number": [],
            "code_for_residue_insertion": [],
            "atom_coordinate": [],
            "occupancy": [],
            "temperature_factor": [],
            "segment_identifier": [],
            "element_symbol": [],
            "charge": [],
            # Default resolution when no REMARK provides one.
            "details": {"resolution": np.nan},
        }

        for line in lines:
            if line.startswith("REMARK"):
                # Record each metadata field at most once; retire the pattern
                # so later REMARK lines cannot overwrite an earlier match.
                for key in list(remark_patterns):
                    hit = remark_patterns[key].match(line)
                    if hit is None:
                        continue
                    out["details"][key] = hit.group(4)
                    del remark_patterns[key]
            elif line.startswith(("ATOM", "HETATM")):
                # Fixed-column PDB coordinate record (0-based slices).
                out["record_type"].append(line[0:6])
                out["atom_serial_number"].append(line[6:11])
                out["atom_name"].append(line[12:16])
                out["alternate_location_indicator"].append(line[16])
                out["residue_name"].append(line[17:20])
                out["chain_identifier"].append(line[21])
                out["residue_sequence_number"].append(line[22:26])
                out["code_for_residue_insertion"].append(line[26])
                out["atom_coordinate"].append((line[30:38], line[38:46], line[46:54]))
                out["occupancy"].append(line[54:60])
                out["temperature_factor"].append(line[60:66])
                # NOTE(review): the PDB segment identifier spans columns 73-76
                # (line[72:76]); [74:76] keeps only the last two characters —
                # confirm this truncation is intentional.
                out["segment_identifier"].append(line[74:76])
                out["element_symbol"].append(line[76:78])
                out["charge"].append(line[78:80])

        # np.nan passes through float() unchanged when no resolution matched.
        out["details"]["resolution"] = float(out["details"]["resolution"])

        return out
|
236
|
+
|
237
|
+
|
238
|
+
class MMCIFParser(Parser):
    """
    Convert MMCIF file data into a dictionary representation. This implementation
    heavily relies on the atomium library [1]_.

    References
    ----------
    .. [1] Ireland, S. M., & Martin, A. C. R. (2020). atomium (Version 1.0.0)
        [Computer software]. https://doi.org/10.1093/bioinformatics/btaa072
    """

    def parse_input(self, lines: deque) -> Dict:
        """
        Parse a list of lines from an MMCIF file and convert the data into a dictionary.

        Parameters
        ----------
        lines : deque of str
            The lines of an MMCIF file to parse.

        Returns
        -------
        dict
            A dictionary containing the parsed data from the MMCIF file,
            keyed by category name (the tag part before the '.').
        """
        # First merge ';'-delimited multi-line strings into single lines,
        # then partition the result into per-category blocks.
        lines = self._consolidate_strings(lines)
        blocks = self._split_in_blocks(lines)
        mmcif_dict = {}
        for block in blocks:
            # 'loop_' blocks are tables; everything else is key/value pairs.
            if block["lines"][0] == "loop_":
                mmcif_dict[block["category"]] = self._loop_block_to_dict(block)
            else:
                mmcif_dict[block["category"]] = self._non_loop_block_to_dict(block)
        return mmcif_dict

    @staticmethod
    def _consolidate_strings(lines: List[str]) -> List[str]:
        """
        Consolidate multi-line strings that have been separated by semicolons in a
        list of strings.

        Parameters
        ----------
        lines : deque of str
            Deque of strings where each string is a line from an MMCIF file.

        Returns
        -------
        deque of str
            A deque of consolidated strings from the given input.
        """
        new_lines = deque()
        while lines:
            line = lines.popleft()
            if line.startswith(";"):
                # A ';' opens a multi-line value; collect lines until the
                # closing ';' and append the joined text to the previous line.
                string = [line[1:].strip()]
                while not lines[0].startswith(";"):
                    string.append(lines.popleft())
                # Discard the closing ';' line itself.
                lines.popleft()
                # NOTE(review): .replace("'", "'") is a no-op — possibly a
                # different quote character was intended here; confirm.
                new_lines[-1] += ' "{}"'.format(
                    " ".join(string).replace('"', "").replace("'", "'")
                )
            else:
                new_lines.append(line.replace('"', "").replace("'", "'"))
        return new_lines

    @staticmethod
    def _split_in_blocks(lines: List[str]) -> List[Dict]:
        """
        Split a deque of consolidated strings into a list of dictionaries,
        each representing a block of data.

        Parameters
        ----------
        lines : deque of str
            Deque of consolidated strings where each string is a line from
            an MMCIF file.

        Returns
        -------
        list of dict
            A list of dictionaries where each dictionary represents a block
            of data from the MMCIF file. Each has a 'category' (tag prefix
            without the leading '_') and the raw 'lines' of the block.
        """
        category = None
        block, blocks = [], []
        while lines:
            line = lines.popleft()
            if line.startswith("data_"):
                # Data-container name carries no category content; skip it.
                continue
            if line.startswith("_"):
                # Tag line: category is the part before the '.'.
                line_category = line.split(".")[0]
                if line_category != category:
                    # Category changed: flush the accumulated block first.
                    if category:
                        blocks.append({"category": category[1:], "lines": block})
                    category = line_category
                    block = []
            if line.startswith("loop_"):
                # A loop starts a new block; its category comes from the
                # first header line that follows (peeked, not consumed).
                if category:
                    blocks.append({"category": category[1:], "lines": block})
                category = lines[0].split(".")[0]
                block = []
            block.append(line)
        # Flush the trailing block, if any.
        if block:
            blocks.append({"category": category[1:], "lines": block})
        return blocks

    @staticmethod
    def _non_loop_block_to_dict(block: Dict) -> Dict:
        """
        Convert a non-loop block of data into a dictionary.

        Parameters
        ----------
        block : dict
            A dictionary representing a non-loop block of data from an MMCIF file.

        Returns
        -------
        dict
            A dictionary representing the parsed data from the given non-loop block,
            mapping tag names (part after the '.') to their string values.
        """
        d = {}
        # category = block["lines"][0].split(".")[0]
        # Re-join continuation lines (lines not starting with '_') onto the
        # preceding tag line before parsing.
        for index in range(len(block["lines"]) - 1):
            if block["lines"][index + 1][0] != "_":
                block["lines"][index] += " " + block["lines"][index + 1]
        block["lines"] = [line for line in block["lines"] if line[0] == "_"]
        for line in block["lines"]:
            name = line.split(".")[1].split()[0]
            value = " ".join(line.split()[1:])
            d[name] = value
        return d

    def _loop_block_to_dict(self, block: Dict) -> Dict:
        """
        Convert a loop block of data into a dictionary.

        Parameters
        ----------
        block : dict
            A dictionary representing a loop block of data from an MMCIF file.

        Returns
        -------
        dict
            A dictionary representing the parsed data from the given loop block,
            mapping each column name to the list of its values.
        """
        names, lines = [], []
        body_start = 0
        # Header lines start with '_<category>'; the first line that does
        # not is the start of the table body.
        for index, line in enumerate(block["lines"][1:], start=1):
            if not line.startswith("_" + block["category"]):
                body_start = index
                break
        names = [line.split(".")[1].rstrip() for line in block["lines"][1:body_start]]
        lines = [self._split_line(line) for line in block["lines"][body_start:]]
        # reunites broken lines: merge consecutive short rows until each row
        # has as many fields as there are column names.
        for n in range(len(lines) - 1):
            while n < len(lines) - 1 and len(lines[n]) + len(lines[n + 1]) <= len(
                names
            ):
                lines[n] += lines.pop(n + 1)
        res = {name: [] for name in names}
        for line in lines:
            for name, value in zip(names, line):
                res[name].append(value)
        return res

    @staticmethod
    def _split_line(line: str) -> List[str]:
        """
        Split a string into substrings, ignoring quotation marks within the string.

        Parameters
        ----------
        line : str
            The string to be split.

        Returns
        -------
        list of str
            A list of substrings resulting from the split operation on the given string.
        """
        # Fast path: no quotes present, plain whitespace split suffices.
        if not re.search("['\"]", line):
            return line.split()

        chars = deque(line.strip())
        values, value, in_string = [], [], False
        while chars:
            char = chars.popleft()
            if char == " " and not in_string:
                # Whitespace outside a quoted string terminates a field.
                values.append("".join(value))
                value = []
            elif char == '"':
                # Toggle quoted-string state; the quote itself is kept.
                in_string = not in_string
                value.append(char)
            else:
                value.append(char)

        # Trailing field (kept as a list; joined below like the others).
        values.append(value)
        return ["".join(v) for v in values if v]
|
439
|
+
|
440
|
+
|
441
|
+
class GROParser(Parser):
    """
    Convert GRO file in Gromos87 format into a dictionary representation [1]_.

    References
    ----------
    .. [1] https://manual.gromacs.org/archive/5.0.4/online/gro.html
    """

    def parse_input(self, lines, **kwargs) -> Dict:
        """
        Parse the lines of a GRO file into a dictionary.

        Parameters
        ----------
        lines : deque of str
            The lines of a GRO file to parse.
        kwargs : Dict, optional
            Optional keyword arguments.

        Returns
        -------
        dict
            Parsed atom records, per-frame titles, box vectors and times.
        """
        data = {
            "title": [],
            "num_atoms": [],
            "record_type": [],
            "residue_number": [],
            "residue_name": [],
            "atom_name": [],
            "atom_number": [],
            "atom_coordinate": [],
            "segment_identifier": [],
            "velocity": [],
            "box_vectors": [],
            "time": [],
        }

        if not lines:
            return data

        time_re = re.compile(r"t=\s*(\d+\.?\d*)")
        frame = -1

        # GRO files may be concatenated; each loop iteration consumes one frame.
        while lines:
            frame += 1
            segment_id = str(frame)

            title = lines.popleft()
            data["title"].append(title)

            stamp = time_re.search(title)
            if stamp:
                data["time"].append(float(stamp.group(1)))

            # Second line of a frame carries the atom count.
            try:
                num_atoms = int(lines.popleft())
            except (ValueError, IndexError):
                # Malformed or truncated header: stop and return what we have.
                return data
            data["num_atoms"].append(num_atoms)

            if num_atoms <= 0:
                continue

            parsed = 0
            for _ in range(num_atoms):
                if not lines:
                    break

                fields = self._parse_atom_line(lines.popleft())
                if fields is None:
                    # Skip malformed atom lines without aborting the frame.
                    continue

                res_num, res_name, atom_name, atom_num, coord, vel = fields
                parsed += 1
                data["residue_number"].append(res_num)
                data["residue_name"].append(res_name)
                data["atom_name"].append(atom_name)
                data["atom_number"].append(atom_num)
                data["atom_coordinate"].append(coord)
                data["velocity"].append(vel)

            # One segment id / record type entry per successfully parsed atom.
            data["segment_identifier"].extend([segment_id] * parsed)
            data["record_type"].extend(["ATOM"] * parsed)

            # The frame terminates with a box-vector line.
            if lines:
                box_line = lines.popleft()
                try:
                    data["box_vectors"].append([float(v) for v in box_line.split()])
                except ValueError:
                    pass

        return data

    @staticmethod
    def _parse_atom_line(line):
        """
        Parse one fixed-column Gromos87 atom line.

        Returns a tuple (residue_number, residue_name, atom_name, atom_number,
        coordinate, velocity-or-None), or None when the line is malformed.
        """
        try:
            res_num = int(line[:5])
            res_name = line[5:10].strip()
            atom_name = line[10:15].strip()
            atom_num = int(line[15:20])

            coord = (float(line[20:28]), float(line[28:36]), float(line[36:44]))

            # Velocities are optional; present only when the line is long enough.
            vel = None
            if len(line) >= 68:
                vel = (
                    float(line[44:52]),
                    float(line[52:60]),
                    float(line[60:68]),
                )
        except (ValueError, IndexError):
            return None
        return res_num, res_name, atom_name, atom_num, coord, vel
|
552
|
+
|
553
|
+
|
554
|
+
class StarParser(MMCIFParser):
    """
    Convert STAR file data into a dictionary representation [1]_.

    References
    ----------
    .. [1] https://www.iucr.org/__data/assets/file/0013/11416/star.5.html
    """

    def parse_input(self, lines: List[str], delimiter: str = "\t") -> Dict:
        """
        Parse the lines of a STAR file into a dictionary.

        Parameters
        ----------
        lines : deque of str
            The lines of a STAR file to parse (blank and '#'-prefixed lines
            were already removed by :py:class:`Parser`).
        delimiter : str, optional
            Column delimiter of data rows, defaults to a tab.

        Returns
        -------
        dict
            Mapping of each ``data`` block name to a dictionary of column
            name -> list of values.
        """
        comment_pattern = re.compile(r"\s*#.*")

        ret, category, block = {}, None, []

        def _finalize(cat):
            # Strip trailing '# ...' annotations from header lines and zip
            # the accumulated rows into per-column value lists.
            headers = [comment_pattern.sub("", x) for x in ret[cat]]
            ret[cat] = {
                header: list(column) for header, column in zip(headers, zip(*block))
            }

        while lines:
            line = lines.popleft()

            if line.startswith("data") and not line.startswith("_"):
                # New data block: finalize the previous one, if any.
                if category != line and category is not None:
                    _finalize(category)
                    block.clear()
                category = line
                if category not in ret:
                    ret[category] = {}
                continue

            if line.startswith("_"):
                # Column declaration; value rows follow.
                ret[category][line] = []
                continue

            if line.startswith("loop"):
                continue

            # Data row (split never yields an empty list, so append directly).
            block.append(line.split(delimiter))

        # Fix: previously the final flush ran unconditionally and raised a
        # KeyError on input without any 'data' block; now an empty dict is
        # returned for such input.
        if category is not None:
            _finalize(category)
        return ret
|
601
|
+
|
602
|
+
|
603
|
+
class XMLParser(Parser):
    """
    Parser for XML files.
    """

    def parse_input(self, lines: deque, **kwargs) -> Dict:
        """Build an element tree from the joined lines and convert it to a dict."""
        root = ET.fromstring("\n".join(lines))
        return self._element_to_dict(root)

    def _element_to_dict(self, element) -> Dict:
        """
        Recursively convert an XML element and its children to a dictionary.

        Parameters
        ----------
        element : xml.etree.ElementTree.Element
            The XML element to convert.

        Returns
        -------
        Dict
            Dictionary representation of the element. Leaf elements with
            text collapse to a converted scalar (or a list, one entry per
            non-empty text line).
        """
        node = {}

        # Attributes live under the reserved '@attributes' key.
        if element.attrib:
            node["@attributes"] = {
                key: self._convert_value(val) for key, val in element.attrib.items()
            }

        children = list(element)
        if children:
            for child in children:
                converted = self._element_to_dict(child)
                if child.tag not in node:
                    node[child.tag] = converted
                else:
                    # Repeated tags are collected into a list.
                    if not isinstance(node[child.tag], list):
                        node[child.tag] = [node[child.tag]]
                    node[child.tag].append(converted)
        elif element.text and element.text.strip():
            # Leaf with text: multi-line text becomes a list of converted
            # values, single-line text a single converted value.
            text = element.text.strip()
            if "\n" in text:
                node = [
                    self._convert_value(part.strip())
                    for part in text.split("\n")
                    if part.strip()
                ]
            else:
                node = self._convert_value(text)

        return node

    def _convert_value(self, value_str: str) -> Union[int, float, bool, str]:
        """
        Convert a string value to an appropriate data type.

        Parameters
        ----------
        value_str : str
            String value to convert.

        Returns
        -------
        Union[int, float, bool, str]
            Boolean for 'true'/'false' (case-insensitive), otherwise int,
            then float when parseable, else the original string.
        """
        lowered = value_str.lower()
        if lowered in ("true", "false"):
            return lowered == "true"

        for caster in (int, float):
            try:
                return caster(value_str)
            except ValueError:
                continue

        return value_str
|