pytme 0.2.0b0__cp311-cp311-macosx_14_0_arm64.whl → 0.2.2__cp311-cp311-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytme-0.2.2.data/scripts/match_template.py +1187 -0
- {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/postprocess.py +170 -71
- {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/preprocessor_gui.py +179 -86
- pytme-0.2.2.dist-info/METADATA +91 -0
- pytme-0.2.2.dist-info/RECORD +74 -0
- {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/WHEEL +1 -1
- scripts/extract_candidates.py +126 -87
- scripts/match_template.py +596 -209
- scripts/match_template_filters.py +571 -223
- scripts/postprocess.py +170 -71
- scripts/preprocessor_gui.py +179 -86
- scripts/refine_matches.py +567 -159
- tme/__init__.py +0 -1
- tme/__version__.py +1 -1
- tme/analyzer.py +627 -855
- tme/backends/__init__.py +41 -11
- tme/backends/_jax_utils.py +185 -0
- tme/backends/cupy_backend.py +120 -225
- tme/backends/jax_backend.py +282 -0
- tme/backends/matching_backend.py +464 -388
- tme/backends/mlx_backend.py +45 -68
- tme/backends/npfftw_backend.py +256 -514
- tme/backends/pytorch_backend.py +41 -154
- tme/density.py +312 -421
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/matching_data.py +366 -303
- tme/matching_exhaustive.py +279 -1521
- tme/matching_optimization.py +234 -129
- tme/matching_scores.py +884 -0
- tme/matching_utils.py +281 -387
- tme/memory.py +377 -0
- tme/orientations.py +226 -66
- tme/parser.py +3 -4
- tme/preprocessing/__init__.py +2 -0
- tme/preprocessing/_utils.py +217 -0
- tme/preprocessing/composable_filter.py +31 -0
- tme/preprocessing/compose.py +55 -0
- tme/preprocessing/frequency_filters.py +388 -0
- tme/preprocessing/tilt_series.py +1011 -0
- tme/preprocessor.py +574 -530
- tme/structure.py +495 -189
- tme/types.py +5 -3
- pytme-0.2.0b0.data/scripts/match_template.py +0 -800
- pytme-0.2.0b0.dist-info/METADATA +0 -73
- pytme-0.2.0b0.dist-info/RECORD +0 -66
- tme/helpers.py +0 -881
- tme/matching_constrained.py +0 -195
- {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/estimate_ram_usage.py +0 -0
- {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/preprocess.py +0 -0
- {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/LICENSE +0 -0
- {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/entry_points.txt +0 -0
- {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/top_level.txt +0 -0
tme/structure.py
CHANGED
@@ -6,103 +6,159 @@
|
|
6
6
|
"""
|
7
7
|
import warnings
|
8
8
|
from copy import deepcopy
|
9
|
-
from collections import namedtuple
|
10
|
-
from typing import List, Dict, Tuple
|
11
9
|
from itertools import groupby
|
12
10
|
from dataclasses import dataclass
|
11
|
+
from collections import namedtuple
|
12
|
+
from typing import List, Dict, Tuple
|
13
13
|
from os.path import splitext, basename
|
14
14
|
|
15
15
|
import numpy as np
|
16
16
|
|
17
|
-
from .parser import PDBParser, MMCIFParser
|
18
|
-
from .matching_utils import (
|
19
|
-
rigid_transform,
|
20
|
-
_format_mmcif_colunns,
|
21
|
-
minimum_enclosing_box,
|
22
|
-
)
|
23
|
-
from .helpers import atom_profile
|
24
17
|
from .types import NDArray
|
18
|
+
from .preprocessor import atom_profile, Preprocessor
|
19
|
+
from .parser import PDBParser, MMCIFParser
|
20
|
+
from .matching_utils import rigid_transform, minimum_enclosing_box
|
25
21
|
|
26
22
|
|
27
23
|
@dataclass(repr=False)
|
28
24
|
class Structure:
|
29
25
|
"""
|
30
|
-
Represents atomic structures
|
31
|
-
|
26
|
+
Represents atomic structures per the Protein Data Bank (PDB) specification.
|
27
|
+
|
28
|
+
Examples
|
29
|
+
--------
|
30
|
+
The following creates a :py:class:`Structure` instance
|
31
|
+
|
32
|
+
>>> from tme import Structure
|
33
|
+
>>> structure = Structure(
|
34
|
+
>>> record_type=["ATOM", "ATOM", "ATOM"],
|
35
|
+
>>> atom_serial_number=[0, 1, 2] ,
|
36
|
+
>>> atom_name=["C", "N", "H"],
|
37
|
+
>>> atom_coordinate=[[30,15,10], [35, 20, 15], [35,25,20]],
|
38
|
+
>>> alternate_location_indicator=[".", ".", "."],
|
39
|
+
>>> residue_name=["GLY", "GLY", "HIS"],
|
40
|
+
>>> chain_identifier=["A", "A", "B"],
|
41
|
+
>>> residue_sequence_number=[0, 0, 1],
|
42
|
+
>>> code_for_residue_insertion=["?", "?", "?"],
|
43
|
+
>>> occupancy=[0, 0, 0],
|
44
|
+
>>> temperature_factor=[0, 0, 0],
|
45
|
+
>>> segment_identifier=["1", "1", "1"],
|
46
|
+
>>> element_symbol=["C", "N", "C"],
|
47
|
+
>>> charge=["?", "?", "?"],
|
48
|
+
>>> metadata={},
|
49
|
+
>>> )
|
50
|
+
>>> structure
|
51
|
+
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 2]
|
52
|
+
|
53
|
+
:py:class:`Structure` instances support a range of subsetting operations based on
|
54
|
+
atom indices
|
55
|
+
|
56
|
+
>>> structure[1]
|
57
|
+
Unique Chains: A, Atom Range: 1-1 [N = 1], Residue Range: 0-0 [N = 1]
|
58
|
+
>>> structure[(False, False, True)]
|
59
|
+
Unique Chains: B, Atom Range: 2-2 [N = 1], Residue Range: 1-1 [N = 1]
|
60
|
+
>>> structure[(1,2)]
|
61
|
+
Unique Chains: A-B, Atom Range: 1-2 [N = 2], Residue Range: 0-1 [N = 2]
|
62
|
+
|
63
|
+
They can be written to disk in a range of formats using :py:meth:`Structure.to_file`
|
64
|
+
|
65
|
+
>>> structure.to_file("test.pdb") # Writes a PDB file to disk
|
66
|
+
>>> structure.to_file("test.cif") # Writes a mmCIF file to disk
|
67
|
+
|
68
|
+
New instances can be created from a range of formats using
|
69
|
+
:py:meth:`Structure.from_file`
|
70
|
+
|
71
|
+
>>> Structure.from_file("test.pdb") # Reads PDB file from disk
|
72
|
+
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 2]
|
73
|
+
>>> Structure.from_file("test.cif") # Reads mmCIF file from disk
|
74
|
+
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 2]
|
75
|
+
|
76
|
+
Class instances can be discretized on grids and converted to
|
77
|
+
:py:class:`tme.density.Density` instances using :py:meth:`Structure.to_volume`
|
78
|
+
or :py:meth:`tme.density.Density.from_structure`.
|
79
|
+
|
80
|
+
>>> volume, origin, sampling_rate = structure.to_volume(shape=(50,40,30))
|
32
81
|
|
33
82
|
References
|
34
83
|
----------
|
35
|
-
.. [1]
|
84
|
+
.. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
|
85
|
+
.. [2] https://www.ccp4.ac.uk/html/mmcifformat.html
|
86
|
+
|
36
87
|
"""
|
37
88
|
|
38
|
-
#:
|
89
|
+
#: Array of record types, e.g. ATOM.
|
39
90
|
record_type: NDArray
|
40
91
|
|
41
|
-
#:
|
92
|
+
#: Array of serial numbers.
|
42
93
|
atom_serial_number: NDArray
|
43
94
|
|
44
|
-
#:
|
95
|
+
#: Array of atom names.
|
45
96
|
atom_name: NDArray
|
46
97
|
|
47
|
-
#:
|
98
|
+
#: Array of x,y,z atom coordinates.
|
48
99
|
atom_coordinate: NDArray
|
49
100
|
|
50
|
-
#:
|
101
|
+
#: Array of alternate location indices.
|
51
102
|
alternate_location_indicator: NDArray
|
52
103
|
|
53
|
-
#:
|
104
|
+
#: Array of residue names.
|
54
105
|
residue_name: NDArray
|
55
106
|
|
56
|
-
#:
|
107
|
+
#: Array of chain identifiers.
|
57
108
|
chain_identifier: NDArray
|
58
109
|
|
59
|
-
#:
|
110
|
+
#: Array of residue ids.
|
60
111
|
residue_sequence_number: NDArray
|
61
112
|
|
62
|
-
#:
|
113
|
+
#: Array of insertion information.
|
63
114
|
code_for_residue_insertion: NDArray
|
64
115
|
|
65
|
-
#:
|
116
|
+
#: Array of occupancy factors.
|
66
117
|
occupancy: NDArray
|
67
118
|
|
68
|
-
#:
|
119
|
+
#: Array of B-factors.
|
69
120
|
temperature_factor: NDArray
|
70
121
|
|
71
|
-
#:
|
122
|
+
#: Array of segment identifiers.
|
72
123
|
segment_identifier: NDArray
|
73
124
|
|
74
|
-
#:
|
125
|
+
#: Array of element symbols.
|
75
126
|
element_symbol: NDArray
|
76
127
|
|
77
|
-
#:
|
128
|
+
#: Array of charges.
|
78
129
|
charge: NDArray
|
79
130
|
|
80
|
-
#:
|
81
|
-
|
131
|
+
#: Metadata dictionary.
|
132
|
+
metadata: dict
|
82
133
|
|
83
134
|
def __post_init__(self, *args, **kwargs):
|
84
135
|
"""
|
85
|
-
Initialize the structure and populate header
|
136
|
+
Initialize the structure and populate header metadata.
|
86
137
|
|
87
138
|
Raises
|
88
139
|
------
|
89
140
|
ValueError
|
90
|
-
If
|
91
|
-
If the shape of atom_coordinates and chain_identifier doesn't match.
|
141
|
+
If the length of any NDArray attribute does not match the number of atoms.
|
92
142
|
"""
|
93
|
-
self.
|
94
|
-
|
143
|
+
for attribute in self.__dict__:
|
144
|
+
value = getattr(self, attribute)
|
145
|
+
target_type = self.__annotations__.get(attribute, None)
|
146
|
+
if target_type == NDArray:
|
147
|
+
setattr(self, attribute, np.atleast_1d(np.array(value)))
|
95
148
|
|
96
149
|
n_atoms = self.atom_coordinate.shape[0]
|
97
150
|
for attribute in self.__dict__:
|
98
151
|
value = getattr(self, attribute)
|
99
|
-
if
|
152
|
+
if not isinstance(value, np.ndarray):
|
100
153
|
continue
|
101
154
|
if value.shape[0] != n_atoms:
|
102
155
|
raise ValueError(
|
103
156
|
f"Expected shape of {attribute}: {n_atoms}, got {value.shape[0]}."
|
104
157
|
)
|
105
158
|
|
159
|
+
self._elements = Elements()
|
160
|
+
self.metadata = self._populate_metadata(self.metadata)
|
161
|
+
|
106
162
|
def __getitem__(self, indices: List[int]) -> "Structure":
|
107
163
|
"""
|
108
164
|
Get a Structure instance for specified indices.
|
@@ -138,22 +194,17 @@ class Structure:
|
|
138
194
|
"charge",
|
139
195
|
)
|
140
196
|
kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
|
141
|
-
ret = self.__class__(**kwargs,
|
197
|
+
ret = self.__class__(**kwargs, metadata={})
|
142
198
|
return ret
|
143
199
|
|
144
200
|
def __repr__(self):
|
145
201
|
"""
|
146
202
|
Return a string representation of the Structure.
|
147
|
-
|
148
|
-
Returns
|
149
|
-
-------
|
150
|
-
str
|
151
|
-
The string representation.
|
152
203
|
"""
|
153
204
|
unique_chains = "-".join(
|
154
205
|
[
|
155
206
|
",".join([str(x) for x in entity])
|
156
|
-
for entity in self.
|
207
|
+
for entity in self.metadata["unique_chains"]
|
157
208
|
]
|
158
209
|
)
|
159
210
|
min_atom = np.min(self.atom_serial_number)
|
@@ -162,7 +213,7 @@ class Structure:
|
|
162
213
|
|
163
214
|
min_residue = np.min(self.residue_sequence_number)
|
164
215
|
max_residue = np.max(self.residue_sequence_number)
|
165
|
-
n_residue = self.residue_sequence_number.size
|
216
|
+
n_residue = np.unique(self.residue_sequence_number).size
|
166
217
|
|
167
218
|
repr_str = (
|
168
219
|
f"Structure object at {id(self)}\n"
|
@@ -172,43 +223,39 @@ class Structure:
|
|
172
223
|
)
|
173
224
|
return repr_str
|
174
225
|
|
175
|
-
def get_chains(self) -> List[str]:
|
176
|
-
"""
|
177
|
-
Returns a list of available chains.
|
178
|
-
|
179
|
-
Returns
|
180
|
-
-------
|
181
|
-
list
|
182
|
-
The list of available chains.
|
183
|
-
"""
|
184
|
-
return list(self.details["chain_weight"].keys())
|
185
|
-
|
186
226
|
def copy(self) -> "Structure":
|
187
227
|
"""
|
188
228
|
Returns a copy of the Structure instance.
|
189
229
|
|
190
230
|
Returns
|
191
231
|
-------
|
192
|
-
Structure
|
232
|
+
:py:class:`Structure`
|
193
233
|
The copied Structure instance.
|
234
|
+
|
235
|
+
Examples
|
236
|
+
--------
|
237
|
+
>>> import numpy as np
|
238
|
+
>>> structure_copy = structure.copy()
|
239
|
+
>>> np.allclose(structure_copy.atom_coordinate, structure.atom_coordinate)
|
240
|
+
True
|
194
241
|
"""
|
195
242
|
return deepcopy(self)
|
196
243
|
|
197
|
-
def
|
244
|
+
def _populate_metadata(self, metadata: Dict = {}) -> Dict:
|
198
245
|
"""
|
199
|
-
Populate the
|
246
|
+
Populate the metadata dictionary with the data from the Structure instance.
|
200
247
|
|
201
248
|
Parameters
|
202
249
|
----------
|
203
|
-
|
204
|
-
The initial
|
250
|
+
metadata : dict, optional
|
251
|
+
The initial metadata dictionary, by default {}.
|
205
252
|
|
206
253
|
Returns
|
207
254
|
-------
|
208
255
|
dict
|
209
|
-
The populated
|
256
|
+
The populated metadata dictionary.
|
210
257
|
"""
|
211
|
-
|
258
|
+
metadata["weight"] = np.sum(
|
212
259
|
[self._elements[atype].atomic_weight for atype in self.element_symbol]
|
213
260
|
)
|
214
261
|
|
@@ -220,12 +267,12 @@ class Structure:
|
|
220
267
|
[self._elements[atype].atomic_weight for atype in self.element_symbol],
|
221
268
|
)
|
222
269
|
labels = self.chain_identifier[idx]
|
223
|
-
|
270
|
+
metadata["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
|
224
271
|
|
225
|
-
# Group non-unique chains in separate lists in
|
226
|
-
|
272
|
+
# Group non-unique chains in separate lists in metadata["unique_chains"]
|
273
|
+
metadata["unique_chains"], temp = [], {}
|
227
274
|
for chain_label in label:
|
228
|
-
index = len(
|
275
|
+
index = len(metadata["unique_chains"])
|
229
276
|
chain_sequence = "".join(
|
230
277
|
[
|
231
278
|
str(y)
|
@@ -236,10 +283,10 @@ class Structure:
|
|
236
283
|
)
|
237
284
|
if chain_sequence not in temp:
|
238
285
|
temp[chain_sequence] = index
|
239
|
-
|
286
|
+
metadata["unique_chains"].append([chain_label])
|
240
287
|
continue
|
241
288
|
idx = temp.get(chain_sequence)
|
242
|
-
|
289
|
+
metadata["unique_chains"][idx].append(chain_label)
|
243
290
|
|
244
291
|
filtered_data = [
|
245
292
|
(label, integer)
|
@@ -248,12 +295,12 @@ class Structure:
|
|
248
295
|
)
|
249
296
|
]
|
250
297
|
filtered_data = sorted(filtered_data, key=lambda x: x[0])
|
251
|
-
|
298
|
+
metadata["chain_range"] = {}
|
252
299
|
for label, values in groupby(filtered_data, key=lambda x: x[0]):
|
253
300
|
values = [int(x[1]) for x in values]
|
254
|
-
|
301
|
+
metadata["chain_range"][label] = (min(values), max(values))
|
255
302
|
|
256
|
-
return
|
303
|
+
return metadata
|
257
304
|
|
258
305
|
@classmethod
|
259
306
|
def from_file(
|
@@ -264,12 +311,18 @@ class Structure:
|
|
264
311
|
filter_by_residues: set = None,
|
265
312
|
) -> "Structure":
|
266
313
|
"""
|
267
|
-
Reads
|
314
|
+
Reads an atomic structure file into a :py:class:`Structure` instance.
|
268
315
|
|
269
316
|
Parameters
|
270
317
|
----------
|
271
318
|
filename : str
|
272
|
-
|
319
|
+
Input file. Supported extensions are:
|
320
|
+
|
321
|
+
+------+-------------------------------------------------------------+
|
322
|
+
| .pdb | Reads a PDB file |
|
323
|
+
+------+-------------------------------------------------------------+
|
324
|
+
| .cif | Reads an mmCIF file |
|
325
|
+
+------+-------------------------------------------------------------+
|
273
326
|
keep_non_atom_records : bool, optional
|
274
327
|
Whether to keep residues that are not labelled ATOM.
|
275
328
|
filter_by_elements: set, optional
|
@@ -280,12 +333,34 @@ class Structure:
|
|
280
333
|
Raises
|
281
334
|
------
|
282
335
|
ValueError
|
283
|
-
If the extension is not
|
336
|
+
If the extension is not supported.
|
284
337
|
|
285
338
|
Returns
|
286
339
|
-------
|
287
|
-
Structure
|
288
|
-
|
340
|
+
:py:class:`Structure`
|
341
|
+
Structure instance representing the read in file.
|
342
|
+
|
343
|
+
Examples
|
344
|
+
--------
|
345
|
+
>>> from importlib_resources import files
|
346
|
+
>>> from tme import Structure
|
347
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
348
|
+
>>> structure = Structure.from_file(filename=fname)
|
349
|
+
>>> structure
|
350
|
+
Unique Chains: A-B, Atom Range: 1-1564 [N = 1564], Residue Range: 142-239 [N = 1564]
|
351
|
+
|
352
|
+
We can include non ATOM entries and restrict the considered elements
|
353
|
+
and residues
|
354
|
+
|
355
|
+
>>> structure = Structure.from_file(
|
356
|
+
>>> filename=fname,
|
357
|
+
>>> keep_non_atom_records=True,
|
358
|
+
>>> filter_by_elements = {"C"},
|
359
|
+
>>> filter_by_residues = {"GLY"},
|
360
|
+
>>> )
|
361
|
+
>>> structure
|
362
|
+
Unique Chains: A,B, Atom Range: 96-1461 [N = 44], Residue Range: 154-228 [N = 44]
|
363
|
+
|
289
364
|
"""
|
290
365
|
_, file_extension = splitext(basename(filename.upper()))
|
291
366
|
if file_extension == ".PDB":
|
@@ -313,14 +388,14 @@ class Structure:
|
|
313
388
|
keep = np.logical_and(keep, data["record_type"] == "ATOM")
|
314
389
|
|
315
390
|
for key in data:
|
316
|
-
if key == "
|
391
|
+
if key == "metadata":
|
317
392
|
continue
|
318
|
-
if
|
393
|
+
if isinstance(data[key], np.ndarray):
|
319
394
|
data[key] = data[key][keep]
|
320
395
|
else:
|
321
396
|
data[key] = [x for x, flag in zip(data[key], keep) if flag]
|
322
397
|
|
323
|
-
data["
|
398
|
+
data["metadata"]["filepath"] = filename
|
324
399
|
|
325
400
|
return cls(**data)
|
326
401
|
|
@@ -367,12 +442,12 @@ class Structure:
|
|
367
442
|
out_data = [
|
368
443
|
x.strip() for x in result["atom_site"].get(atom_site_key, ["."])
|
369
444
|
]
|
370
|
-
if dtype
|
445
|
+
if dtype is int:
|
371
446
|
out_data = [0 if x == "." else int(x) for x in out_data]
|
372
447
|
try:
|
373
448
|
out[out_key] = np.asarray(out_data).astype(dtype)
|
374
449
|
except ValueError:
|
375
|
-
default = ["."] if dtype
|
450
|
+
default = ["."] if dtype is str else 0
|
376
451
|
print(f"Converting {out_key} to {dtype} failed, set to {default}.")
|
377
452
|
out[out_key] = np.repeat(default, len(out_data)).astype(dtype)
|
378
453
|
|
@@ -382,7 +457,7 @@ class Structure:
|
|
382
457
|
continue
|
383
458
|
out[key] = np.repeat(value, number_entries // value.size)
|
384
459
|
|
385
|
-
out["
|
460
|
+
out["metadata"] = {}
|
386
461
|
out["atom_coordinate"] = np.transpose(
|
387
462
|
np.array(
|
388
463
|
[
|
@@ -405,7 +480,7 @@ class Structure:
|
|
405
480
|
for out_key, (base_key, inner_key, default) in detail_mapping.items():
|
406
481
|
if base_key not in result:
|
407
482
|
continue
|
408
|
-
out["
|
483
|
+
out["metadata"][out_key] = result[base_key].get(inner_key, default)
|
409
484
|
|
410
485
|
return out
|
411
486
|
|
@@ -446,15 +521,15 @@ class Structure:
|
|
446
521
|
"charge": ("charge", str),
|
447
522
|
}
|
448
523
|
|
449
|
-
out = {"
|
524
|
+
out = {"metadata": result["details"]}
|
450
525
|
for out_key, (inner_key, dtype) in atom_site_mapping.items():
|
451
526
|
out_data = [x.strip() for x in result[inner_key]]
|
452
|
-
if dtype
|
527
|
+
if dtype is int:
|
453
528
|
out_data = [0 if x == "." else int(x) for x in out_data]
|
454
529
|
try:
|
455
530
|
out[out_key] = np.asarray(out_data).astype(dtype)
|
456
531
|
except ValueError:
|
457
|
-
default = "." if dtype
|
532
|
+
default = "." if dtype is str else 0
|
458
533
|
print(
|
459
534
|
f"Converting {out_key} to {dtype} failed. Setting {out_key} to {default}."
|
460
535
|
)
|
@@ -466,21 +541,36 @@ class Structure:
|
|
466
541
|
|
467
542
|
def to_file(self, filename: str) -> None:
|
468
543
|
"""
|
469
|
-
Writes the Structure instance
|
470
|
-
|
471
|
-
|
544
|
+
Writes the :py:class:`Structure` instance to disk.
|
545
|
+
|
546
|
+
Parameters
|
547
|
+
----------
|
548
|
+
filename : str
|
549
|
+
The name of the file to be created. Supported extensions are
|
550
|
+
|
551
|
+
+------+-------------------------------------------------------------+
|
552
|
+
| .pdb | Creates a PDB file |
|
553
|
+
+------+-------------------------------------------------------------+
|
554
|
+
| .cif | Creates an mmCIF file |
|
555
|
+
+------+-------------------------------------------------------------+
|
472
556
|
|
473
557
|
Raises
|
474
558
|
------
|
475
559
|
ValueError
|
476
|
-
If the extension is not
|
560
|
+
If the extension is not supported.
|
561
|
+
|
562
|
+
Examples
|
563
|
+
--------
|
564
|
+
>>> from importlib_resources import files
|
565
|
+
>>> from tempfile import NamedTemporaryFile
|
566
|
+
>>> from tme import Structure
|
567
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
568
|
+
>>> oname = NamedTemporaryFile().name
|
569
|
+
>>> structure = Structure.from_file(filename=fname)
|
570
|
+
>>> structure.to_file(f"{oname}.cif") # Writes an mmCIF file to disk
|
571
|
+
>>> structure.to_file(f"{oname}.pdb") # Writes a PDB file to disk
|
477
572
|
|
478
|
-
Parameters
|
479
|
-
----------
|
480
|
-
filename : str
|
481
|
-
The filename of the file to write.
|
482
573
|
"""
|
483
|
-
data_out = []
|
484
574
|
if np.any(np.vectorize(len)(self.chain_identifier) > 2):
|
485
575
|
warnings.warn("Chain identifiers longer than one will be shortened.")
|
486
576
|
|
@@ -596,7 +686,7 @@ class Structure:
|
|
596
686
|
data["pdbx_PDB_model_num"].append(str(model_num))
|
597
687
|
|
598
688
|
output_data = {"atom_site": data}
|
599
|
-
original_file = self.
|
689
|
+
original_file = self.metadata.get("filepath", "")
|
600
690
|
try:
|
601
691
|
new_data = {k: v for k, v in MMCIFParser(original_file).items()}
|
602
692
|
index = self.atom_serial_number - 1
|
@@ -612,6 +702,9 @@ class Structure:
|
|
612
702
|
|
613
703
|
ret = ""
|
614
704
|
for category, subdict in output_data.items():
|
705
|
+
if not len(subdict):
|
706
|
+
continue
|
707
|
+
|
615
708
|
ret += "#\n"
|
616
709
|
is_loop = isinstance(subdict[list(subdict.keys())[0]], list)
|
617
710
|
if not is_loop:
|
@@ -620,7 +713,18 @@ class Structure:
|
|
620
713
|
else:
|
621
714
|
ret += "loop_\n"
|
622
715
|
ret += "".join([f"_{category}.{k}\n" for k in subdict])
|
623
|
-
|
716
|
+
|
717
|
+
subdict = {
|
718
|
+
k: [_format_string(s) for s in v] for k, v in subdict.items()
|
719
|
+
}
|
720
|
+
key_length = {
|
721
|
+
key: len(max(value, key=lambda x: len(x), default=""))
|
722
|
+
for key, value in subdict.items()
|
723
|
+
}
|
724
|
+
padded_subdict = {
|
725
|
+
key: [s.ljust(key_length[key] + 1) for s in values]
|
726
|
+
for key, values in subdict.items()
|
727
|
+
}
|
624
728
|
|
625
729
|
data = [
|
626
730
|
"".join([str(x) for x in content])
|
@@ -644,8 +748,23 @@ class Structure:
|
|
644
748
|
|
645
749
|
Returns
|
646
750
|
-------
|
647
|
-
Structure
|
648
|
-
A subset of the
|
751
|
+
:py:class:`Structure`
|
752
|
+
A subset of the class instance containing only the specified chains.
|
753
|
+
|
754
|
+
Raises
|
755
|
+
------
|
756
|
+
ValueError
|
757
|
+
If none of the specified chains exist.
|
758
|
+
|
759
|
+
Examples
|
760
|
+
--------
|
761
|
+
>>> from importlib_resources import files
|
762
|
+
>>> from tme import Structure
|
763
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
764
|
+
>>> structure = Structure.from_file(filename=fname)
|
765
|
+
>>> structure.subset_by_chain(chain="A") # Keep A
|
766
|
+
>>> structure.subset_by_chain(chain="A,B") # Keep A and B
|
767
|
+
>>> structure.subset_by_chain(chain="B,C") # Keep B, C does not exist
|
649
768
|
"""
|
650
769
|
chain = np.unique(self.chain_identifier) if chain is None else chain.split(",")
|
651
770
|
keep = np.in1d(self.chain_identifier, chain)
|
@@ -664,10 +783,8 @@ class Structure:
|
|
664
783
|
----------
|
665
784
|
start : int
|
666
785
|
The starting residue sequence number.
|
667
|
-
|
668
786
|
stop : int
|
669
787
|
The ending residue sequence number.
|
670
|
-
|
671
788
|
chain : str, optional
|
672
789
|
The chain identifier. If multiple chains should be selected they need
|
673
790
|
to be a comma separated string, e.g. 'A,B,CE'. If chain None,
|
@@ -675,8 +792,21 @@ class Structure:
|
|
675
792
|
|
676
793
|
Returns
|
677
794
|
-------
|
678
|
-
Structure
|
795
|
+
:py:class:`Structure`
|
679
796
|
A subset of the original structure within the specified residue range.
|
797
|
+
|
798
|
+
Raises
|
799
|
+
------
|
800
|
+
ValueError
|
801
|
+
If none of the specified residue chain combinations exist.
|
802
|
+
|
803
|
+
Examples
|
804
|
+
--------
|
805
|
+
>>> from importlib_resources import files
|
806
|
+
>>> from tme import Structure
|
807
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
808
|
+
>>> structure = Structure.from_file(filename=fname)
|
809
|
+
>>> structure.subset_by_range(chain="A",start=150,stop=180)
|
680
810
|
"""
|
681
811
|
ret = self.subset_by_chain(chain=chain)
|
682
812
|
keep = np.logical_and(
|
@@ -692,6 +822,15 @@ class Structure:
|
|
692
822
|
-------
|
693
823
|
NDArray
|
694
824
|
The center of mass of the structure.
|
825
|
+
|
826
|
+
Examples
|
827
|
+
--------
|
828
|
+
>>> from importlib_resources import files
|
829
|
+
>>> from tme import Structure
|
830
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
831
|
+
>>> structure = Structure.from_file(filename=fname)
|
832
|
+
>>> structure.center_of_mass()
|
833
|
+
array([-0.89391639, 29.94908928, -2.64736741])
|
695
834
|
"""
|
696
835
|
weights = [self._elements[atype].atomic_weight for atype in self.element_symbol]
|
697
836
|
return np.dot(self.atom_coordinate.T, weights) / np.sum(weights)
|
@@ -717,7 +856,19 @@ class Structure:
|
|
717
856
|
Returns
|
718
857
|
-------
|
719
858
|
Structure
|
720
|
-
The transformed instance of :py:class:`
|
859
|
+
The transformed instance of :py:class:`Structure`.
|
860
|
+
|
861
|
+
Examples
|
862
|
+
--------
|
863
|
+
>>> from importlib_resources import files
|
864
|
+
>>> from tme import Structure
|
865
|
+
>>> from tme.matching_utils import get_rotation_matrices
|
866
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
867
|
+
>>> structure = Structure.from_file(filename=fname)
|
868
|
+
>>> structure.rigid_transform(
|
869
|
+
>>> rotation_matrix = get_rotation_matrices(60)[2],
|
870
|
+
>>> translation = (0, 1, -5)
|
871
|
+
>>> )
|
721
872
|
"""
|
722
873
|
out = np.empty_like(self.atom_coordinate.T)
|
723
874
|
rigid_transform(
|
@@ -745,7 +896,17 @@ class Structure:
|
|
745
896
|
|
746
897
|
See Also
|
747
898
|
--------
|
748
|
-
:py:meth:`tme.Density.centered`
|
899
|
+
:py:meth:`tme.density.Density.centered`
|
900
|
+
|
901
|
+
Examples
|
902
|
+
--------
|
903
|
+
>>> from importlib_resources import files
|
904
|
+
>>> from tme import Structure
|
905
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
906
|
+
>>> structure = Structure.from_file(filename=fname)
|
907
|
+
>>> centered_structure, translation = structure.centered()
|
908
|
+
>>> translation
|
909
|
+
array([34.89391639, 4.05091072, 36.64736741])
|
749
910
|
"""
|
750
911
|
center_of_mass = self.center_of_mass()
|
751
912
|
enclosing_box = minimum_enclosing_box(coordinates=self.atom_coordinate.T)
|
@@ -770,12 +931,11 @@ class Structure:
|
|
770
931
|
----------
|
771
932
|
shape : Tuple[int,]
|
772
933
|
The desired shape of the output array.
|
773
|
-
|
774
934
|
sampling_rate : float
|
775
935
|
The sampling rate of the output array in unit of self.atom_coordinate.
|
776
|
-
|
777
936
|
origin : Tuple[float,]
|
778
937
|
The origin of the coordinate system.
|
938
|
+
|
779
939
|
Returns
|
780
940
|
-------
|
781
941
|
Tuple[NDArray, List[str], Tuple[int, ], float, Tuple[float,]]
|
@@ -809,11 +969,11 @@ class Structure:
|
|
809
969
|
positions = positions[valid_positions == positions.shape[1], :]
|
810
970
|
atom_types = atom_types[valid_positions == positions.shape[1]]
|
811
971
|
|
812
|
-
self.
|
972
|
+
self.metadata["nAtoms_outOfBound"] = 0
|
813
973
|
if positions.shape[0] != coordinates.shape[0]:
|
814
974
|
out_of_bounds = coordinates.shape[0] - positions.shape[0]
|
815
975
|
print(f"{out_of_bounds}/{coordinates.shape[0]} atoms were out of bounds.")
|
816
|
-
self.
|
976
|
+
self.metadata["nAtoms_outOfBound"] = out_of_bounds
|
817
977
|
|
818
978
|
return positions, atom_types, shape, sampling_rate, origin
|
819
979
|
|
@@ -831,14 +991,11 @@ class Structure:
|
|
831
991
|
----------
|
832
992
|
positions : Tuple[float, float, float]
|
833
993
|
The positions of the atoms.
|
834
|
-
|
835
994
|
atoms : Tuple[str]
|
836
995
|
The types of the atoms.
|
837
|
-
|
838
996
|
sampling_rate : float
|
839
997
|
The desired sampling rate in unit of self.atom_coordinate of the
|
840
998
|
output array.
|
841
|
-
|
842
999
|
volume : NDArray
|
843
1000
|
The volume to update.
|
844
1001
|
"""
|
@@ -900,7 +1057,7 @@ class Structure:
|
|
900
1057
|
volume : NDArray
|
901
1058
|
The volume to update.
|
902
1059
|
lowpass_filter : NDArray
|
903
|
-
Whether the scattering factors
|
1060
|
+
Whether the scattering factors should be lowpass filtered.
|
904
1061
|
downsampling_factor : NDArray
|
905
1062
|
Downsampling factor for scattering factor computation.
|
906
1063
|
source : str
|
@@ -944,6 +1101,75 @@ class Structure:
|
|
944
1101
|
scattering_profiles[atoms[atom_index]](distances),
|
945
1102
|
)
|
946
1103
|
|
1104
|
+
@staticmethod
|
1105
|
+
def _position_to_molmap(
|
1106
|
+
positions: NDArray,
|
1107
|
+
weights: Tuple[float],
|
1108
|
+
resolution: float = 4,
|
1109
|
+
sigma_factor: float = 1 / (np.pi * np.sqrt(2)),
|
1110
|
+
cutoff_value: float = 4.0,
|
1111
|
+
sampling_rate: float = None,
|
1112
|
+
) -> NDArray:
|
1113
|
+
"""
|
1114
|
+
Simulates electron densities analogous to Chimera's molmap function [1]_.
|
1115
|
+
|
1116
|
+
Parameters
|
1117
|
+
----------
|
1118
|
+
positions : NDArray
|
1119
|
+
Array containing atomic positions in z,y,x format (n,d).
|
1120
|
+
weights : [float]
|
1121
|
+
The weights to use for the entries in positions.
|
1122
|
+
resolution : float
|
1123
|
+
The product of resolution and sigma_factor gives the sigma used to
|
1124
|
+
compute the discretized Gaussian.
|
1125
|
+
sigma_factor : float
|
1126
|
+
The factor used with resolution to compute sigma. Default is 1 / (π√2).
|
1127
|
+
cutoff_value : float
|
1128
|
+
The cutoff value for the Gaussian kernel. Default is 4.0.
|
1129
|
+
sampling_rate : float
|
1130
|
+
Sampling rate along each dimension. One third of resolution by default.
|
1131
|
+
|
1132
|
+
References
|
1133
|
+
----------
|
1134
|
+
.. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/midas/molmap.html
|
1135
|
+
|
1136
|
+
Returns
|
1137
|
+
-------
|
1138
|
+
NDArray
|
1139
|
+
A numpy array containing the simulated electron densities.
|
1140
|
+
"""
|
1141
|
+
if sampling_rate is None:
|
1142
|
+
sampling_rate = resolution / 3
|
1143
|
+
|
1144
|
+
pad = int(3 * resolution)
|
1145
|
+
sigma = sigma_factor * resolution
|
1146
|
+
sigma_grid = sigma / sampling_rate
|
1147
|
+
|
1148
|
+
# Limit padding to numerically stable values
|
1149
|
+
smax = np.max(sigma_grid)
|
1150
|
+
arr = np.arange(0, pad)
|
1151
|
+
gaussian = (
|
1152
|
+
np.exp(-0.5 * (arr / smax) ** 2)
|
1153
|
+
* np.power(2 * np.pi, -1.5)
|
1154
|
+
* np.power(sigma, -3.0)
|
1155
|
+
)
|
1156
|
+
pad_cutoff = np.max(arr[gaussian > 1e-8])
|
1157
|
+
if arr.size != 0:
|
1158
|
+
pad = int(pad_cutoff) + 1
|
1159
|
+
|
1160
|
+
positions = positions[:, ::-1]
|
1161
|
+
origin = positions.min(axis=0) - pad * sampling_rate
|
1162
|
+
positions = np.rint(np.divide((positions - origin), sampling_rate)).astype(int)
|
1163
|
+
shape = positions.max(axis=0).astype(int) + pad + 1
|
1164
|
+
|
1165
|
+
out = np.zeros(shape, dtype=np.float32)
|
1166
|
+
np.add.at(out, tuple(positions.T), weights)
|
1167
|
+
|
1168
|
+
out = Preprocessor().gaussian_filter(
|
1169
|
+
template=out, sigma=sigma_grid, cutoff_value=cutoff_value
|
1170
|
+
)
|
1171
|
+
return out, origin
|
1172
|
+
|
947
1173
|
def _get_atom_weights(
|
948
1174
|
self, atoms: Tuple[str] = None, weight_type: str = "atomic_weight"
|
949
1175
|
) -> Tuple[float]:
|
@@ -980,41 +1206,77 @@ class Structure:
|
|
980
1206
|
def to_volume(
|
981
1207
|
self,
|
982
1208
|
shape: Tuple[int] = None,
|
983
|
-
sampling_rate:
|
1209
|
+
sampling_rate: Tuple[float] = None,
|
984
1210
|
origin: Tuple[float] = None,
|
985
1211
|
chain: str = None,
|
986
1212
|
weight_type: str = "atomic_weight",
|
987
|
-
|
988
|
-
) -> Tuple[NDArray,
|
1213
|
+
weight_type_args: Dict = dict(),
|
1214
|
+
) -> Tuple[NDArray, NDArray, NDArray]:
|
989
1215
|
"""
|
990
|
-
|
991
|
-
index z, y, x.
|
1216
|
+
Maps class instance to a volume.
|
992
1217
|
|
993
1218
|
Parameters
|
994
1219
|
----------
|
995
|
-
shape :
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
origin :
|
1001
|
-
Origin of the coordinate system
|
1002
|
-
in z, y, x form.
|
1220
|
+
shape : tuple of ints, optional
|
1221
|
+
Output array shape in (z,y,x) form.
|
1222
|
+
sampling_rate : tuple of float, optional
|
1223
|
+
Sampling rate of the output array in units of
|
1224
|
+
:py:attr:`Structure.atom_coordinate`
|
1225
|
+
origin : tuple of floats, optional
|
1226
|
+
Origin of the coordinate system in (z,y,x) form.
|
1003
1227
|
chain : str, optional
|
1004
|
-
|
1005
|
-
to
|
1006
|
-
all chains are returned. Default is None.
|
1228
|
+
Chain identifier. Either a single chain or a comma separated string of chains.
|
1229
|
+
Defaults to None which returns all chains.
|
1007
1230
|
weight_type : str, optional
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1231
|
+
Weight given to individual atoms. Supported weights are:
|
1232
|
+
|
1233
|
+
+----------------------------+---------------------------------------+
|
1234
|
+
| atomic_weight | Using element unit point mass |
|
1235
|
+
+----------------------------+---------------------------------------+
|
1236
|
+
| atomic_number | Using atomic number point mass |
|
1237
|
+
+----------------------------+---------------------------------------+
|
1238
|
+
| gaussian | Represent atoms as isotropic Gaussian |
|
1239
|
+
+----------------------------+---------------------------------------+
|
1240
|
+
| van_der_waals_radius | Using binary van der waal spheres |
|
1241
|
+
+----------------------------+---------------------------------------+
|
1242
|
+
| scattering_factors | Using experimental scattering factors |
|
1243
|
+
+----------------------------+---------------------------------------+
|
1244
|
+
| lowpass_scattering_factors | Lowpass filtered scattering_factors |
|
1245
|
+
+----------------------------+---------------------------------------+
|
1246
|
+
weight_type_args : dict, optional
|
1247
|
+
Additional arguments used for individual weight_types. `gaussian`
|
1248
|
+
accepts ``resolution``, `scattering` accepts ``method``.
|
1011
1249
|
|
1012
1250
|
Returns
|
1013
1251
|
-------
|
1014
|
-
Tuple[NDArray,
|
1015
|
-
|
1252
|
+
Tuple[NDArray, NDArray, NDArray]
|
1253
|
+
Volume, origin and sampling_rate.
|
1254
|
+
|
1255
|
+
Examples
|
1256
|
+
--------
|
1257
|
+
>>> from importlib_resources import files
|
1258
|
+
>>> from tme import Structure
|
1259
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
1260
|
+
>>> structure = Structure.from_file(filename=fname)
|
1261
|
+
>>> vol, origin, sampling = structure.to_volume()
|
1262
|
+
>>> vol.shape, origin, sampling
|
1263
|
+
((59, 35, 53), array([-30.71, 12.42, -27.15]), array([1., 1., 1.]))
|
1264
|
+
>>> vol, origin, sampling = structure.to_volume(sampling_rate=(2.2,1,3))
|
1265
|
+
((27, 35, 18), array([-30.71, 12.42, -27.15]), array([2.2, 1. , 3. ]))
|
1266
|
+
|
1267
|
+
``sampling_rate`` and ``origin`` can be set to ensure correct alignment
|
1268
|
+
with corresponding density maps such as the ones at EMDB. Analogous to
|
1269
|
+
:py:meth:`Structure.subset_by_chain` only parts of the structure can be
|
1270
|
+
mapped onto grids using a variety of weighting schemes
|
1271
|
+
|
1272
|
+
>>> structure.to_volume(weight_type="van_der_waals_radius")
|
1273
|
+
>>> structure.to_volume(
|
1274
|
+
>>> weight_type="lowpass_scattering_factors",
|
1275
|
+
>>> weight_type_args={"source" : "dt1969", "downsampling_factor" : 1.35},
|
1276
|
+
>>> )
|
1016
1277
|
"""
|
1017
1278
|
_weight_types = {
|
1279
|
+
"gaussian",
|
1018
1280
|
"atomic_weight",
|
1019
1281
|
"atomic_number",
|
1020
1282
|
"van_der_waals_radius",
|
@@ -1035,11 +1297,8 @@ class Structure:
|
|
1035
1297
|
"sampling_rate should either be single value of array with"
|
1036
1298
|
f"size {self.atom_coordinate.shape[1]}."
|
1037
1299
|
)
|
1038
|
-
if "source" not in scattering_args:
|
1039
|
-
scattering_args["source"] = "peng1995"
|
1040
1300
|
|
1041
1301
|
temp = self.subset_by_chain(chain=chain)
|
1042
|
-
|
1043
1302
|
positions, atoms, shape, sampling_rate, origin = temp._coordinate_to_position(
|
1044
1303
|
shape=shape, sampling_rate=sampling_rate, origin=origin
|
1045
1304
|
)
|
@@ -1056,7 +1315,7 @@ class Structure:
|
|
1056
1315
|
sampling_rate,
|
1057
1316
|
volume,
|
1058
1317
|
lowpass_filter=False,
|
1059
|
-
**
|
1318
|
+
**weight_type_args,
|
1060
1319
|
)
|
1061
1320
|
elif weight_type == "lowpass_scattering_factors":
|
1062
1321
|
self._position_to_scattering_factors(
|
@@ -1065,10 +1324,19 @@ class Structure:
|
|
1065
1324
|
sampling_rate,
|
1066
1325
|
volume,
|
1067
1326
|
lowpass_filter=True,
|
1068
|
-
**
|
1327
|
+
**weight_type_args,
|
1328
|
+
)
|
1329
|
+
elif weight_type == "gaussian":
|
1330
|
+
volume, origin = self._position_to_molmap(
|
1331
|
+
positions=temp.atom_coordinate,
|
1332
|
+
weights=temp._get_atom_weights(
|
1333
|
+
atoms=atoms, weight_type="atomic_number"
|
1334
|
+
),
|
1335
|
+
sampling_rate=sampling_rate,
|
1336
|
+
**weight_type_args,
|
1069
1337
|
)
|
1070
1338
|
|
1071
|
-
self.
|
1339
|
+
self.metadata.update(temp.metadata)
|
1072
1340
|
return volume, origin, sampling_rate
|
1073
1341
|
|
1074
1342
|
@classmethod
|
@@ -1081,32 +1349,41 @@ class Structure:
|
|
1081
1349
|
weighted: bool = False,
|
1082
1350
|
) -> float:
|
1083
1351
|
"""
|
1084
|
-
Compute root mean square deviation (RMSD) between two structures
|
1085
|
-
|
1086
|
-
Both structures need to have the same number of atoms. In practice, this means
|
1087
|
-
that *structure2* is a transformed version of *structure1*
|
1352
|
+
Compute root mean square deviation (RMSD) between two structures with the
|
1353
|
+
same number of atoms.
|
1088
1354
|
|
1089
1355
|
Parameters
|
1090
1356
|
----------
|
1091
|
-
structure1 : Structure
|
1092
|
-
Structure
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
Origin of the structure coordinate system.
|
1099
|
-
|
1100
|
-
sampling_rate : float, optional
|
1101
|
-
Sampling rate if discretized on a grid in the unit of self.atom_coordinate.
|
1102
|
-
|
1357
|
+
structure1, structure2 : :py:class:`Structure`
|
1358
|
+
Structure instances to compare.
|
1359
|
+
origin : tuple of floats, optional
|
1360
|
+
Coordinate system origin. For computing RMSD on discretized grids.
|
1361
|
+
sampling_rate : tuple of floats, optional
|
1362
|
+
Sampling rate in units of :py:attr:`atom_coordinate`.
|
1363
|
+
For computing RMSD on discretized grids.
|
1103
1364
|
weighted : bool, optional
|
1104
|
-
Whether atoms should be weighted
|
1365
|
+
Whether atoms should be weighted according to their atomic weight.
|
1105
1366
|
|
1106
1367
|
Returns
|
1107
1368
|
-------
|
1108
1369
|
float
|
1109
|
-
Root Mean Square Deviation
|
1370
|
+
Root Mean Square Deviation between input structures.
|
1371
|
+
|
1372
|
+
Examples
|
1373
|
+
--------
|
1374
|
+
>>> from importlib_resources import files
|
1375
|
+
>>> from tme.matching_utils import get_rotation_matrices
|
1376
|
+
>>> from tme import Structure
|
1377
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
1378
|
+
>>> structure = Structure.from_file(filename=fname)
|
1379
|
+
>>> transformed = structure.rigid_transform(
|
1380
|
+
>>> rotation_matrix = get_rotation_matrices(60)[2],
|
1381
|
+
>>> translation = (0, 1, -5)
|
1382
|
+
>>> )
|
1383
|
+
>>> Structure.compare_structures(structure, transformed)
|
1384
|
+
31.35238
|
1385
|
+
>>> Structure.compare_structures(structure, structure)
|
1386
|
+
0.0
|
1110
1387
|
"""
|
1111
1388
|
if origin is None:
|
1112
1389
|
origin = np.zeros(structure1.atom_coordinate.shape[1])
|
@@ -1115,14 +1392,18 @@ class Structure:
|
|
1115
1392
|
coordinates2 = structure2.atom_coordinate
|
1116
1393
|
atoms1, atoms2 = structure1.element_symbol, structure2.element_symbol
|
1117
1394
|
if sampling_rate is not None:
|
1118
|
-
coordinates1 = np.rint(
|
1119
|
-
|
1395
|
+
coordinates1 = np.rint(
|
1396
|
+
np.divide(np.subtract(coordinates1, origin), sampling_rate)
|
1397
|
+
).astype(int)
|
1398
|
+
coordinates2 = np.rint(
|
1399
|
+
np.divide(np.subtract(coordinates2, origin), sampling_rate)
|
1400
|
+
).astype(int)
|
1120
1401
|
|
1121
|
-
weights1 = np.
|
1122
|
-
weights2 = np.
|
1123
|
-
if
|
1124
|
-
weights1 = np.
|
1125
|
-
weights2 = np.
|
1402
|
+
weights1 = np.ones_like(structure1.atom_coordinate.shape[0])
|
1403
|
+
weights2 = np.ones_like(structure2.atom_coordinate.shape[0])
|
1404
|
+
if weighted:
|
1405
|
+
weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
|
1406
|
+
weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
|
1126
1407
|
|
1127
1408
|
if not np.allclose(coordinates1.shape, coordinates2.shape):
|
1128
1409
|
raise ValueError(
|
@@ -1147,35 +1428,41 @@ class Structure:
|
|
1147
1428
|
weighted: bool = False,
|
1148
1429
|
) -> Tuple["Structure", float]:
|
1149
1430
|
"""
|
1150
|
-
Align
|
1151
|
-
the
|
1152
|
-
|
1153
|
-
Both structures need to have the same number of atoms. In practice, this means
|
1154
|
-
that *structure2* is a subset of *structure1*
|
1431
|
+
Align ``structure2`` to ``structure1`` using the Kabsch Algorithm. Both
|
1432
|
+
structures need to have the same number of atoms.
|
1155
1433
|
|
1156
1434
|
Parameters
|
1157
1435
|
----------
|
1158
|
-
structure1 : Structure
|
1159
|
-
Structure
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
Origin of the structure coordinate system.
|
1166
|
-
|
1167
|
-
sampling_rate : float, optional
|
1168
|
-
Voxel size if discretized on a grid.
|
1169
|
-
|
1436
|
+
structure1, structure2 : :py:class:`Structure`
|
1437
|
+
Structure instances to align.
|
1438
|
+
origin : tuple of floats, optional
|
1439
|
+
Coordinate system origin. For computing RMSD on discretized grids.
|
1440
|
+
sampling_rate : tuple of floats, optional
|
1441
|
+
Sampling rate in units of :py:attr:`atom_coordinate`.
|
1442
|
+
For computing RMSD on discretized grids.
|
1170
1443
|
weighted : bool, optional
|
1171
1444
|
Whether atoms should be weighted by their atomic weight.
|
1172
1445
|
|
1173
1446
|
Returns
|
1174
1447
|
-------
|
1175
|
-
Structure
|
1176
|
-
|
1448
|
+
:py:class:`Structure`
|
1449
|
+
``structure2`` aligned to ``structure1``.
|
1177
1450
|
float
|
1178
|
-
|
1451
|
+
Alignment RMSD
|
1452
|
+
|
1453
|
+
Examples
|
1454
|
+
--------
|
1455
|
+
>>> from importlib_resources import files
|
1456
|
+
>>> from tme import Structure
|
1457
|
+
>>> from tme.matching_utils import get_rotation_matrices
|
1458
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
1459
|
+
>>> structure = Structure.from_file(filename=fname)
|
1460
|
+
>>> transformed = structure.rigid_transform(
|
1461
|
+
>>> rotation_matrix = get_rotation_matrices(60)[2],
|
1462
|
+
>>> translation = (0, 1, -5)
|
1463
|
+
>>> )
|
1464
|
+
>>> aligned, rmsd = Structure.align_structures(structure, transformed)
|
1465
|
+
Initial RMSD: 31.07189 - Final RMSD: 0.00000
|
1179
1466
|
"""
|
1180
1467
|
if origin is None:
|
1181
1468
|
origin = np.minimum(
|
@@ -1237,9 +1524,7 @@ class Structure:
|
|
1237
1524
|
|
1238
1525
|
@dataclass(frozen=True, repr=True)
|
1239
1526
|
class Elements:
|
1240
|
-
"""
|
1241
|
-
Lookup table containing information on chemical elements.
|
1242
|
-
"""
|
1527
|
+
"""Lookup table for chemical elements."""
|
1243
1528
|
|
1244
1529
|
Atom = namedtuple(
|
1245
1530
|
"Atom",
|
@@ -1382,12 +1667,33 @@ class Elements:
|
|
1382
1667
|
Parameters
|
1383
1668
|
----------
|
1384
1669
|
key : str
|
1385
|
-
|
1386
|
-
the internal data.
|
1670
|
+
Key to retrieve the corresponding value for.
|
1387
1671
|
|
1388
1672
|
Returns
|
1389
1673
|
-------
|
1390
|
-
|
1391
|
-
The
|
1674
|
+
namedtuple
|
1675
|
+
The Atom tuple associated with the provided key.
|
1392
1676
|
"""
|
1393
1677
|
return self._elements.get(key, self._default)
|
1678
|
+
|
1679
|
+
|
1680
|
+
def _format_string(string: str) -> str:
|
1681
|
+
"""
|
1682
|
+
Formats a string by adding quotation marks if it contains white spaces.
|
1683
|
+
|
1684
|
+
Parameters
|
1685
|
+
----------
|
1686
|
+
string : str
|
1687
|
+
Input string to be formatted.
|
1688
|
+
|
1689
|
+
Returns
|
1690
|
+
-------
|
1691
|
+
str
|
1692
|
+
Formatted string with added quotation marks if needed.
|
1693
|
+
"""
|
1694
|
+
if " " in string:
|
1695
|
+
return f"'{string}'"
|
1696
|
+
# Occurs e.g. for C1' atoms. The trailing whitespace is necessary.
|
1697
|
+
if string.count("'") == 1:
|
1698
|
+
return f'"{string}"'
|
1699
|
+
return string
|