pytme 0.2.1__cp311-cp311-macosx_14_0_arm64.whl → 0.2.2__cp311-cp311-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytme-0.2.1.data → pytme-0.2.2.data}/scripts/match_template.py +147 -93
- {pytme-0.2.1.data → pytme-0.2.2.data}/scripts/postprocess.py +67 -26
- {pytme-0.2.1.data → pytme-0.2.2.data}/scripts/preprocessor_gui.py +175 -85
- pytme-0.2.2.dist-info/METADATA +91 -0
- pytme-0.2.2.dist-info/RECORD +74 -0
- {pytme-0.2.1.dist-info → pytme-0.2.2.dist-info}/WHEEL +1 -1
- scripts/extract_candidates.py +20 -13
- scripts/match_template.py +147 -93
- scripts/match_template_filters.py +154 -95
- scripts/postprocess.py +67 -26
- scripts/preprocessor_gui.py +175 -85
- scripts/refine_matches.py +265 -61
- tme/__init__.py +0 -1
- tme/__version__.py +1 -1
- tme/analyzer.py +451 -809
- tme/backends/__init__.py +40 -11
- tme/backends/_jax_utils.py +185 -0
- tme/backends/cupy_backend.py +111 -223
- tme/backends/jax_backend.py +214 -150
- tme/backends/matching_backend.py +445 -384
- tme/backends/mlx_backend.py +32 -59
- tme/backends/npfftw_backend.py +239 -507
- tme/backends/pytorch_backend.py +21 -145
- tme/density.py +233 -363
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/matching_data.py +322 -285
- tme/matching_exhaustive.py +172 -1493
- tme/matching_optimization.py +143 -106
- tme/matching_scores.py +884 -0
- tme/matching_utils.py +280 -386
- tme/memory.py +377 -0
- tme/orientations.py +52 -12
- tme/parser.py +3 -4
- tme/preprocessing/_utils.py +61 -32
- tme/preprocessing/compose.py +7 -3
- tme/preprocessing/frequency_filters.py +49 -39
- tme/preprocessing/tilt_series.py +34 -40
- tme/preprocessor.py +560 -526
- tme/structure.py +491 -188
- tme/types.py +5 -3
- pytme-0.2.1.dist-info/METADATA +0 -73
- pytme-0.2.1.dist-info/RECORD +0 -73
- tme/helpers.py +0 -881
- tme/matching_constrained.py +0 -195
- {pytme-0.2.1.data → pytme-0.2.2.data}/scripts/estimate_ram_usage.py +0 -0
- {pytme-0.2.1.data → pytme-0.2.2.data}/scripts/preprocess.py +0 -0
- {pytme-0.2.1.dist-info → pytme-0.2.2.dist-info}/LICENSE +0 -0
- {pytme-0.2.1.dist-info → pytme-0.2.2.dist-info}/entry_points.txt +0 -0
- {pytme-0.2.1.dist-info → pytme-0.2.2.dist-info}/top_level.txt +0 -0
tme/structure.py
CHANGED
@@ -6,103 +6,159 @@
|
|
6
6
|
"""
|
7
7
|
import warnings
|
8
8
|
from copy import deepcopy
|
9
|
-
from collections import namedtuple
|
10
|
-
from typing import List, Dict, Tuple
|
11
9
|
from itertools import groupby
|
12
10
|
from dataclasses import dataclass
|
11
|
+
from collections import namedtuple
|
12
|
+
from typing import List, Dict, Tuple
|
13
13
|
from os.path import splitext, basename
|
14
14
|
|
15
15
|
import numpy as np
|
16
16
|
|
17
|
-
from .parser import PDBParser, MMCIFParser
|
18
|
-
from .matching_utils import (
|
19
|
-
rigid_transform,
|
20
|
-
_format_mmcif_colunns,
|
21
|
-
minimum_enclosing_box,
|
22
|
-
)
|
23
|
-
from .helpers import atom_profile
|
24
17
|
from .types import NDArray
|
18
|
+
from .preprocessor import atom_profile, Preprocessor
|
19
|
+
from .parser import PDBParser, MMCIFParser
|
20
|
+
from .matching_utils import rigid_transform, minimum_enclosing_box
|
25
21
|
|
26
22
|
|
27
23
|
@dataclass(repr=False)
|
28
24
|
class Structure:
|
29
25
|
"""
|
30
|
-
Represents atomic structures
|
31
|
-
|
26
|
+
Represents atomic structures per the Protein Data Bank (PDB) specification.
|
27
|
+
|
28
|
+
Examples
|
29
|
+
--------
|
30
|
+
The following creates a :py:class:`Structure` instance
|
31
|
+
|
32
|
+
>>> from tme import Structure
|
33
|
+
>>> structure = Structure(
|
34
|
+
>>> record_type=["ATOM", "ATOM", "ATOM"],
|
35
|
+
>>> atom_serial_number=[0, 1, 2] ,
|
36
|
+
>>> atom_name=["C", "N", "H"],
|
37
|
+
>>> atom_coordinate=[[30,15,10], [35, 20, 15], [35,25,20]],
|
38
|
+
>>> alternate_location_indicator=[".", ".", "."],
|
39
|
+
>>> residue_name=["GLY", "GLY", "HIS"],
|
40
|
+
>>> chain_identifier=["A", "A", "B"],
|
41
|
+
>>> residue_sequence_number=[0, 0, 1],
|
42
|
+
>>> code_for_residue_insertion=["?", "?", "?"],
|
43
|
+
>>> occupancy=[0, 0, 0],
|
44
|
+
>>> temperature_factor=[0, 0, 0],
|
45
|
+
>>> segment_identifier=["1", "1", "1"],
|
46
|
+
>>> element_symbol=["C", "N", "C"],
|
47
|
+
>>> charge=["?", "?", "?"],
|
48
|
+
>>> metadata={},
|
49
|
+
>>> )
|
50
|
+
>>> structure
|
51
|
+
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 2]
|
52
|
+
|
53
|
+
:py:class:`Structure` instances support a range of subsetting operations based on
|
54
|
+
atom indices
|
55
|
+
|
56
|
+
>>> structure[1]
|
57
|
+
Unique Chains: A, Atom Range: 1-1 [N = 1], Residue Range: 0-0 [N = 1]
|
58
|
+
>>> structure[(False, False, True)]
|
59
|
+
Unique Chains: B, Atom Range: 2-2 [N = 1], Residue Range: 1-1 [N = 1]
|
60
|
+
>>> structure[(1,2)]
|
61
|
+
Unique Chains: A-B, Atom Range: 1-2 [N = 2], Residue Range: 0-1 [N = 2]
|
62
|
+
|
63
|
+
They can be written to disk in a range of formats using :py:meth:`Structure.to_file`
|
64
|
+
|
65
|
+
>>> structure.to_file("test.pdb") # Writes a PDB file to disk
|
66
|
+
>>> structure.to_file("test.cif") # Writes a mmCIF file to disk
|
67
|
+
|
68
|
+
New instances can be created from a range of formats using
|
69
|
+
:py:meth:`Structure.from_file`
|
70
|
+
|
71
|
+
>>> Structure.from_file("test.pdb") # Reads PDB file from disk
|
72
|
+
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 2]
|
73
|
+
>>> Structure.from_file("test.cif") # Reads mmCIF file from disk
|
74
|
+
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 2]
|
75
|
+
|
76
|
+
Class instances can be discretized on grids and converted to
|
77
|
+
:py:class:`tme.density.Density` instances using :py:meth:`Structure.to_volume`
|
78
|
+
or :py:meth:`tme.density.Density.from_structure`.
|
79
|
+
|
80
|
+
>>> volume, origin, sampling_rate = structure.to_volume(shape=(50,40,30))
|
32
81
|
|
33
82
|
References
|
34
83
|
----------
|
35
|
-
.. [1]
|
84
|
+
.. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
|
85
|
+
.. [2] https://www.ccp4.ac.uk/html/mmcifformat.html
|
86
|
+
|
36
87
|
"""
|
37
88
|
|
38
|
-
#:
|
89
|
+
#: Array of record types, e.g. ATOM.
|
39
90
|
record_type: NDArray
|
40
91
|
|
41
|
-
#:
|
92
|
+
#: Array of serial numbers.
|
42
93
|
atom_serial_number: NDArray
|
43
94
|
|
44
|
-
#:
|
95
|
+
#: Array of atom names.
|
45
96
|
atom_name: NDArray
|
46
97
|
|
47
|
-
#:
|
98
|
+
#: Array of x,y,z atom coordinates.
|
48
99
|
atom_coordinate: NDArray
|
49
100
|
|
50
|
-
#:
|
101
|
+
#: Array of alternate location indicators.
|
51
102
|
alternate_location_indicator: NDArray
|
52
103
|
|
53
|
-
#:
|
104
|
+
#: Array of residue names.
|
54
105
|
residue_name: NDArray
|
55
106
|
|
56
|
-
#:
|
107
|
+
#: Array of chain identifiers.
|
57
108
|
chain_identifier: NDArray
|
58
109
|
|
59
|
-
#:
|
110
|
+
#: Array of residue ids.
|
60
111
|
residue_sequence_number: NDArray
|
61
112
|
|
62
|
-
#:
|
113
|
+
#: Array of insertion information.
|
63
114
|
code_for_residue_insertion: NDArray
|
64
115
|
|
65
|
-
#:
|
116
|
+
#: Array of occupancy factors.
|
66
117
|
occupancy: NDArray
|
67
118
|
|
68
|
-
#:
|
119
|
+
#: Array of B-factors.
|
69
120
|
temperature_factor: NDArray
|
70
121
|
|
71
|
-
#:
|
122
|
+
#: Array of segment identifiers.
|
72
123
|
segment_identifier: NDArray
|
73
124
|
|
74
|
-
#:
|
125
|
+
#: Array of element symbols.
|
75
126
|
element_symbol: NDArray
|
76
127
|
|
77
|
-
#:
|
128
|
+
#: Array of charges.
|
78
129
|
charge: NDArray
|
79
130
|
|
80
|
-
#:
|
81
|
-
|
131
|
+
#: Metadata dictionary.
|
132
|
+
metadata: dict
|
82
133
|
|
83
134
|
def __post_init__(self, *args, **kwargs):
|
84
135
|
"""
|
85
|
-
Initialize the structure and populate header
|
136
|
+
Initialize the structure and populate header metadata.
|
86
137
|
|
87
138
|
Raises
|
88
139
|
------
|
89
140
|
ValueError
|
90
|
-
If
|
91
|
-
If the shape of atom_coordinates and chain_identifier doesn't match.
|
141
|
+
If the length of an NDArray attribute does not match the number of atoms.
|
92
142
|
"""
|
93
|
-
self.
|
94
|
-
|
143
|
+
for attribute in self.__dict__:
|
144
|
+
value = getattr(self, attribute)
|
145
|
+
target_type = self.__annotations__.get(attribute, None)
|
146
|
+
if target_type == NDArray:
|
147
|
+
setattr(self, attribute, np.atleast_1d(np.array(value)))
|
95
148
|
|
96
149
|
n_atoms = self.atom_coordinate.shape[0]
|
97
150
|
for attribute in self.__dict__:
|
98
151
|
value = getattr(self, attribute)
|
99
|
-
if
|
152
|
+
if not isinstance(value, np.ndarray):
|
100
153
|
continue
|
101
154
|
if value.shape[0] != n_atoms:
|
102
155
|
raise ValueError(
|
103
156
|
f"Expected shape of {attribute}: {n_atoms}, got {value.shape[0]}."
|
104
157
|
)
|
105
158
|
|
159
|
+
self._elements = Elements()
|
160
|
+
self.metadata = self._populate_metadata(self.metadata)
|
161
|
+
|
106
162
|
def __getitem__(self, indices: List[int]) -> "Structure":
|
107
163
|
"""
|
108
164
|
Get a Structure instance for specified indices.
|
@@ -138,22 +194,17 @@ class Structure:
|
|
138
194
|
"charge",
|
139
195
|
)
|
140
196
|
kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
|
141
|
-
ret = self.__class__(**kwargs,
|
197
|
+
ret = self.__class__(**kwargs, metadata={})
|
142
198
|
return ret
|
143
199
|
|
144
200
|
def __repr__(self):
|
145
201
|
"""
|
146
202
|
Return a string representation of the Structure.
|
147
|
-
|
148
|
-
Returns
|
149
|
-
-------
|
150
|
-
str
|
151
|
-
The string representation.
|
152
203
|
"""
|
153
204
|
unique_chains = "-".join(
|
154
205
|
[
|
155
206
|
",".join([str(x) for x in entity])
|
156
|
-
for entity in self.
|
207
|
+
for entity in self.metadata["unique_chains"]
|
157
208
|
]
|
158
209
|
)
|
159
210
|
min_atom = np.min(self.atom_serial_number)
|
@@ -162,7 +213,7 @@ class Structure:
|
|
162
213
|
|
163
214
|
min_residue = np.min(self.residue_sequence_number)
|
164
215
|
max_residue = np.max(self.residue_sequence_number)
|
165
|
-
n_residue = self.residue_sequence_number.size
|
216
|
+
n_residue = np.unique(self.residue_sequence_number).size
|
166
217
|
|
167
218
|
repr_str = (
|
168
219
|
f"Structure object at {id(self)}\n"
|
@@ -172,43 +223,39 @@ class Structure:
|
|
172
223
|
)
|
173
224
|
return repr_str
|
174
225
|
|
175
|
-
def get_chains(self) -> List[str]:
|
176
|
-
"""
|
177
|
-
Returns a list of available chains.
|
178
|
-
|
179
|
-
Returns
|
180
|
-
-------
|
181
|
-
list
|
182
|
-
The list of available chains.
|
183
|
-
"""
|
184
|
-
return list(self.details["chain_weight"].keys())
|
185
|
-
|
186
226
|
def copy(self) -> "Structure":
|
187
227
|
"""
|
188
228
|
Returns a copy of the Structure instance.
|
189
229
|
|
190
230
|
Returns
|
191
231
|
-------
|
192
|
-
Structure
|
232
|
+
:py:class:`Structure`
|
193
233
|
The copied Structure instance.
|
234
|
+
|
235
|
+
Examples
|
236
|
+
--------
|
237
|
+
>>> import numpy as np
|
238
|
+
>>> structure_copy = structure.copy()
|
239
|
+
>>> np.allclose(structure_copy.atom_coordinate, structure.atom_coordinate)
|
240
|
+
True
|
194
241
|
"""
|
195
242
|
return deepcopy(self)
|
196
243
|
|
197
|
-
def
|
244
|
+
def _populate_metadata(self, metadata: Dict = {}) -> Dict:
|
198
245
|
"""
|
199
|
-
Populate the
|
246
|
+
Populate the metadata dictionary with the data from the Structure instance.
|
200
247
|
|
201
248
|
Parameters
|
202
249
|
----------
|
203
|
-
|
204
|
-
The initial
|
250
|
+
metadata : dict, optional
|
251
|
+
The initial metadata dictionary, by default {}.
|
205
252
|
|
206
253
|
Returns
|
207
254
|
-------
|
208
255
|
dict
|
209
|
-
The populated
|
256
|
+
The populated metadata dictionary.
|
210
257
|
"""
|
211
|
-
|
258
|
+
metadata["weight"] = np.sum(
|
212
259
|
[self._elements[atype].atomic_weight for atype in self.element_symbol]
|
213
260
|
)
|
214
261
|
|
@@ -220,12 +267,12 @@ class Structure:
|
|
220
267
|
[self._elements[atype].atomic_weight for atype in self.element_symbol],
|
221
268
|
)
|
222
269
|
labels = self.chain_identifier[idx]
|
223
|
-
|
270
|
+
metadata["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
|
224
271
|
|
225
|
-
# Group non-unique chains in separate lists in
|
226
|
-
|
272
|
+
# Group non-unique chains in separate lists in metadata["unique_chains"]
|
273
|
+
metadata["unique_chains"], temp = [], {}
|
227
274
|
for chain_label in label:
|
228
|
-
index = len(
|
275
|
+
index = len(metadata["unique_chains"])
|
229
276
|
chain_sequence = "".join(
|
230
277
|
[
|
231
278
|
str(y)
|
@@ -236,10 +283,10 @@ class Structure:
|
|
236
283
|
)
|
237
284
|
if chain_sequence not in temp:
|
238
285
|
temp[chain_sequence] = index
|
239
|
-
|
286
|
+
metadata["unique_chains"].append([chain_label])
|
240
287
|
continue
|
241
288
|
idx = temp.get(chain_sequence)
|
242
|
-
|
289
|
+
metadata["unique_chains"][idx].append(chain_label)
|
243
290
|
|
244
291
|
filtered_data = [
|
245
292
|
(label, integer)
|
@@ -248,12 +295,12 @@ class Structure:
|
|
248
295
|
)
|
249
296
|
]
|
250
297
|
filtered_data = sorted(filtered_data, key=lambda x: x[0])
|
251
|
-
|
298
|
+
metadata["chain_range"] = {}
|
252
299
|
for label, values in groupby(filtered_data, key=lambda x: x[0]):
|
253
300
|
values = [int(x[1]) for x in values]
|
254
|
-
|
301
|
+
metadata["chain_range"][label] = (min(values), max(values))
|
255
302
|
|
256
|
-
return
|
303
|
+
return metadata
|
257
304
|
|
258
305
|
@classmethod
|
259
306
|
def from_file(
|
@@ -264,12 +311,18 @@ class Structure:
|
|
264
311
|
filter_by_residues: set = None,
|
265
312
|
) -> "Structure":
|
266
313
|
"""
|
267
|
-
Reads
|
314
|
+
Reads an atomic structure file into a :py:class:`Structure` instance.
|
268
315
|
|
269
316
|
Parameters
|
270
317
|
----------
|
271
318
|
filename : str
|
272
|
-
|
319
|
+
Input file. Supported extensions are:
|
320
|
+
|
321
|
+
+------+-------------------------------------------------------------+
|
322
|
+
| .pdb | Reads a PDB file |
|
323
|
+
+------+-------------------------------------------------------------+
|
324
|
+
| .cif | Reads an mmCIF file |
|
325
|
+
+------+-------------------------------------------------------------+
|
273
326
|
keep_non_atom_records : bool, optional
|
274
327
|
Whether to keep residues that are not labelled ATOM.
|
275
328
|
filter_by_elements: set, optional
|
@@ -280,12 +333,34 @@ class Structure:
|
|
280
333
|
Raises
|
281
334
|
------
|
282
335
|
ValueError
|
283
|
-
If the extension is not
|
336
|
+
If the extension is not supported.
|
284
337
|
|
285
338
|
Returns
|
286
339
|
-------
|
287
|
-
Structure
|
288
|
-
|
340
|
+
:py:class:`Structure`
|
341
|
+
Structure instance representing the read in file.
|
342
|
+
|
343
|
+
Examples
|
344
|
+
--------
|
345
|
+
>>> from importlib_resources import files
|
346
|
+
>>> from tme import Structure
|
347
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
348
|
+
>>> structure = Structure.from_file(filename=fname)
|
349
|
+
>>> structure
|
350
|
+
Unique Chains: A-B, Atom Range: 1-1564 [N = 1564], Residue Range: 142-239 [N = 1564]
|
351
|
+
|
352
|
+
We can include non ATOM entries and restrict the considered elements
|
353
|
+
and residues
|
354
|
+
|
355
|
+
>>> structure = Structure.from_file(
|
356
|
+
>>> filename=fname,
|
357
|
+
>>> keep_non_atom_records=True,
|
358
|
+
>>> filter_by_elements = {"C"},
|
359
|
+
>>> filter_by_residues = {"GLY"},
|
360
|
+
>>> )
|
361
|
+
>>> structure
|
362
|
+
Unique Chains: A,B, Atom Range: 96-1461 [N = 44], Residue Range: 154-228 [N = 44]
|
363
|
+
|
289
364
|
"""
|
290
365
|
_, file_extension = splitext(basename(filename.upper()))
|
291
366
|
if file_extension == ".PDB":
|
@@ -313,14 +388,14 @@ class Structure:
|
|
313
388
|
keep = np.logical_and(keep, data["record_type"] == "ATOM")
|
314
389
|
|
315
390
|
for key in data:
|
316
|
-
if key == "
|
391
|
+
if key == "metadata":
|
317
392
|
continue
|
318
|
-
if
|
393
|
+
if isinstance(data[key], np.ndarray):
|
319
394
|
data[key] = data[key][keep]
|
320
395
|
else:
|
321
396
|
data[key] = [x for x, flag in zip(data[key], keep) if flag]
|
322
397
|
|
323
|
-
data["
|
398
|
+
data["metadata"]["filepath"] = filename
|
324
399
|
|
325
400
|
return cls(**data)
|
326
401
|
|
@@ -367,12 +442,12 @@ class Structure:
|
|
367
442
|
out_data = [
|
368
443
|
x.strip() for x in result["atom_site"].get(atom_site_key, ["."])
|
369
444
|
]
|
370
|
-
if dtype
|
445
|
+
if dtype is int:
|
371
446
|
out_data = [0 if x == "." else int(x) for x in out_data]
|
372
447
|
try:
|
373
448
|
out[out_key] = np.asarray(out_data).astype(dtype)
|
374
449
|
except ValueError:
|
375
|
-
default = ["."] if dtype
|
450
|
+
default = ["."] if dtype is str else 0
|
376
451
|
print(f"Converting {out_key} to {dtype} failed, set to {default}.")
|
377
452
|
out[out_key] = np.repeat(default, len(out_data)).astype(dtype)
|
378
453
|
|
@@ -382,7 +457,7 @@ class Structure:
|
|
382
457
|
continue
|
383
458
|
out[key] = np.repeat(value, number_entries // value.size)
|
384
459
|
|
385
|
-
out["
|
460
|
+
out["metadata"] = {}
|
386
461
|
out["atom_coordinate"] = np.transpose(
|
387
462
|
np.array(
|
388
463
|
[
|
@@ -405,7 +480,7 @@ class Structure:
|
|
405
480
|
for out_key, (base_key, inner_key, default) in detail_mapping.items():
|
406
481
|
if base_key not in result:
|
407
482
|
continue
|
408
|
-
out["
|
483
|
+
out["metadata"][out_key] = result[base_key].get(inner_key, default)
|
409
484
|
|
410
485
|
return out
|
411
486
|
|
@@ -446,15 +521,15 @@ class Structure:
|
|
446
521
|
"charge": ("charge", str),
|
447
522
|
}
|
448
523
|
|
449
|
-
out = {"
|
524
|
+
out = {"metadata": result["details"]}
|
450
525
|
for out_key, (inner_key, dtype) in atom_site_mapping.items():
|
451
526
|
out_data = [x.strip() for x in result[inner_key]]
|
452
|
-
if dtype
|
527
|
+
if dtype is int:
|
453
528
|
out_data = [0 if x == "." else int(x) for x in out_data]
|
454
529
|
try:
|
455
530
|
out[out_key] = np.asarray(out_data).astype(dtype)
|
456
531
|
except ValueError:
|
457
|
-
default = "." if dtype
|
532
|
+
default = "." if dtype is str else 0
|
458
533
|
print(
|
459
534
|
f"Converting {out_key} to {dtype} failed. Setting {out_key} to {default}."
|
460
535
|
)
|
@@ -466,19 +541,35 @@ class Structure:
|
|
466
541
|
|
467
542
|
def to_file(self, filename: str) -> None:
|
468
543
|
"""
|
469
|
-
Writes the Structure instance
|
470
|
-
|
471
|
-
|
544
|
+
Writes the :py:class:`Structure` instance to disk.
|
545
|
+
|
546
|
+
Parameters
|
547
|
+
----------
|
548
|
+
filename : str
|
549
|
+
The name of the file to be created. Supported extensions are:
|
550
|
+
|
551
|
+
+------+-------------------------------------------------------------+
|
552
|
+
| .pdb | Creates a PDB file |
|
553
|
+
+------+-------------------------------------------------------------+
|
554
|
+
| .cif | Creates an mmCIF file |
|
555
|
+
+------+-------------------------------------------------------------+
|
472
556
|
|
473
557
|
Raises
|
474
558
|
------
|
475
559
|
ValueError
|
476
|
-
If the extension is not
|
560
|
+
If the extension is not supported.
|
561
|
+
|
562
|
+
Examples
|
563
|
+
--------
|
564
|
+
>>> from importlib_resources import files
|
565
|
+
>>> from tempfile import NamedTemporaryFile
|
566
|
+
>>> from tme import Structure
|
567
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
568
|
+
>>> oname = NamedTemporaryFile().name
|
569
|
+
>>> structure = Structure.from_file(filename=fname)
|
570
|
+
>>> structure.to_file(f"{oname}.cif") # Writes an mmCIF file to disk
|
571
|
+
>>> structure.to_file(f"{oname}.pdb") # Writes a PDB file to disk
|
477
572
|
|
478
|
-
Parameters
|
479
|
-
----------
|
480
|
-
filename : str
|
481
|
-
The filename of the file to write.
|
482
573
|
"""
|
483
574
|
if np.any(np.vectorize(len)(self.chain_identifier) > 2):
|
484
575
|
warnings.warn("Chain identifiers longer than one will be shortened.")
|
@@ -595,7 +686,7 @@ class Structure:
|
|
595
686
|
data["pdbx_PDB_model_num"].append(str(model_num))
|
596
687
|
|
597
688
|
output_data = {"atom_site": data}
|
598
|
-
original_file = self.
|
689
|
+
original_file = self.metadata.get("filepath", "")
|
599
690
|
try:
|
600
691
|
new_data = {k: v for k, v in MMCIFParser(original_file).items()}
|
601
692
|
index = self.atom_serial_number - 1
|
@@ -622,7 +713,18 @@ class Structure:
|
|
622
713
|
else:
|
623
714
|
ret += "loop_\n"
|
624
715
|
ret += "".join([f"_{category}.{k}\n" for k in subdict])
|
625
|
-
|
716
|
+
|
717
|
+
subdict = {
|
718
|
+
k: [_format_string(s) for s in v] for k, v in subdict.items()
|
719
|
+
}
|
720
|
+
key_length = {
|
721
|
+
key: len(max(value, key=lambda x: len(x), default=""))
|
722
|
+
for key, value in subdict.items()
|
723
|
+
}
|
724
|
+
padded_subdict = {
|
725
|
+
key: [s.ljust(key_length[key] + 1) for s in values]
|
726
|
+
for key, values in subdict.items()
|
727
|
+
}
|
626
728
|
|
627
729
|
data = [
|
628
730
|
"".join([str(x) for x in content])
|
@@ -646,8 +748,23 @@ class Structure:
|
|
646
748
|
|
647
749
|
Returns
|
648
750
|
-------
|
649
|
-
Structure
|
650
|
-
A subset of the
|
751
|
+
:py:class:`Structure`
|
752
|
+
A subset of the class instance containing only the specified chains.
|
753
|
+
|
754
|
+
Raises
|
755
|
+
------
|
756
|
+
ValueError
|
757
|
+
If none of the specified chains exist.
|
758
|
+
|
759
|
+
Examples
|
760
|
+
--------
|
761
|
+
>>> from importlib_resources import files
|
762
|
+
>>> from tme import Structure
|
763
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
764
|
+
>>> structure = Structure.from_file(filename=fname)
|
765
|
+
>>> structure.subset_by_chain(chain="A") # Keep A
|
766
|
+
>>> structure.subset_by_chain(chain="A,B") # Keep A and B
|
767
|
+
>>> structure.subset_by_chain(chain="B,C") # Keep B, C does not exist
|
651
768
|
"""
|
652
769
|
chain = np.unique(self.chain_identifier) if chain is None else chain.split(",")
|
653
770
|
keep = np.in1d(self.chain_identifier, chain)
|
@@ -666,10 +783,8 @@ class Structure:
|
|
666
783
|
----------
|
667
784
|
start : int
|
668
785
|
The starting residue sequence number.
|
669
|
-
|
670
786
|
stop : int
|
671
787
|
The ending residue sequence number.
|
672
|
-
|
673
788
|
chain : str, optional
|
674
789
|
The chain identifier. If multiple chains should be selected they need
|
675
790
|
to be a comma separated string, e.g. 'A,B,CE'. If chain is None,
|
@@ -677,8 +792,21 @@ class Structure:
|
|
677
792
|
|
678
793
|
Returns
|
679
794
|
-------
|
680
|
-
Structure
|
795
|
+
:py:class:`Structure`
|
681
796
|
A subset of the original structure within the specified residue range.
|
797
|
+
|
798
|
+
Raises
|
799
|
+
------
|
800
|
+
ValueError
|
801
|
+
If none of the specified residue chain combinations exist.
|
802
|
+
|
803
|
+
Examples
|
804
|
+
--------
|
805
|
+
>>> from importlib_resources import files
|
806
|
+
>>> from tme import Structure
|
807
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
808
|
+
>>> structure = Structure.from_file(filename=fname)
|
809
|
+
>>> structure.subset_by_range(chain="A",start=150,stop=180)
|
682
810
|
"""
|
683
811
|
ret = self.subset_by_chain(chain=chain)
|
684
812
|
keep = np.logical_and(
|
@@ -694,6 +822,15 @@ class Structure:
|
|
694
822
|
-------
|
695
823
|
NDArray
|
696
824
|
The center of mass of the structure.
|
825
|
+
|
826
|
+
Examples
|
827
|
+
--------
|
828
|
+
>>> from importlib_resources import files
|
829
|
+
>>> from tme import Structure
|
830
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
831
|
+
>>> structure = Structure.from_file(filename=fname)
|
832
|
+
>>> structure.center_of_mass()
|
833
|
+
array([-0.89391639, 29.94908928, -2.64736741])
|
697
834
|
"""
|
698
835
|
weights = [self._elements[atype].atomic_weight for atype in self.element_symbol]
|
699
836
|
return np.dot(self.atom_coordinate.T, weights) / np.sum(weights)
|
@@ -719,7 +856,19 @@ class Structure:
|
|
719
856
|
Returns
|
720
857
|
-------
|
721
858
|
Structure
|
722
|
-
The transformed instance of :py:class:`
|
859
|
+
The transformed instance of :py:class:`Structure`.
|
860
|
+
|
861
|
+
Examples
|
862
|
+
--------
|
863
|
+
>>> from importlib_resources import files
|
864
|
+
>>> from tme import Structure
|
865
|
+
>>> from tme.matching_utils import get_rotation_matrices
|
866
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
867
|
+
>>> structure = Structure.from_file(filename=fname)
|
868
|
+
>>> structure.rigid_transform(
|
869
|
+
>>> rotation_matrix = get_rotation_matrices(60)[2],
|
870
|
+
>>> translation = (0, 1, -5)
|
871
|
+
>>> )
|
723
872
|
"""
|
724
873
|
out = np.empty_like(self.atom_coordinate.T)
|
725
874
|
rigid_transform(
|
@@ -747,7 +896,17 @@ class Structure:
|
|
747
896
|
|
748
897
|
See Also
|
749
898
|
--------
|
750
|
-
:py:meth:`tme.Density.centered`
|
899
|
+
:py:meth:`tme.density.Density.centered`
|
900
|
+
|
901
|
+
Examples
|
902
|
+
--------
|
903
|
+
>>> from importlib_resources import files
|
904
|
+
>>> from tme import Structure
|
905
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
906
|
+
>>> structure = Structure.from_file(filename=fname)
|
907
|
+
>>> centered_structure, translation = structure.centered()
|
908
|
+
>>> translation
|
909
|
+
array([34.89391639, 4.05091072, 36.64736741])
|
751
910
|
"""
|
752
911
|
center_of_mass = self.center_of_mass()
|
753
912
|
enclosing_box = minimum_enclosing_box(coordinates=self.atom_coordinate.T)
|
@@ -772,10 +931,8 @@ class Structure:
|
|
772
931
|
----------
|
773
932
|
shape : Tuple[int,]
|
774
933
|
The desired shape of the output array.
|
775
|
-
|
776
934
|
sampling_rate : float
|
777
935
|
The sampling rate of the output array in units of self.atom_coordinate.
|
778
|
-
|
779
936
|
origin : Tuple[float,]
|
780
937
|
The origin of the coordinate system.
|
781
938
|
|
@@ -812,11 +969,11 @@ class Structure:
|
|
812
969
|
positions = positions[valid_positions == positions.shape[1], :]
|
813
970
|
atom_types = atom_types[valid_positions == positions.shape[1]]
|
814
971
|
|
815
|
-
self.
|
972
|
+
self.metadata["nAtoms_outOfBound"] = 0
|
816
973
|
if positions.shape[0] != coordinates.shape[0]:
|
817
974
|
out_of_bounds = coordinates.shape[0] - positions.shape[0]
|
818
975
|
print(f"{out_of_bounds}/{coordinates.shape[0]} atoms were out of bounds.")
|
819
|
-
self.
|
976
|
+
self.metadata["nAtoms_outOfBound"] = out_of_bounds
|
820
977
|
|
821
978
|
return positions, atom_types, shape, sampling_rate, origin
|
822
979
|
|
@@ -834,14 +991,11 @@ class Structure:
|
|
834
991
|
----------
|
835
992
|
positions : Tuple[float, float, float]
|
836
993
|
The positions of the atoms.
|
837
|
-
|
838
994
|
atoms : Tuple[str]
|
839
995
|
The types of the atoms.
|
840
|
-
|
841
996
|
sampling_rate : float
|
842
997
|
The desired sampling rate in unit of self.atom_coordinate of the
|
843
998
|
output array.
|
844
|
-
|
845
999
|
volume : NDArray
|
846
1000
|
The volume to update.
|
847
1001
|
"""
|
@@ -903,7 +1057,7 @@ class Structure:
|
|
903
1057
|
volume : NDArray
|
904
1058
|
The volume to update.
|
905
1059
|
lowpass_filter : NDArray
|
906
|
-
Whether the scattering factors
|
1060
|
+
Whether the scattering factors should be lowpass filtered.
|
907
1061
|
downsampling_factor : NDArray
|
908
1062
|
Downsampling factor for scattering factor computation.
|
909
1063
|
source : str
|
@@ -947,6 +1101,75 @@ class Structure:
|
|
947
1101
|
scattering_profiles[atoms[atom_index]](distances),
|
948
1102
|
)
|
949
1103
|
|
1104
|
+
@staticmethod
def _position_to_molmap(
    positions: NDArray,
    weights: Tuple[float],
    resolution: float = 4,
    sigma_factor: float = 1 / (np.pi * np.sqrt(2)),
    cutoff_value: float = 4.0,
    sampling_rate: float = None,
) -> Tuple[NDArray, NDArray]:
    """
    Simulates electron densities analogous to Chimera's molmap function [1]_.

    Parameters
    ----------
    positions : NDArray
        Array containing atomic positions in z,y,x format (n,d).
    weights : [float]
        The weights to use for the entries in positions.
    resolution : float
        The product of resolution and sigma_factor gives the sigma used to
        compute the discretized Gaussian. Default is 4.
    sigma_factor : float
        The factor used with resolution to compute sigma. Default is 1 / (π√2).
    cutoff_value : float
        The cutoff value for the Gaussian kernel. Default is 4.0.
    sampling_rate : float
        Sampling rate along each dimension. One third of resolution by default.

    Returns
    -------
    Tuple[NDArray, NDArray]
        The simulated electron densities and the origin of the output grid.
        The origin is computed from the column-reversed positions, shifted
        by the padding.

    References
    ----------
    .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/midas/molmap.html
    """
    if sampling_rate is None:
        sampling_rate = resolution / 3

    pad = int(3 * resolution)
    sigma = sigma_factor * resolution
    sigma_grid = sigma / sampling_rate

    # Limit padding to numerically stable values
    smax = np.max(sigma_grid)
    arr = np.arange(0, pad)
    gaussian = (
        np.exp(-0.5 * (arr / smax) ** 2)
        * np.power(2 * np.pi, -1.5)
        * np.power(sigma, -3.0)
    )
    # np.max raises ValueError on an empty selection, so the padding is only
    # tightened when at least one sample exceeds the threshold. Otherwise the
    # initial padding of int(3 * resolution) is kept.
    significant = arr[gaussian > 1e-8]
    if significant.size != 0:
        pad = int(significant.max()) + 1

    # Reverse the column order of the coordinates before placing them on the grid.
    positions = positions[:, ::-1]
    origin = positions.min(axis=0) - pad * sampling_rate
    positions = np.rint(np.divide((positions - origin), sampling_rate)).astype(int)
    shape = positions.max(axis=0).astype(int) + pad + 1

    out = np.zeros(shape, dtype=np.float32)
    # Unbuffered accumulation so atoms that round to the same voxel add up.
    np.add.at(out, tuple(positions.T), weights)

    out = Preprocessor().gaussian_filter(
        template=out, sigma=sigma_grid, cutoff_value=cutoff_value
    )
    return out, origin
|
1172
|
+
|
950
1173
|
def _get_atom_weights(
|
951
1174
|
self, atoms: Tuple[str] = None, weight_type: str = "atomic_weight"
|
952
1175
|
) -> Tuple[float]:
|
@@ -983,41 +1206,77 @@ class Structure:
|
|
983
1206
|
def to_volume(
|
984
1207
|
self,
|
985
1208
|
shape: Tuple[int] = None,
|
986
|
-
sampling_rate:
|
1209
|
+
sampling_rate: Tuple[float] = None,
|
987
1210
|
origin: Tuple[float] = None,
|
988
1211
|
chain: str = None,
|
989
1212
|
weight_type: str = "atomic_weight",
|
990
|
-
|
991
|
-
) -> Tuple[NDArray,
|
1213
|
+
weight_type_args: Dict = dict(),
|
1214
|
+
) -> Tuple[NDArray, NDArray, NDArray]:
|
992
1215
|
"""
|
993
|
-
|
994
|
-
index z, y, x.
|
1216
|
+
Maps class instance to a volume.
|
995
1217
|
|
996
1218
|
Parameters
|
997
1219
|
----------
|
998
|
-
shape :
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
origin :
|
1004
|
-
Origin of the coordinate system
|
1005
|
-
in z, y, x form.
|
1220
|
+
shape : tuple of ints, optional
|
1221
|
+
Output array shape in (z,y,x) form.
|
1222
|
+
sampling_rate : tuple of float, optional
|
1223
|
+
Sampling rate of the output array in units of
|
1224
|
+
:py:attr:`Structure.atom_coordinate`
|
1225
|
+
origin : tuple of floats, optional
|
1226
|
+
Origin of the coordinate system in (z,y,x) form.
|
1006
1227
|
chain : str, optional
|
1007
|
-
|
1008
|
-
to
|
1009
|
-
all chains are returned. Default is None.
|
1228
|
+
Chain identifier. Either a single chain or a comma-separated string of chains.
|
1229
|
+
Defaults to None which returns all chains.
|
1010
1230
|
weight_type : str, optional
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1231
|
+
Weight given to individual atoms. Supported weight types are:
|
1232
|
+
|
1233
|
+
+----------------------------+---------------------------------------+
|
1234
|
+
| atomic_weight | Using element unit point mass |
|
1235
|
+
+----------------------------+---------------------------------------+
|
1236
|
+
| atomic_number | Using atomic number point mass |
|
1237
|
+
+----------------------------+---------------------------------------+
|
1238
|
+
| gaussian | Represent atoms as isotropic Gaussian |
|
1239
|
+
+----------------------------+---------------------------------------+
|
1240
|
+
| van_der_waals_radius | Using binary van der waal spheres |
|
1241
|
+
+----------------------------+---------------------------------------+
|
1242
|
+
| scattering_factors | Using experimental scattering factors |
|
1243
|
+
+----------------------------+---------------------------------------+
|
1244
|
+
| lowpass_scattering_factors | Lowpass filtered scattering_factors |
|
1245
|
+
+----------------------------+---------------------------------------+
|
1246
|
+
weight_type_args : dict, optional
|
1247
|
+
Additional arguments used for individual weight_types. `gaussian`
|
1248
|
+
accepts ``resolution``, `scattering` accepts ``method``.
|
1014
1249
|
|
1015
1250
|
Returns
|
1016
1251
|
-------
|
1017
|
-
Tuple[NDArray,
|
1018
|
-
|
1252
|
+
Tuple[NDArray, NDArray, NDArray]
|
1253
|
+
Volume, origin and sampling_rate.
|
1254
|
+
|
1255
|
+
Examples
|
1256
|
+
--------
|
1257
|
+
>>> from importlib_resources import files
|
1258
|
+
>>> from tme import Structure
|
1259
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
1260
|
+
>>> structure = Structure.from_file(filename=fname)
|
1261
|
+
>>> vol, origin, sampling = structure.to_volume()
|
1262
|
+
>>> vol.shape, origin, sampling
|
1263
|
+
((59, 35, 53), array([-30.71, 12.42, -27.15]), array([1., 1., 1.]))
|
1264
|
+
>>> vol, origin, sampling = structure.to_volume(sampling_rate=(2.2,1,3))
|
1265
|
+
((27, 35, 18), array([-30.71, 12.42, -27.15]), array([2.2, 1. , 3. ]))
|
1266
|
+
|
1267
|
+
``sampling_rate`` and ``origin`` can be set to ensure correct alignment
|
1268
|
+
with corresponding density maps such as the ones at EMDB. Analogous to
|
1269
|
+
:py:meth:`Structure.subset_by_chain` only parts of the structure can be
|
1270
|
+
mapped onto grids using a variety of weighting schemes
|
1271
|
+
|
1272
|
+
>>> structure.to_volume(weight_type="van_der_waals_radius")
|
1273
|
+
>>> structure.to_volume(
|
1274
|
+
>>> weight_type="lowpass_scattering_factors",
|
1275
|
+
>>> weight_type_args={"source" : "dt1969", "downsampling_factor" : 1.35},
|
1276
|
+
>>> )
|
1019
1277
|
"""
|
1020
1278
|
_weight_types = {
|
1279
|
+
"gaussian",
|
1021
1280
|
"atomic_weight",
|
1022
1281
|
"atomic_number",
|
1023
1282
|
"van_der_waals_radius",
|
@@ -1038,11 +1297,8 @@ class Structure:
|
|
1038
1297
|
"sampling_rate should either be single value of array with"
|
1039
1298
|
f"size {self.atom_coordinate.shape[1]}."
|
1040
1299
|
)
|
1041
|
-
if "source" not in scattering_args:
|
1042
|
-
scattering_args["source"] = "peng1995"
|
1043
1300
|
|
1044
1301
|
temp = self.subset_by_chain(chain=chain)
|
1045
|
-
|
1046
1302
|
positions, atoms, shape, sampling_rate, origin = temp._coordinate_to_position(
|
1047
1303
|
shape=shape, sampling_rate=sampling_rate, origin=origin
|
1048
1304
|
)
|
@@ -1059,7 +1315,7 @@ class Structure:
|
|
1059
1315
|
sampling_rate,
|
1060
1316
|
volume,
|
1061
1317
|
lowpass_filter=False,
|
1062
|
-
**
|
1318
|
+
**weight_type_args,
|
1063
1319
|
)
|
1064
1320
|
elif weight_type == "lowpass_scattering_factors":
|
1065
1321
|
self._position_to_scattering_factors(
|
@@ -1068,10 +1324,19 @@ class Structure:
|
|
1068
1324
|
sampling_rate,
|
1069
1325
|
volume,
|
1070
1326
|
lowpass_filter=True,
|
1071
|
-
**
|
1327
|
+
**weight_type_args,
|
1328
|
+
)
|
1329
|
+
elif weight_type == "gaussian":
|
1330
|
+
volume, origin = self._position_to_molmap(
|
1331
|
+
positions=temp.atom_coordinate,
|
1332
|
+
weights=temp._get_atom_weights(
|
1333
|
+
atoms=atoms, weight_type="atomic_number"
|
1334
|
+
),
|
1335
|
+
sampling_rate=sampling_rate,
|
1336
|
+
**weight_type_args,
|
1072
1337
|
)
|
1073
1338
|
|
1074
|
-
self.
|
1339
|
+
self.metadata.update(temp.metadata)
|
1075
1340
|
return volume, origin, sampling_rate
|
1076
1341
|
|
1077
1342
|
@classmethod
|
@@ -1084,32 +1349,41 @@ class Structure:
|
|
1084
1349
|
weighted: bool = False,
|
1085
1350
|
) -> float:
|
1086
1351
|
"""
|
1087
|
-
Compute root mean square deviation (RMSD) between two structures
|
1088
|
-
|
1089
|
-
Both structures need to have the same number of atoms. In practice, this means
|
1090
|
-
that *structure2* is a transformed version of *structure1*
|
1352
|
+
Compute root mean square deviation (RMSD) between two structures with the
|
1353
|
+
same number of atoms.
|
1091
1354
|
|
1092
1355
|
Parameters
|
1093
1356
|
----------
|
1094
|
-
structure1 : Structure
|
1095
|
-
Structure
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
Origin of the structure coordinate system.
|
1102
|
-
|
1103
|
-
sampling_rate : float, optional
|
1104
|
-
Sampling rate if discretized on a grid in the unit of self.atom_coordinate.
|
1105
|
-
|
1357
|
+
structure1, structure2 : :py:class:`Structure`
|
1358
|
+
Structure instances to compare.
|
1359
|
+
origin : tuple of floats, optional
|
1360
|
+
Coordinate system origin. For computing RMSD on discretized grids.
|
1361
|
+
sampling_rate : tuple of floats, optional
|
1362
|
+
Sampling rate in units of :py:attr:`atom_coordinate`.
|
1363
|
+
For computing RMSD on discretized grids.
|
1106
1364
|
weighted : bool, optional
|
1107
|
-
Whether atoms should be weighted
|
1365
|
+
Whether atoms should be weighted according to their atomic weight.
|
1108
1366
|
|
1109
1367
|
Returns
|
1110
1368
|
-------
|
1111
1369
|
float
|
1112
|
-
Root Mean Square Deviation
|
1370
|
+
Root Mean Square Deviation between input structures.
|
1371
|
+
|
1372
|
+
Examples
|
1373
|
+
--------
|
1374
|
+
>>> from importlib_resources import files
|
1375
|
+
>>> from tme.matching_utils import get_rotation_matrices
|
1376
|
+
>>> from tme import Structure
|
1377
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
1378
|
+
>>> structure = Structure.from_file(filename=fname)
|
1379
|
+
>>> transformed = structure.rigid_transform(
|
1380
|
+
>>> rotation_matrix = get_rotation_matrices(60)[2],
|
1381
|
+
>>> translation = (0, 1, -5)
|
1382
|
+
>>> )
|
1383
|
+
>>> Structure.compare_structures(structure, transformed)
|
1384
|
+
31.35238
|
1385
|
+
>>> Structure.compare_structures(structure, structure)
|
1386
|
+
0.0
|
1113
1387
|
"""
|
1114
1388
|
if origin is None:
|
1115
1389
|
origin = np.zeros(structure1.atom_coordinate.shape[1])
|
@@ -1118,14 +1392,18 @@ class Structure:
|
|
1118
1392
|
coordinates2 = structure2.atom_coordinate
|
1119
1393
|
atoms1, atoms2 = structure1.element_symbol, structure2.element_symbol
|
1120
1394
|
if sampling_rate is not None:
|
1121
|
-
coordinates1 = np.rint(
|
1122
|
-
|
1395
|
+
coordinates1 = np.rint(
|
1396
|
+
np.divide(np.subtract(coordinates1, origin), sampling_rate)
|
1397
|
+
).astype(int)
|
1398
|
+
coordinates2 = np.rint(
|
1399
|
+
np.divide(np.subtract(coordinates2, origin), sampling_rate)
|
1400
|
+
).astype(int)
|
1123
1401
|
|
1124
|
-
weights1 = np.
|
1125
|
-
weights2 = np.
|
1126
|
-
if
|
1127
|
-
weights1 = np.
|
1128
|
-
weights2 = np.
|
1402
|
+
weights1 = np.ones_like(structure1.atom_coordinate.shape[0])
|
1403
|
+
weights2 = np.ones_like(structure2.atom_coordinate.shape[0])
|
1404
|
+
if weighted:
|
1405
|
+
weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
|
1406
|
+
weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
|
1129
1407
|
|
1130
1408
|
if not np.allclose(coordinates1.shape, coordinates2.shape):
|
1131
1409
|
raise ValueError(
|
@@ -1150,35 +1428,41 @@ class Structure:
|
|
1150
1428
|
weighted: bool = False,
|
1151
1429
|
) -> Tuple["Structure", float]:
|
1152
1430
|
"""
|
1153
|
-
Align
|
1154
|
-
the
|
1155
|
-
|
1156
|
-
Both structures need to have the same number of atoms. In practice, this means
|
1157
|
-
that *structure2* is a subset of *structure1*
|
1431
|
+
Align ``structure2`` to ``structure1`` using the Kabsch Algorithm. Both
|
1432
|
+
structures need to have the same number of atoms.
|
1158
1433
|
|
1159
1434
|
Parameters
|
1160
1435
|
----------
|
1161
|
-
structure1 : Structure
|
1162
|
-
Structure
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
Origin of the structure coordinate system.
|
1169
|
-
|
1170
|
-
sampling_rate : float, optional
|
1171
|
-
Voxel size if discretized on a grid.
|
1172
|
-
|
1436
|
+
structure1, structure2 : :py:class:`Structure`
|
1437
|
+
Structure instances to align.
|
1438
|
+
origin : tuple of floats, optional
|
1439
|
+
Coordinate system origin. For computing RMSD on discretized grids.
|
1440
|
+
sampling_rate : tuple of floats, optional
|
1441
|
+
Sampling rate in units of :py:attr:`atom_coordinate`.
|
1442
|
+
For computing RMSD on discretized grids.
|
1173
1443
|
weighted : bool, optional
|
1174
1444
|
Whether atoms should be weighted by their atomic weight.
|
1175
1445
|
|
1176
1446
|
Returns
|
1177
1447
|
-------
|
1178
|
-
Structure
|
1179
|
-
|
1448
|
+
:py:class:`Structure`
|
1449
|
+
``structure2`` aligned to ``structure1``.
|
1180
1450
|
float
|
1181
|
-
|
1451
|
+
Alignment RMSD
|
1452
|
+
|
1453
|
+
Examples
|
1454
|
+
--------
|
1455
|
+
>>> from importlib_resources import files
|
1456
|
+
>>> from tme import Structure
|
1457
|
+
>>> from tme.matching_utils import get_rotation_matrices
|
1458
|
+
>>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
|
1459
|
+
>>> structure = Structure.from_file(filename=fname)
|
1460
|
+
>>> transformed = structure.rigid_transform(
|
1461
|
+
>>> rotation_matrix = get_rotation_matrices(60)[2],
|
1462
|
+
>>> translation = (0, 1, -5)
|
1463
|
+
>>> )
|
1464
|
+
>>> aligned, rmsd = Structure.align_structures(structure, transformed)
|
1465
|
+
Initial RMSD: 31.07189 - Final RMSD: 0.00000
|
1182
1466
|
"""
|
1183
1467
|
if origin is None:
|
1184
1468
|
origin = np.minimum(
|
@@ -1240,9 +1524,7 @@ class Structure:
|
|
1240
1524
|
|
1241
1525
|
@dataclass(frozen=True, repr=True)
|
1242
1526
|
class Elements:
|
1243
|
-
"""
|
1244
|
-
Lookup table containing information on chemical elements.
|
1245
|
-
"""
|
1527
|
+
"""Lookup table for chemical elements."""
|
1246
1528
|
|
1247
1529
|
Atom = namedtuple(
|
1248
1530
|
"Atom",
|
@@ -1385,12 +1667,33 @@ class Elements:
|
|
1385
1667
|
Parameters
|
1386
1668
|
----------
|
1387
1669
|
key : str
|
1388
|
-
|
1389
|
-
the internal data.
|
1670
|
+
Key to retrieve the corresponding value for.
|
1390
1671
|
|
1391
1672
|
Returns
|
1392
1673
|
-------
|
1393
|
-
|
1394
|
-
The
|
1674
|
+
namedtuple
|
1675
|
+
The Atom tuple associated with the provided key.
|
1395
1676
|
"""
|
1396
1677
|
return self._elements.get(key, self._default)
|
1678
|
+
|
1679
|
+
|
1680
|
+
def _format_string(string: str) -> str:
    """
    Formats a string by adding quotation marks where they are needed.

    Parameters
    ----------
    string : str
        Input string to be formatted.

    Returns
    -------
    str
        ``string`` wrapped in quotation marks if it contains a space or
        exactly one apostrophe, otherwise ``string`` unchanged.

    Notes
    -----
    Strings containing a space are wrapped in single quotes. If such a string
    also contains an apostrophe (and no double quote), double quotes are used
    instead, so the embedded apostrophe cannot act as the closing delimiter.
    """
    if " " in string:
        # Single-quoting e.g. O5' A would yield 'O5' A', in which the embedded
        # apostrophe is followed by whitespace and ends the token early.
        if "'" in string and '"' not in string:
            return f'"{string}"'
        return f"'{string}'"
    # Occurs e.g. for C1' atoms.
    # NOTE(review): the previous comment stated that a trailing whitespace is
    # necessary, but none is emitted here - confirm against the file writer.
    if string.count("'") == 1:
        return f'"{string}"'
    return string
|