biotite 1.2.0__cp312-cp312-win_amd64.whl → 1.4.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biotite/application/viennarna/rnaplot.py +7 -7
- biotite/interface/openmm/__init__.py +4 -0
- biotite/interface/pymol/__init__.py +3 -0
- biotite/interface/pymol/object.py +3 -1
- biotite/interface/rdkit/__init__.py +4 -0
- biotite/interface/rdkit/mol.py +5 -5
- biotite/interface/version.py +23 -0
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +1 -1
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -2
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +2 -4
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/structure/basepairs.py +13 -14
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +67 -6
- biotite/structure/box.py +141 -3
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +0 -1
- biotite/structure/chains.py +15 -21
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +2 -0
- biotite/structure/dotbracket.py +4 -4
- biotite/structure/graphics/rna.py +19 -16
- biotite/structure/hbond.py +1 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/io/pdb/convert.py +84 -2
- biotite/structure/io/pdb/file.py +94 -7
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/bcif.py +6 -3
- biotite/structure/io/pdbx/cif.py +5 -2
- biotite/structure/io/pdbx/compress.py +71 -34
- biotite/structure/io/pdbx/convert.py +226 -58
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -23
- biotite/structure/pseudoknots.py +6 -6
- biotite/structure/residues.py +10 -27
- biotite/structure/rings.py +118 -2
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +28 -29
- biotite/structure/segments.py +55 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/superimpose.py +1 -191
- biotite/structure/transform.py +220 -1
- biotite/version.py +2 -2
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/METADATA +4 -34
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/RECORD +62 -60
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/WHEEL +1 -1
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
|
|
|
230
230
|
# since the file content may be invalid/malicious.
|
|
231
231
|
raise NotImplementedError()
|
|
232
232
|
|
|
233
|
+
def __str__(self):
|
|
234
|
+
# Restore original behavior, as `__str__()` implementation of `_Component`
|
|
235
|
+
# may require serialization, which is not possible for some encodings prior
|
|
236
|
+
# to the first encoding pass
|
|
237
|
+
return object.__str__(self)
|
|
238
|
+
|
|
233
239
|
|
|
234
240
|
@dataclass
|
|
235
241
|
class ByteArrayEncoding(Encoding):
|
|
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
|
|
|
325
331
|
)
|
|
326
332
|
|
|
327
333
|
# Round to avoid wrong values due to floating point inaccuracies
|
|
328
|
-
|
|
334
|
+
scaled_data = np.round(data * self.factor)
|
|
335
|
+
return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)
|
|
329
336
|
|
|
330
337
|
def decode(self, data):
|
|
331
338
|
return (data / self.factor).astype(
|
|
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
|
|
|
392
399
|
self.min, self.max, self.num_steps, dtype=data.dtype
|
|
393
400
|
)
|
|
394
401
|
indices = np.searchsorted(steps, data, side="left")
|
|
395
|
-
return indices
|
|
402
|
+
return _safe_cast(indices, np.int32)
|
|
396
403
|
|
|
397
404
|
def decode(self, data):
|
|
398
405
|
output = data * (self.max - self.min) / (self.num_steps - 1)
|
|
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
|
|
|
570
577
|
if self.origin is None:
|
|
571
578
|
self.origin = data[0]
|
|
572
579
|
|
|
580
|
+
# Differences (including `np.diff`) return an array with the same dtype as the
|
|
581
|
+
# input array
|
|
582
|
+
# As the input dtype may be unsigned, the output dtype could underflow,
|
|
583
|
+
# if the difference is negative
|
|
584
|
+
# -> cast to int64 to avoid this
|
|
585
|
+
data = data.astype(np.int64, copy=False)
|
|
573
586
|
data = data - self.origin
|
|
574
|
-
return np.diff(data, prepend=0)
|
|
587
|
+
return _safe_cast(np.diff(data, prepend=0), np.int32)
|
|
575
588
|
|
|
576
589
|
def decode(self, data):
|
|
577
590
|
output = np.cumsum(data, dtype=self.src_type.to_dtype())
|
|
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
|
|
|
635
648
|
# Only positive values -> use unsigned integers
|
|
636
649
|
self.is_unsigned = data.min().item() >= 0
|
|
637
650
|
|
|
638
|
-
data = data
|
|
651
|
+
data = _safe_cast(data, np.int32)
|
|
639
652
|
return self._encode(
|
|
640
653
|
data, np.empty(0, dtype=self._determine_packed_dtype())
|
|
641
654
|
)
|
|
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
|
|
|
870
883
|
else:
|
|
871
884
|
check_present = True
|
|
872
885
|
|
|
873
|
-
string_order = np.argsort(self.strings)
|
|
886
|
+
string_order = _safe_cast(np.argsort(self.strings), np.int32)
|
|
874
887
|
sorted_strings = self.strings[string_order]
|
|
875
888
|
sorted_indices = np.searchsorted(sorted_strings, data)
|
|
876
889
|
indices = string_order[sorted_indices]
|
|
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
|
|
|
1010
1023
|
return attribute_name[0].lower() + attribute_name[1:]
|
|
1011
1024
|
|
|
1012
1025
|
|
|
1013
|
-
def _safe_cast(array, dtype):
|
|
1014
|
-
|
|
1015
|
-
|
|
1026
|
+
def _safe_cast(array, dtype, allow_decimal_loss=False):
|
|
1027
|
+
source_dtype = array.dtype
|
|
1028
|
+
target_dtype = np.dtype(dtype)
|
|
1029
|
+
|
|
1030
|
+
if target_dtype == source_dtype:
|
|
1016
1031
|
return array
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
if
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
+
|
|
1033
|
+
if np.issubdtype(target_dtype, np.integer):
|
|
1034
|
+
if np.issubdtype(source_dtype, np.floating):
|
|
1035
|
+
if not allow_decimal_loss:
|
|
1036
|
+
raise ValueError("Cannot cast floating point to integer")
|
|
1037
|
+
if not np.isfinite(array).all():
|
|
1038
|
+
raise ValueError("Data contains non-finite values")
|
|
1039
|
+
elif not np.issubdtype(source_dtype, np.integer):
|
|
1040
|
+
# Neither float, nor integer -> cannot cast
|
|
1041
|
+
raise ValueError(f"Cannot cast '{source_dtype}' to integer")
|
|
1042
|
+
dtype_info = np.iinfo(target_dtype)
|
|
1043
|
+
# Check if an integer underflow/overflow would occur during conversion
|
|
1044
|
+
if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
|
|
1045
|
+
raise ValueError("Values do not fit into the given dtype")
|
|
1046
|
+
|
|
1047
|
+
return array.astype(target_dtype)
|
biotite/structure/pseudoknots.py
CHANGED
|
@@ -148,7 +148,7 @@ class _Region:
|
|
|
148
148
|
region_pairs : ndarray, dtype=int
|
|
149
149
|
The indices of the base pairs in ``base_pairs`` that are part of
|
|
150
150
|
the region.
|
|
151
|
-
scores : ndarray, dtype=int, shape=(n,)
|
|
151
|
+
scores : ndarray, dtype=int, shape=(n,)
|
|
152
152
|
The score for each base pair.
|
|
153
153
|
"""
|
|
154
154
|
|
|
@@ -202,7 +202,7 @@ def _find_regions(base_pairs, scores):
|
|
|
202
202
|
base_pairs : ndarray, dtype=int, shape=(n, 2)
|
|
203
203
|
Each row is equivalent to one base pair and contains the first
|
|
204
204
|
indices of the residues corresponding to each base.
|
|
205
|
-
scores : ndarray, dtype=int, shape=(n,)
|
|
205
|
+
scores : ndarray, dtype=int, shape=(n,)
|
|
206
206
|
The score for each base pair.
|
|
207
207
|
|
|
208
208
|
Returns
|
|
@@ -352,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
|
|
|
352
352
|
return i
|
|
353
353
|
|
|
354
354
|
|
|
355
|
-
def _get_region_array_for(regions, content=
|
|
355
|
+
def _get_region_array_for(regions, content=(), dtype=()):
|
|
356
356
|
"""
|
|
357
357
|
Get a :class:`ndarray` of region objects. Each object occurs twice,
|
|
358
358
|
representing its start and end point. The regions positions in the
|
|
@@ -365,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
|
|
|
365
365
|
----------
|
|
366
366
|
regions : set {_region, ...}
|
|
367
367
|
The regions to be considered
|
|
368
|
-
content : list [function, ...]
|
|
368
|
+
content : list [function, ...]
|
|
369
369
|
The functions to be considered for custom outputs. For a given
|
|
370
370
|
region they must return a tuple of which the first value is
|
|
371
371
|
placed at the start position and the second value at the end
|
|
372
372
|
position of the region relative to the other regions.
|
|
373
|
-
dtype : list [str, ...]
|
|
373
|
+
dtype : list [str, ...]
|
|
374
374
|
The data type of the output of the custom functions.
|
|
375
375
|
|
|
376
376
|
Returns
|
|
@@ -554,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
|
|
|
554
554
|
The maximum pseudoknot order to be found. If a base pair would
|
|
555
555
|
be of a higher order, its order is specified as -1. If ``None``
|
|
556
556
|
is given, all base pairs are evaluated.
|
|
557
|
-
order : int
|
|
557
|
+
order : int
|
|
558
558
|
The order that is currently evaluated.
|
|
559
559
|
|
|
560
560
|
Returns
|
biotite/structure/residues.py
CHANGED
|
@@ -21,23 +21,23 @@ __all__ = [
|
|
|
21
21
|
"residue_iter",
|
|
22
22
|
]
|
|
23
23
|
|
|
24
|
-
import numpy as np
|
|
25
24
|
from biotite.structure.segments import (
|
|
26
25
|
apply_segment_wise,
|
|
27
26
|
get_segment_masks,
|
|
28
27
|
get_segment_positions,
|
|
28
|
+
get_segment_starts,
|
|
29
29
|
get_segment_starts_for,
|
|
30
30
|
segment_iter,
|
|
31
31
|
spread_segment_wise,
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def get_residue_starts(array, add_exclusive_stop=False):
|
|
35
|
+
def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
|
|
36
36
|
"""
|
|
37
37
|
Get indices for an atom array, each indicating the beginning of
|
|
38
38
|
a residue.
|
|
39
39
|
|
|
40
|
-
A new residue starts, either when the chain ID, residue ID,
|
|
40
|
+
A new residue starts, either when the chain ID, sym ID, residue ID,
|
|
41
41
|
insertion code or residue name changes from one to the next atom.
|
|
42
42
|
|
|
43
43
|
Parameters
|
|
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
|
|
|
48
48
|
If true, the exclusive stop of the input atom array, i.e.
|
|
49
49
|
``array.array_length()``, is added to the returned array of
|
|
50
50
|
start indices as last element.
|
|
51
|
+
extra_categories : tuple of str, optional
|
|
52
|
+
Additional annotation categories that induce the start of a new residue,
|
|
53
|
+
when their value change from one atom to the next.
|
|
51
54
|
|
|
52
55
|
Returns
|
|
53
56
|
-------
|
|
@@ -69,30 +72,10 @@ def get_residue_starts(array, add_exclusive_stop=False):
|
|
|
69
72
|
[ 0 16 35 56 75 92 116 135 157 169 176 183 197 208 219 226 250 264
|
|
70
73
|
278 292 304]
|
|
71
74
|
"""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
|
|
77
|
-
res_id_changes = array.res_id[1:] != array.res_id[:-1]
|
|
78
|
-
ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
|
|
79
|
-
res_name_changes = array.res_name[1:] != array.res_name[:-1]
|
|
80
|
-
|
|
81
|
-
# If any of these annotation arrays change, a new residue starts
|
|
82
|
-
residue_change_mask = (
|
|
83
|
-
chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
# Convert mask to indices
|
|
87
|
-
# Add 1, to shift the indices from the end of a residue
|
|
88
|
-
# to the start of a new residue
|
|
89
|
-
residue_starts = np.where(residue_change_mask)[0] + 1
|
|
90
|
-
|
|
91
|
-
# The first residue is not included yet -> Insert '[0]'
|
|
92
|
-
if add_exclusive_stop:
|
|
93
|
-
return np.concatenate(([0], residue_starts, [array.array_length()]))
|
|
94
|
-
else:
|
|
95
|
-
return np.concatenate(([0], residue_starts))
|
|
75
|
+
categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
|
|
76
|
+
if "sym_id" in array.get_annotation_categories():
|
|
77
|
+
categories.append("sym_id")
|
|
78
|
+
return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
|
|
96
79
|
|
|
97
80
|
|
|
98
81
|
def apply_residue_wise(array, data, function, axis=None):
|
biotite/structure/rings.py
CHANGED
|
@@ -8,7 +8,12 @@ This module provides functions related to aromatic rings.
|
|
|
8
8
|
|
|
9
9
|
__name__ = "biotite.structure"
|
|
10
10
|
__author__ = "Patrick Kunzmann"
|
|
11
|
-
__all__ = [
|
|
11
|
+
__all__ = [
|
|
12
|
+
"find_aromatic_rings",
|
|
13
|
+
"find_stacking_interactions",
|
|
14
|
+
"find_pi_cation_interactions",
|
|
15
|
+
"PiStacking",
|
|
16
|
+
]
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
from enum import IntEnum
|
|
@@ -149,7 +154,7 @@ def find_stacking_interactions(
|
|
|
149
154
|
|
|
150
155
|
The conditions for pi-stacking are :footcite:`Wojcikowski2015` :
|
|
151
156
|
|
|
152
|
-
- The ring centroids must be within cutoff distance
|
|
157
|
+
- The ring centroids must be within cutoff `centroid_cutoff` distance.
|
|
153
158
|
While :footcite:`Wojcikowski2015` uses a cutoff of 5.0 Å, 6.5 Å was
|
|
154
159
|
adopted from :footcite:`Bouysset2021` to better identify perpendicular
|
|
155
160
|
stacking interactions.
|
|
@@ -268,6 +273,117 @@ def find_stacking_interactions(
|
|
|
268
273
|
]
|
|
269
274
|
|
|
270
275
|
|
|
276
|
+
def find_pi_cation_interactions(
|
|
277
|
+
atoms,
|
|
278
|
+
distance_cutoff=5.0,
|
|
279
|
+
angle_tol=np.deg2rad(30.0),
|
|
280
|
+
):
|
|
281
|
+
"""
|
|
282
|
+
Find pi-cation interactions between aromatic rings and cations.
|
|
283
|
+
|
|
284
|
+
Parameters
|
|
285
|
+
----------
|
|
286
|
+
atoms : AtomArray
|
|
287
|
+
The atoms to be searched for pi-cation interactions.
|
|
288
|
+
Requires an associated :class:`BondList` and ``charge`` annotation.
|
|
289
|
+
distance_cutoff : float, optional
|
|
290
|
+
The cutoff distance between ring centroid and cation.
|
|
291
|
+
angle_tol : float, optional
|
|
292
|
+
The tolerance for the angle between the ring plane normal
|
|
293
|
+
and the centroid-cation vector. Perfect pi-cation interaction
|
|
294
|
+
has 0° angle (perpendicular to ring plane).
|
|
295
|
+
Given in radians.
|
|
296
|
+
|
|
297
|
+
Returns
|
|
298
|
+
-------
|
|
299
|
+
interactions : list of tuple(ndarray, int)
|
|
300
|
+
The pi-cation interactions between aromatic rings and cations.
|
|
301
|
+
Each element in the list represents one pi-cation interaction.
|
|
302
|
+
The first element of each tuple represents atom indices of the
|
|
303
|
+
aromatic ring, the second element is the atom index of the cation.
|
|
304
|
+
|
|
305
|
+
See Also
|
|
306
|
+
--------
|
|
307
|
+
find_aromatic_rings : Used for finding the aromatic rings in this function.
|
|
308
|
+
find_stacking_interactions : Find pi-stacking interactions between rings.
|
|
309
|
+
|
|
310
|
+
Notes
|
|
311
|
+
-----
|
|
312
|
+
The conditions for pi-cation interactions are:
|
|
313
|
+
- The distance between ring centroid and cation must be within
|
|
314
|
+
`distance_cutoff`. :footcite:`Wojcikowski2015` uses 5.0 Å,
|
|
315
|
+
whereas :footcite:`Bouysset2021` uses 4.5 Å.
|
|
316
|
+
- The angle between the ring plane normal and the centroid-cation
|
|
317
|
+
vector must be within `angle_tol` of 0° (perpendicular to plane).
|
|
318
|
+
|
|
319
|
+
Examples
|
|
320
|
+
--------
|
|
321
|
+
>>> from os.path import join
|
|
322
|
+
>>> structure = load_structure(join(path_to_structures, "3wip.cif"), include_bonds=True, extra_fields=["charge"])
|
|
323
|
+
>>> interactions = find_pi_cation_interactions(structure)
|
|
324
|
+
>>> for ring_indices, cation_index in interactions:
|
|
325
|
+
... print(
|
|
326
|
+
... structure.res_name[ring_indices[0]],
|
|
327
|
+
... structure.res_name[cation_index]
|
|
328
|
+
... )
|
|
329
|
+
TYR ACH
|
|
330
|
+
TRP ACH
|
|
331
|
+
"""
|
|
332
|
+
if atoms.bonds is None:
|
|
333
|
+
raise BadStructureError("Structure must have an associated BondList")
|
|
334
|
+
|
|
335
|
+
if atoms.charge is None:
|
|
336
|
+
raise BadStructureError(
|
|
337
|
+
"Structure must have a 'charge' annotation to identify cations."
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
rings = find_aromatic_rings(atoms)
|
|
341
|
+
if len(rings) == 0:
|
|
342
|
+
return []
|
|
343
|
+
|
|
344
|
+
cation_mask = atoms.charge > 0
|
|
345
|
+
cation_indices = np.where(cation_mask)[0]
|
|
346
|
+
|
|
347
|
+
if len(cation_indices) == 0:
|
|
348
|
+
return []
|
|
349
|
+
|
|
350
|
+
# Calculate ring centroids and normals
|
|
351
|
+
ring_centroids = np.array(
|
|
352
|
+
[atoms.coord[atom_indices].mean(axis=0) for atom_indices in rings]
|
|
353
|
+
)
|
|
354
|
+
ring_normals = np.array(
|
|
355
|
+
[_get_ring_normal(atoms.coord[atom_indices]) for atom_indices in rings]
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
cation_coords = atoms.coord[cation_indices]
|
|
359
|
+
|
|
360
|
+
# Create an index array that contains the Cartesian product of all rings and cations
|
|
361
|
+
indices = np.stack(
|
|
362
|
+
[
|
|
363
|
+
np.repeat(np.arange(len(rings)), len(cation_indices)),
|
|
364
|
+
np.tile(np.arange(len(cation_indices)), len(rings)),
|
|
365
|
+
],
|
|
366
|
+
axis=-1,
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
## Condition 1: Ring centroids and cations are close enough to each other
|
|
370
|
+
diff = displacement(ring_centroids[indices[:, 0]], cation_coords[indices[:, 1]])
|
|
371
|
+
# Use squared distance to avoid time consuming sqrt computation
|
|
372
|
+
sq_distance = vector_dot(diff, diff)
|
|
373
|
+
is_interacting = sq_distance < distance_cutoff**2
|
|
374
|
+
indices = indices[is_interacting]
|
|
375
|
+
|
|
376
|
+
## Condition 2: Angle between ring normal and centroid-cation vector
|
|
377
|
+
diff = displacement(ring_centroids[indices[:, 0]], cation_coords[indices[:, 1]])
|
|
378
|
+
norm_vector(diff)
|
|
379
|
+
angles = _minimum_angle(ring_normals[indices[:, 0]], diff)
|
|
380
|
+
is_interacting = _is_within_tolerance(angles, 0, angle_tol)
|
|
381
|
+
indices = indices[is_interacting]
|
|
382
|
+
|
|
383
|
+
# Only return pairs where all conditions were fulfilled
|
|
384
|
+
return [(rings[ring_i], cation_indices[cation_j]) for ring_i, cation_j in indices]
|
|
385
|
+
|
|
386
|
+
|
|
271
387
|
def _get_ring_normal(ring_coord):
|
|
272
388
|
"""
|
|
273
389
|
Get the normal vector perpendicular to the ring plane.
|
|
Binary file
|
biotite/structure/sasa.pyx
CHANGED
|
@@ -35,39 +35,38 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
35
35
|
point_number=1000, point_distr="Fibonacci", vdw_radii="ProtOr")
|
|
36
36
|
|
|
37
37
|
Calculate the Solvent Accessible Surface Area (SASA) of a protein.
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
This function uses the Shrake-Rupley ("rolling probe")
|
|
40
40
|
algorithm :footcite:`Shrake1973`:
|
|
41
41
|
Every atom is occupied by a evenly distributed point mesh. The
|
|
42
42
|
points that can be reached by the "rolling probe", are surface
|
|
43
43
|
accessible.
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
Parameters
|
|
46
46
|
----------
|
|
47
47
|
array : AtomArray
|
|
48
48
|
The protein model to calculate the SASA for.
|
|
49
49
|
probe_radius : float, optional
|
|
50
|
-
The VdW-radius of the solvent molecules
|
|
50
|
+
The VdW-radius of the solvent molecules.
|
|
51
51
|
atom_filter : ndarray, dtype=bool, optional
|
|
52
52
|
If this parameter is given, SASA is only calculated for the
|
|
53
53
|
filtered atoms.
|
|
54
54
|
ignore_ions : bool, optional
|
|
55
|
-
If true, all monoatomic ions are removed before SASA calculation
|
|
56
|
-
(default: True).
|
|
55
|
+
If true, all monoatomic ions are removed before SASA calculation.
|
|
57
56
|
point_number : int, optional
|
|
58
57
|
The number of points in the mesh occupying each atom for SASA
|
|
59
|
-
calculation
|
|
60
|
-
proportional to the amount of sphere points.
|
|
58
|
+
calculation.
|
|
59
|
+
The SASA calculation time is proportional to the amount of sphere points.
|
|
61
60
|
point_distr : str or function, optional
|
|
62
61
|
If a function is given, the function is used to calculate the
|
|
63
62
|
point distribution for the mesh (the function must take `float`
|
|
64
63
|
*n* as parameter and return a *(n x 3)* :class:`ndarray`).
|
|
65
64
|
Alternatively a string can be given to choose a built-in
|
|
66
65
|
distribution:
|
|
67
|
-
|
|
66
|
+
|
|
68
67
|
- **Fibonacci** - Distribute points using a golden section
|
|
69
68
|
spiral.
|
|
70
|
-
|
|
69
|
+
|
|
71
70
|
By default *Fibonacci* is used.
|
|
72
71
|
vdw_radii : str or ndarray, dtype=float, optional
|
|
73
72
|
Indicates the set of VdW radii to be used. If an `array`-length
|
|
@@ -76,7 +75,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
76
75
|
SASA calculation (e.g. solvent atoms) can have arbitrary values
|
|
77
76
|
(e.g. `NaN`). If instead a string is given, one of the
|
|
78
77
|
built-in sets is used:
|
|
79
|
-
|
|
78
|
+
|
|
80
79
|
- **ProtOr** - A set, which does not require hydrogen atoms
|
|
81
80
|
in the model. Suitable for crystal structures.
|
|
82
81
|
:footcite:`Tsai1999`
|
|
@@ -85,25 +84,25 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
85
84
|
in the model (e.g. NMR elucidated structures).
|
|
86
85
|
Values for main group elements are taken from :footcite:`Mantina2009`,
|
|
87
86
|
and for relevant transition metals from the :footcite:`RDKit`.
|
|
88
|
-
|
|
87
|
+
|
|
89
88
|
By default *ProtOr* is used.
|
|
90
|
-
|
|
91
|
-
|
|
89
|
+
|
|
90
|
+
|
|
92
91
|
Returns
|
|
93
92
|
-------
|
|
94
93
|
sasa : ndarray, dtype=bool, shape=(n,)
|
|
95
|
-
Atom-wise SASA. `NaN` for atoms where SASA has not been
|
|
94
|
+
Atom-wise SASA. `NaN` for atoms where SASA has not been
|
|
96
95
|
calculated
|
|
97
96
|
(solvent atoms, hydrogen atoms (ProtOr), atoms not in `filter`).
|
|
98
|
-
|
|
97
|
+
|
|
99
98
|
References
|
|
100
99
|
----------
|
|
101
|
-
|
|
100
|
+
|
|
102
101
|
.. footbibliography::
|
|
103
|
-
|
|
102
|
+
|
|
104
103
|
"""
|
|
105
104
|
cdef int i=0, j=0, k=0, adj_atom_i=0, rel_atom_i=0
|
|
106
|
-
|
|
105
|
+
|
|
107
106
|
cdef np.ndarray sasa_filter
|
|
108
107
|
cdef np.ndarray occl_filter
|
|
109
108
|
if atom_filter is not None:
|
|
@@ -122,7 +121,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
122
121
|
filter = ~filter_monoatomic_ions(array)
|
|
123
122
|
sasa_filter = sasa_filter & filter
|
|
124
123
|
occl_filter = occl_filter & filter
|
|
125
|
-
|
|
124
|
+
|
|
126
125
|
cdef np.ndarray sphere_points
|
|
127
126
|
if callable(point_distr):
|
|
128
127
|
sphere_points = point_distr(point_number)
|
|
@@ -131,7 +130,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
131
130
|
else:
|
|
132
131
|
raise ValueError(f"'{point_distr}' is not a valid point distribution")
|
|
133
132
|
sphere_points = sphere_points.astype(np.float32)
|
|
134
|
-
|
|
133
|
+
|
|
135
134
|
cdef np.ndarray radii
|
|
136
135
|
if isinstance(vdw_radii, np.ndarray):
|
|
137
136
|
radii = vdw_radii.astype(np.float32)
|
|
@@ -159,17 +158,17 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
159
158
|
raise KeyError(f"'{vdw_radii}' is not a valid radii set")
|
|
160
159
|
# Increase atom radii by probe size ("rolling probe")
|
|
161
160
|
radii += probe_radius
|
|
162
|
-
|
|
161
|
+
|
|
163
162
|
# Memoryview for filter
|
|
164
163
|
# Problem with creating boolean memoryviews
|
|
165
164
|
# -> Type uint8 is used
|
|
166
165
|
cdef np_bool[:] sasa_filter_view = np.frombuffer(sasa_filter,
|
|
167
166
|
dtype=np.uint8)
|
|
168
|
-
|
|
167
|
+
|
|
169
168
|
cdef np.ndarray occl_r = radii[occl_filter]
|
|
170
169
|
# Atom array containing occluding atoms
|
|
171
170
|
occl_array = array[occl_filter]
|
|
172
|
-
|
|
171
|
+
|
|
173
172
|
# Memoryviews for coordinates of entire (main) array
|
|
174
173
|
# and for coordinates of occluding atom array
|
|
175
174
|
cdef float32[:,:] main_coord = array.coord.astype(np.float32,
|
|
@@ -191,10 +190,10 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
191
190
|
cdef float32[:] occl_radii_sq = occl_r * occl_r
|
|
192
191
|
# Memoryview for atomwise SASA
|
|
193
192
|
cdef float32[:] sasa = np.full(len(array), np.nan, dtype=np.float32)
|
|
194
|
-
|
|
193
|
+
|
|
195
194
|
# Area of a sphere point on a unit sphere
|
|
196
195
|
cdef float32 area_per_point = 4.0 * np.pi / point_number
|
|
197
|
-
|
|
196
|
+
|
|
198
197
|
# Define further statically typed variables
|
|
199
198
|
# that are needed for SASA calculation
|
|
200
199
|
cdef int n_accesible = 0
|
|
@@ -213,8 +212,8 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
213
212
|
cdef float32 occl_y = 0
|
|
214
213
|
cdef float32 occl_z = 0
|
|
215
214
|
cdef float32[:,:] relevant_occl_coord = None
|
|
216
|
-
|
|
217
|
-
# Cell size is as large as the maximum distance,
|
|
215
|
+
|
|
216
|
+
# Cell size is as large as the maximum distance,
|
|
218
217
|
# where two atom can intersect.
|
|
219
218
|
# Therefore intersecting atoms are always in the same or adjacent cell.
|
|
220
219
|
cell_list = CellList(occl_array, np.max(radii[occl_filter])*2)
|
|
@@ -227,7 +226,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
227
226
|
cell_indices = cell_list.get_atoms_in_cells(array.coord)
|
|
228
227
|
cell_indices_view = cell_indices
|
|
229
228
|
max_adj_list_length = cell_indices.shape[0]
|
|
230
|
-
|
|
229
|
+
|
|
231
230
|
# Later on, this array stores coordinates for actual
|
|
232
231
|
# occluding atoms for a certain atom to calculate the
|
|
233
232
|
# SASA for
|
|
@@ -237,7 +236,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
|
|
|
237
236
|
# adjacent atoms
|
|
238
237
|
relevant_occl_coord = np.zeros((max_adj_list_length, 4),
|
|
239
238
|
dtype=np.float32)
|
|
240
|
-
|
|
239
|
+
|
|
241
240
|
# Actual SASA calculation
|
|
242
241
|
for i in range(array_length):
|
|
243
242
|
# First level: The atoms to calculate SASA for
|
biotite/structure/segments.py
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
__name__ = "biotite.structure"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = [
|
|
8
|
+
"get_segment_starts",
|
|
8
9
|
"apply_segment_wise",
|
|
9
10
|
"spread_segment_wise",
|
|
10
11
|
"get_segment_masks",
|
|
@@ -16,6 +17,60 @@ __all__ = [
|
|
|
16
17
|
import numpy as np
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
def get_segment_starts(
|
|
21
|
+
array, add_exclusive_stop, continuous_categories=(), equal_categories=()
|
|
22
|
+
):
|
|
23
|
+
"""
|
|
24
|
+
Generalized version of :func:`get_residue_starts()` for residues and chains.
|
|
25
|
+
|
|
26
|
+
The starts are determined from value changes in the given annotations.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
array : AtomArray or AtomArrayStack
|
|
31
|
+
The atom array (stack) to get the segment starts from.
|
|
32
|
+
add_exclusive_stop : bool, optional
|
|
33
|
+
If true, the exclusive stop of the input atom array,
|
|
34
|
+
i.e. ``array.array_length()``, is added to the returned array of start indices
|
|
35
|
+
as last element.
|
|
36
|
+
continuous_categories : tuple of str, optional
|
|
37
|
+
Annotation categories that are expected to be continuously increasing within a
|
|
38
|
+
segment.
|
|
39
|
+
This means if the value of such an annotation decreases from one atom to
|
|
40
|
+
another, a new segment is started.
|
|
41
|
+
equal_categories : tuple of str, optional
|
|
42
|
+
Annotation categories that are expected to be equal within a segment.
|
|
43
|
+
This means if the value of such an annotation changes from one atom to
|
|
44
|
+
another, a new segment is started.
|
|
45
|
+
|
|
46
|
+
Returns
|
|
47
|
+
-------
|
|
48
|
+
starts : ndarray, dtype=int
|
|
49
|
+
The start indices of segments in `array`.
|
|
50
|
+
"""
|
|
51
|
+
if array.array_length() == 0:
|
|
52
|
+
return np.array([], dtype=int)
|
|
53
|
+
|
|
54
|
+
segment_start_mask = np.zeros(array.array_length() - 1, dtype=bool)
|
|
55
|
+
for annot_name in continuous_categories:
|
|
56
|
+
annotation = array.get_annotation(annot_name)
|
|
57
|
+
segment_start_mask |= np.diff(annotation) < 0
|
|
58
|
+
for annot_name in equal_categories:
|
|
59
|
+
annotation = array.get_annotation(annot_name)
|
|
60
|
+
segment_start_mask |= annotation[1:] != annotation[:-1]
|
|
61
|
+
|
|
62
|
+
# Convert mask to indices
|
|
63
|
+
# Add 1, to shift the indices from the end of a segment
|
|
64
|
+
# to the start of a new segment
|
|
65
|
+
chain_starts = np.where(segment_start_mask)[0] + 1
|
|
66
|
+
|
|
67
|
+
# The first chain is not included yet -> Insert '[0]'
|
|
68
|
+
if add_exclusive_stop:
|
|
69
|
+
return np.concatenate(([0], chain_starts, [array.array_length()]))
|
|
70
|
+
else:
|
|
71
|
+
return np.concatenate(([0], chain_starts))
|
|
72
|
+
|
|
73
|
+
|
|
19
74
|
def apply_segment_wise(starts, data, function, axis=None):
|
|
20
75
|
"""
|
|
21
76
|
Generalized version of :func:`apply_residue_wise()` for
|