biotite 1.0.1__cp310-cp310-macosx_11_0_arm64.whl → 1.1.0__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +34 -0
- biotite/application/muscle/app3.py +2 -15
- biotite/application/muscle/app5.py +2 -2
- biotite/application/util.py +1 -1
- biotite/application/viennarna/rnaplot.py +6 -2
- biotite/database/rcsb/query.py +6 -6
- biotite/database/uniprot/check.py +20 -15
- biotite/database/uniprot/download.py +1 -1
- biotite/database/uniprot/query.py +1 -1
- biotite/sequence/align/alignment.py +16 -3
- biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +5 -5
- biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +17 -0
- biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +52 -42
- biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/matrix.py +273 -55
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
- biotite/sequence/alphabet.py +3 -0
- biotite/sequence/codec.cpython-310-darwin.so +0 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
- biotite/sequence/profile.py +86 -4
- biotite/sequence/seqtypes.py +124 -3
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +4 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +129 -40
- biotite/structure/bonds.cpython-310-darwin.so +0 -0
- biotite/structure/bonds.pyx +72 -21
- biotite/structure/celllist.cpython-310-darwin.so +0 -0
- biotite/structure/charges.cpython-310-darwin.so +0 -0
- biotite/structure/geometry.py +60 -113
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +13 -13
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -32
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +63 -17
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -21
- biotite/structure/info/standardize.py +3 -2
- biotite/structure/io/mol/sdf.py +41 -40
- biotite/structure/io/pdb/convert.py +2 -0
- biotite/structure/io/pdb/file.py +74 -3
- biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +32 -8
- biotite/structure/io/pdbx/cif.py +72 -59
- biotite/structure/io/pdbx/component.py +9 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +194 -48
- biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/molecules.py +141 -141
- biotite/structure/sasa.cpython-310-darwin.so +0 -0
- biotite/structure/segments.py +1 -2
- biotite/structure/util.py +73 -1
- biotite/version.py +2 -2
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Conversion of structures into the *Protein Blocks* structural alphabet.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
15
|
+
from biotite.sequence.sequence import Sequence
|
|
16
|
+
from biotite.structure.chains import get_chain_starts
|
|
17
|
+
from biotite.structure.geometry import dihedral_backbone
|
|
18
|
+
|
|
19
|
+
# PB reference angles, adapted from PBxplore
# Each row holds the reference dihedral angles (in degrees) of one Protein Block.
# The row index is used directly as the symbol code, i.e. the 16 rows
# correspond to the symbols 'a' to 'p' of the ``ProteinBlocksSequence`` alphabet.
# The eight columns are compared against the backbone angles around residue ``i``
# in the order
# psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
# (see ``_to_protein_blocks()``).
PB_ANGLES = np.array(
    [
        [41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
        [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
        [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
        [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
        [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
        [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
        [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
        [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
        [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
        [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
        [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
        [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
        [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
        [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
        [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
        [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
    ]
)  # fmt: skip
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::

    """

    # The symbols 'a' to 'p' are the actual protein blocks,
    # 'z' marks positions where no block could be assigned
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet consists of lower case letters only:
        # normalize both, strings and other iterables of symbols, to lower case
        # (upper case symbols would not be part of the alphabet)
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        """
        Get the alphabet shared by all *Protein Blocks* sequences.

        Returns
        -------
        alphabet : LetterAlphabet
            The *Protein Blocks* alphabet.
        """
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        filtered_code = self.code[self.code != undefined_code]
        # Set the symbol codes directly to avoid decoding and re-encoding
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def to_protein_blocks(atoms):
    """
    Encode each chain in the given structure to the *Protein Blocks* structural
    alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # The exclusive stop is appended, so that consecutive entries
    # delimit one chain each
    chain_bounds = get_chain_starts(atoms, add_exclusive_stop=True)
    sequences = [
        _to_protein_blocks(atoms[first:last])
        for first, last in zip(chain_bounds[:-1], chain_bounds[1:])
    ]
    # The trailing exclusive stop is not a chain start
    return sequences, chain_bounds[:-1]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _to_protein_blocks(chain):
    """
    Encode a single chain into a *Protein Blocks* sequence.

    Parameters
    ----------
    chain : AtomArray
        The atoms of a single chain.

    Returns
    -------
    pb_sequence : ProteinBlocksSequence
        The sequence with one symbol per entry of the backbone dihedral angles.
        Positions without a complete set of angles get the undefined symbol.
    """
    phi, psi, _ = dihedral_backbone(chain)
    n_residues = len(phi)

    # For residue i the columns hold, in this order,
    # psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
    # -> each column is the angle array shifted by an offset of 0 to 4
    # The two residues at each chain end have no complete window and stay NaN
    column_sources = [
        (psi, 0), (phi, 1), (psi, 1), (phi, 2),
        (psi, 2), (phi, 3), (psi, 3), (phi, 4),
    ]  # fmt: skip
    windows = np.full((n_residues, 8), np.nan)
    for column, (angles, offset) in enumerate(column_sources):
        # 'offset - 4' is 0 for the last column -> slice up to the end instead
        windows[2:-2, column] = angles[offset : (offset - 4) or None]
    # The reference angles are given in degrees
    windows = np.rad2deg(windows)

    # Angle deviation of all reference angles with all actual angles,
    # wrapped into the range [-180, 180)
    # The squared deviations are only summed up,
    # as this is sufficient for finding the minimum
    deviation = (PB_ANGLES[:, np.newaxis] - windows[np.newaxis, :] + 180) % 360 - 180
    rmsda = np.sum(deviation**2, axis=-1)

    # Where RMSDA is NaN, (missing atoms/residues or chain ends) set symbol to unknown
    undefined_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )
    symbol_codes = np.full(n_residues, undefined_code, dtype=np.uint8)
    is_defined = ~np.isnan(rmsda).any(axis=0)
    # Choose the PB, where the RMSDA to the reference angles is lowest
    # Due to the definition of Biotite symbol codes
    # the index of the chosen PB is directly the symbol code
    symbol_codes[is_defined] = np.argmin(rmsda[:, is_defined], axis=0)

    # Put the array of symbol codes into an actual sequence object
    result = ProteinBlocksSequence()
    result.code = symbol_codes
    return result
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Parser for extracting weights from Keras files.
|
|
7
|
+
|
|
8
|
+
Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__name__ = "biotite.structure.alphabet"
|
|
12
|
+
__author__ = "Martin Larralde"
|
|
13
|
+
__all__ = ["load_kerasify"]
|
|
14
|
+
|
|
15
|
+
import enum
|
|
16
|
+
import functools
|
|
17
|
+
import itertools
|
|
18
|
+
import struct
|
|
19
|
+
import numpy as np
|
|
20
|
+
from biotite.structure.alphabet.layers import DenseLayer, Layer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LayerType(enum.IntEnum):
    # Integer codes identifying the type of a serialized layer.
    # `KerasifyParser.read()` converts the integer read at the start of each
    # layer into a member of this enum; it only decodes `DENSE` layers and
    # raises `NotImplementedError` for all other members.
    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ActivationType(enum.IntEnum):
    # Integer codes identifying the activation function of a dense layer.
    # `KerasifyParser.read()` reads this code after the biases of a dense layer;
    # it only accepts `LINEAR` and `RELU` and raises `NotImplementedError`
    # for all other members.
    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class KerasifyParser:
    """An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator over the layers of the model:
    The number of layers is read from the file on construction and each
    iteration step decodes the next layer.

    Parameters
    ----------
    file : file-like object
        A file object opened in binary mode that supports ``readinto()``.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer that is reused for each read and grown on demand
        self.buffer = bytearray(1024)
        # The file starts with the number of serialized layers
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Read the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The decoded layer.
            ``None``, if all layers have already been read.

        Raises
        ------
        NotImplementedError
            If the layer type or its activation function is not supported.
        EOFError
            If the file ends before the layer is complete.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type == LayerType.DENSE:
            (w0,) = self._get("I")
            (w1,) = self._get("I")
            (b0,) = self._get("I")
            # `_read()` returns a view on the reused scratch buffer
            # -> copy the values before the next read overwrites them
            weights = (
                np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
                .reshape(w0, w1)
                .copy()
            )
            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
            activation = ActivationType(self._get("I")[0])
            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
                raise NotImplementedError(
                    f"Unsupported activation type: {activation!r}"
                )
            return DenseLayer(weights, biases, activation == ActivationType.RELU)
        else:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

    def __iter__(self):
        return self

    def __next__(self) -> Layer:
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes from the file as the given :mod:`struct` format
        requires.

        Parameters
        ----------
        format : str
            The :mod:`struct` format string that determines the number of bytes.

        Returns
        -------
        view : memoryview
            A view on the internal buffer containing the bytes read.
            It is only valid until the next read.

        Raises
        ------
        EOFError
            If the file ends before the requested number of bytes is read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            # Grow the scratch buffer with zero bytes to the required size
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # `readinto()` may fill the view only partially:
        # continue until the view is complete and fail on a truncated file
        # instead of silently decoding stale buffer content
        n_read = 0
        while n_read < n:
            count = self.file.readinto(v[n_read:])  # type: ignore
            if not count:
                raise EOFError(
                    f"Unexpected end of file: expected {n} bytes, got {n_read}"
                )
            n_read += count
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by the given :mod:`struct` format.

        Parameters
        ----------
        format : str
            The :mod:`struct` format string.

        Returns
        -------
        values : tuple
            The unpacked values.
        """
        v = self._read(format)
        return struct.unpack(format, v)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached for each `file_path`, i.e. the file is parsed only
    on the first call with that path.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as file:
        # The parser is an iterator over the layers in the file
        parser = KerasifyParser(file)
        return tuple(parser)
|
biotite/structure/atoms.py
CHANGED
|
@@ -13,6 +13,7 @@ __all__ = [
|
|
|
13
13
|
"Atom",
|
|
14
14
|
"AtomArray",
|
|
15
15
|
"AtomArrayStack",
|
|
16
|
+
"concatenate",
|
|
16
17
|
"array",
|
|
17
18
|
"stack",
|
|
18
19
|
"repeat",
|
|
@@ -22,6 +23,7 @@ __all__ = [
|
|
|
22
23
|
|
|
23
24
|
import abc
|
|
24
25
|
import numbers
|
|
26
|
+
from collections.abc import Sequence
|
|
25
27
|
import numpy as np
|
|
26
28
|
from biotite.copyable import Copyable
|
|
27
29
|
from biotite.structure.bonds import BondList
|
|
@@ -157,7 +159,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
157
159
|
----------
|
|
158
160
|
category : str
|
|
159
161
|
The annotation category to be set.
|
|
160
|
-
array : ndarray
|
|
162
|
+
array : ndarray
|
|
161
163
|
The new value of the annotation category. The size of the
|
|
162
164
|
array must be the same as the array length.
|
|
163
165
|
|
|
@@ -233,7 +235,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
233
235
|
else:
|
|
234
236
|
raise TypeError(f"Index must be integer, not '{type(index).__name__}'")
|
|
235
237
|
|
|
236
|
-
def equal_annotations(self, item):
|
|
238
|
+
def equal_annotations(self, item, equal_nan=True):
|
|
237
239
|
"""
|
|
238
240
|
Check, if this object shares equal annotation arrays with the
|
|
239
241
|
given :class:`AtomArray` or :class:`AtomArrayStack`.
|
|
@@ -242,6 +244,8 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
242
244
|
----------
|
|
243
245
|
item : AtomArray or AtomArrayStack
|
|
244
246
|
The object to compare the annotation arrays with.
|
|
247
|
+
equal_nan: bool
|
|
248
|
+
Whether to count `nan` values as equal. Default: True.
|
|
245
249
|
|
|
246
250
|
Returns
|
|
247
251
|
-------
|
|
@@ -253,7 +257,18 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
253
257
|
if not self.equal_annotation_categories(item):
|
|
254
258
|
return False
|
|
255
259
|
for name in self._annot:
|
|
256
|
-
|
|
260
|
+
# ... allowing `nan` values causes type-casting, which is
|
|
261
|
+
# only possible for floating-point arrays
|
|
262
|
+
allow_nan = (
|
|
263
|
+
equal_nan
|
|
264
|
+
if np.issubdtype(self._annot[name].dtype, np.floating)
|
|
265
|
+
else False
|
|
266
|
+
)
|
|
267
|
+
if not np.array_equal(
|
|
268
|
+
self._annot[name],
|
|
269
|
+
item._annot[name],
|
|
270
|
+
equal_nan=allow_nan,
|
|
271
|
+
):
|
|
257
272
|
return False
|
|
258
273
|
return True
|
|
259
274
|
|
|
@@ -407,42 +422,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
407
422
|
return self._array_length
|
|
408
423
|
|
|
409
424
|
def __add__(self, array):
|
|
410
|
-
|
|
411
|
-
raise TypeError("Can only concatenate two arrays or two stacks")
|
|
412
|
-
# Create either new array or stack, depending of the own type
|
|
413
|
-
if isinstance(self, AtomArray):
|
|
414
|
-
concat = AtomArray(length=self._array_length + array._array_length)
|
|
415
|
-
if isinstance(self, AtomArrayStack):
|
|
416
|
-
concat = AtomArrayStack(
|
|
417
|
-
self.stack_depth(), self._array_length + array._array_length
|
|
418
|
-
)
|
|
419
|
-
|
|
420
|
-
concat._coord = np.concatenate((self._coord, array.coord), axis=-2)
|
|
421
|
-
|
|
422
|
-
# Transfer only annotations,
|
|
423
|
-
# which are existent in both operands
|
|
424
|
-
arr_categories = list(array._annot.keys())
|
|
425
|
-
for category in self._annot.keys():
|
|
426
|
-
if category in arr_categories:
|
|
427
|
-
annot = self._annot[category]
|
|
428
|
-
arr_annot = array._annot[category]
|
|
429
|
-
concat._annot[category] = np.concatenate((annot, arr_annot))
|
|
430
|
-
|
|
431
|
-
# Concatenate bonds lists,
|
|
432
|
-
# if at least one of them contains bond information
|
|
433
|
-
if self._bonds is not None or array._bonds is not None:
|
|
434
|
-
bonds1 = self._bonds
|
|
435
|
-
bonds2 = array._bonds
|
|
436
|
-
if bonds1 is None:
|
|
437
|
-
bonds1 = BondList(self._array_length)
|
|
438
|
-
if bonds2 is None:
|
|
439
|
-
bonds2 = BondList(array._array_length)
|
|
440
|
-
concat._bonds = bonds1 + bonds2
|
|
441
|
-
|
|
442
|
-
# Copy box
|
|
443
|
-
if self._box is not None:
|
|
444
|
-
concat._box = np.copy(self._box)
|
|
445
|
-
return concat
|
|
425
|
+
return concatenate([self, array])
|
|
446
426
|
|
|
447
427
|
def __copy_fill__(self, clone):
|
|
448
428
|
super().__copy_fill__(clone)
|
|
@@ -606,6 +586,7 @@ class AtomArray(_AtomArrayBase):
|
|
|
606
586
|
:class:`AtomArray` is done with the '+' operator.
|
|
607
587
|
Only the annotation categories, which are existing in both arrays,
|
|
608
588
|
are transferred to the new array.
|
|
589
|
+
For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
|
|
609
590
|
|
|
610
591
|
Optionally, an :class:`AtomArray` can store chemical bond
|
|
611
592
|
information via a :class:`BondList` object.
|
|
@@ -878,7 +859,9 @@ class AtomArrayStack(_AtomArrayBase):
|
|
|
878
859
|
:class:`AtomArray` instance.
|
|
879
860
|
|
|
880
861
|
Concatenation of atoms for each array in the stack is done using the
|
|
881
|
-
'+' operator.
|
|
862
|
+
'+' operator.
|
|
863
|
+
For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
|
|
864
|
+
For addition of atom arrays onto the stack use the
|
|
882
865
|
:func:`stack()` method.
|
|
883
866
|
|
|
884
867
|
The :attr:`box` attribute has the shape *m x 3 x 3*, as the cell
|
|
@@ -1292,6 +1275,112 @@ def stack(arrays):
|
|
|
1292
1275
|
return array_stack
|
|
1293
1276
|
|
|
1294
1277
|
|
|
1278
|
+
def concatenate(atoms):
    """
    Concatenate multiple :class:`AtomArray` or :class:`AtomArrayStack` objects into
    a single :class:`AtomArray` or :class:`AtomArrayStack`, respectively.

    Parameters
    ----------
    atoms : iterable object of AtomArray or AtomArrayStack
        The atoms to be concatenated.
        :class:`AtomArray` cannot be mixed with :class:`AtomArrayStack`.
        Must contain at least one element.

    Returns
    -------
    concatenated_atoms : AtomArray or AtomArrayStack
        The concatenated atoms, i.e. its ``array_length()`` is the sum of the
        ``array_length()`` of the input ``atoms``.

    Raises
    ------
    IndexError
        If `atoms` is empty or if the stack depths of
        :class:`AtomArrayStack` elements are not equal.
    TypeError
        If the elements are not all of the same type.

    Notes
    -----
    The following rules apply:

    - Only the annotation categories that exist in all elements are transferred.
    - The box of the first element that has a box is transferred, if any.
    - The bonds of all elements are concatenated, if any element has associated bonds.
      For elements without a :class:`BondList` an empty :class:`BondList` is assumed.

    Examples
    --------

    >>> atoms1 = array([
    ...     Atom([1,2,3], res_id=1, atom_name="N"),
    ...     Atom([4,5,6], res_id=1, atom_name="CA"),
    ...     Atom([7,8,9], res_id=1, atom_name="C")
    ... ])
    >>> atoms2 = array([
    ...     Atom([1,2,3], res_id=2, atom_name="N"),
    ...     Atom([4,5,6], res_id=2, atom_name="CA"),
    ...     Atom([7,8,9], res_id=2, atom_name="C")
    ... ])
    >>> print(concatenate([atoms1, atoms2]))
                1      N                1.000    2.000    3.000
                1      CA               4.000    5.000    6.000
                1      C                7.000    8.000    9.000
                2      N                1.000    2.000    3.000
                2      CA               4.000    5.000    6.000
                2      C                7.000    8.000    9.000
    """
    # Ensure that the atoms can be iterated over multiple times
    if not isinstance(atoms, Sequence):
        atoms = list(atoms)
    if len(atoms) == 0:
        # Same exception type as the former implicit failure of 'atoms[0]',
        # but with a meaningful message
        raise IndexError("Cannot concatenate an empty sequence of atoms")

    length = 0
    depth = None
    element_type = None
    common_categories = set(atoms[0].get_annotation_categories())
    box = None
    has_bonds = False
    for element in atoms:
        if element_type is None:
            element_type = type(element)
        else:
            if not isinstance(element, element_type):
                raise TypeError(
                    f"Cannot concatenate '{type(element).__name__}' "
                    f"with '{element_type.__name__}'"
                )
        length += element.array_length()
        if isinstance(element, AtomArrayStack):
            if depth is None:
                depth = element.stack_depth()
            else:
                if element.stack_depth() != depth:
                    raise IndexError("The stack depths are not equal")
        common_categories &= set(element.get_annotation_categories())
        if element.box is not None and box is None:
            box = element.box
        if element.bonds is not None:
            has_bonds = True

    # 'issubclass()' instead of an equality check, so that subclasses do not
    # leave 'concat_atoms' undefined; the result is always a plain
    # AtomArray or AtomArrayStack
    if issubclass(element_type, AtomArrayStack):
        concat_atoms = AtomArrayStack(depth, length)
    elif issubclass(element_type, AtomArray):
        concat_atoms = AtomArray(length)
    else:
        raise TypeError(
            f"Cannot concatenate objects of type '{element_type.__name__}'"
        )
    # The second last axis is the atom axis for both, arrays and stacks
    concat_atoms.coord = np.concatenate([element.coord for element in atoms], axis=-2)
    for category in common_categories:
        concat_atoms.set_annotation(
            category,
            np.concatenate(
                [element.get_annotation(category) for element in atoms], axis=0
            ),
        )
    concat_atoms.box = box
    if has_bonds:
        # Concatenate bonds of all elements
        concat_atoms.bonds = BondList.concatenate(
            [
                element.bonds
                if element.bonds is not None
                else BondList(element.array_length())
                for element in atoms
            ]
        )

    return concat_atoms
|
|
1382
|
+
|
|
1383
|
+
|
|
1295
1384
|
def repeat(atoms, coord):
|
|
1296
1385
|
"""
|
|
1297
1386
|
Repeat atoms (:class:`AtomArray` or :class:`AtomArrayStack`)
|
|
Binary file
|
biotite/structure/bonds.pyx
CHANGED
|
@@ -17,6 +17,7 @@ cimport cython
|
|
|
17
17
|
cimport numpy as np
|
|
18
18
|
from libc.stdlib cimport free, realloc
|
|
19
19
|
|
|
20
|
+
from collections.abc import Sequence
|
|
20
21
|
import itertools
|
|
21
22
|
import numbers
|
|
22
23
|
from enum import IntEnum
|
|
@@ -59,6 +60,7 @@ class BondType(IntEnum):
|
|
|
59
60
|
- `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
|
|
60
61
|
- `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
|
|
61
62
|
- `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
|
|
63
|
+
- `COORDINATION` - Coordination complex involving a metal atom
|
|
62
64
|
"""
|
|
63
65
|
ANY = 0
|
|
64
66
|
SINGLE = 1
|
|
@@ -68,6 +70,7 @@ class BondType(IntEnum):
|
|
|
68
70
|
AROMATIC_SINGLE = 5
|
|
69
71
|
AROMATIC_DOUBLE = 6
|
|
70
72
|
AROMATIC_TRIPLE = 7
|
|
73
|
+
COORDINATION = 8
|
|
71
74
|
|
|
72
75
|
|
|
73
76
|
def without_aromaticity(self):
|
|
@@ -88,10 +91,12 @@ class BondType(IntEnum):
|
|
|
88
91
|
>>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
|
|
89
92
|
DOUBLE
|
|
90
93
|
"""
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
return BondType
|
|
94
|
+
if self == BondType.AROMATIC_SINGLE:
|
|
95
|
+
return BondType.SINGLE
|
|
96
|
+
elif self == BondType.AROMATIC_DOUBLE:
|
|
97
|
+
return BondType.DOUBLE
|
|
98
|
+
elif self == BondType.AROMATIC_TRIPLE:
|
|
99
|
+
return BondType.TRIPLE
|
|
95
100
|
else:
|
|
96
101
|
return self
|
|
97
102
|
|
|
@@ -305,6 +310,61 @@ class BondList(Copyable):
|
|
|
305
310
|
self._bonds = np.zeros((0, 3), dtype=np.uint32)
|
|
306
311
|
self._max_bonds_per_atom = 0
|
|
307
312
|
|
|
313
|
+
@staticmethod
def concatenate(bonds_lists):
    """
    Concatenate multiple :class:`BondList` objects into a single
    :class:`BondList`.

    The atom indices of each bond list are shifted by the summed atom count
    of all bond lists preceding it, so that the bonds refer to the atoms
    of a correspondingly concatenated structure.

    Parameters
    ----------
    bonds_lists : iterable object of BondList
        The bond lists to be concatenated.

    Returns
    -------
    concatenated_bonds : BondList
        The concatenated bond lists.

    Examples
    --------

    >>> bonds1 = BondList(2, np.array([(0, 1)]))
    >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
    >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
    >>> print(merged_bonds.get_atom_count())
    5
    >>> print(merged_bonds.as_array()[:, :2])
    [[0 1]
     [2 3]
     [2 4]]
    """
    # Ensure that the bonds_lists can be iterated over multiple times
    if not isinstance(bonds_lists, Sequence):
        bonds_lists = list(bonds_lists)

    # np.concatenate() creates a new array,
    # hence the input bond lists are not modified by the offsetting below
    cdef np.ndarray merged_bonds = np.concatenate(
        [bond_list._bonds for bond_list in bonds_lists]
    )
    # Offset the indices of appended bonds list
    # (consistent with addition of AtomArray)
    # 'start' and 'stop' delimit the rows in the merged array that
    # originate from the current bond list
    cdef int start = 0, stop = 0
    cdef int cum_atom_count = 0
    for bond_list in bonds_lists:
        stop = start + bond_list._bonds.shape[0]
        # Only the first two columns contain atom indices
        merged_bonds[start : stop, :2] += cum_atom_count
        cum_atom_count += bond_list._atom_count
        start = stop

    cdef merged_bond_list = BondList(cum_atom_count)
    # Array is not used in constructor to prevent unnecessary
    # maximum and redundant bond calculation
    merged_bond_list._bonds = merged_bonds
    # No bonds are added between atoms of different input lists
    # -> the maximum over the inputs is also the maximum of the merged list
    merged_bond_list._max_bonds_per_atom = max(
        [bond_list._max_bonds_per_atom for bond_list in bonds_lists]
    )
    return merged_bond_list
|
|
367
|
+
|
|
308
368
|
def __copy_create__(self):
|
|
309
369
|
# Create empty bond list to prevent
|
|
310
370
|
# unnecessary removal of redundant atoms
|
|
@@ -453,9 +513,13 @@ class BondList(Copyable):
|
|
|
453
513
|
0 1 SINGLE
|
|
454
514
|
1 2 DOUBLE
|
|
455
515
|
"""
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
516
|
+
bond_types = self._bonds[:,2]
|
|
517
|
+
for aromatic_type, non_aromatic_type in [
|
|
518
|
+
(BondType.AROMATIC_SINGLE, BondType.SINGLE),
|
|
519
|
+
(BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
|
|
520
|
+
(BondType.AROMATIC_TRIPLE, BondType.TRIPLE)
|
|
521
|
+
]:
|
|
522
|
+
bond_types[bond_types == aromatic_type] = non_aromatic_type
|
|
459
523
|
|
|
460
524
|
def remove_bond_order(self):
|
|
461
525
|
"""
|
|
@@ -994,20 +1058,7 @@ class BondList(Copyable):
|
|
|
994
1058
|
)
|
|
995
1059
|
|
|
996
1060
|
def __add__(self, bond_list):
|
|
997
|
-
|
|
998
|
-
= np.concatenate([self._bonds, bond_list._bonds])
|
|
999
|
-
# Offset the indices of appended bonds list
|
|
1000
|
-
# (consistent with addition of AtomArray)
|
|
1001
|
-
merged_bonds[len(self._bonds):, :2] += self._atom_count
|
|
1002
|
-
cdef uint32 merged_count = self._atom_count + bond_list._atom_count
|
|
1003
|
-
cdef merged_bond_list = BondList(merged_count)
|
|
1004
|
-
# Array is not used in constructor to prevent unnecessary
|
|
1005
|
-
# maximum and redundant bond calculation
|
|
1006
|
-
merged_bond_list._bonds = merged_bonds
|
|
1007
|
-
merged_bond_list._max_bonds_per_atom = max(
|
|
1008
|
-
self._max_bonds_per_atom, bond_list._max_bonds_per_atom
|
|
1009
|
-
)
|
|
1010
|
-
return merged_bond_list
|
|
1061
|
+
return BondList.concatenate([self, bond_list])
|
|
1011
1062
|
|
|
1012
1063
|
def __getitem__(self, index):
|
|
1013
1064
|
## Variables for both, integer and boolean index arrays
|
|
Binary file
|
|
Binary file
|