biotite 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +34 -0
- biotite/application/muscle/app3.py +2 -15
- biotite/application/muscle/app5.py +2 -2
- biotite/application/util.py +1 -1
- biotite/application/viennarna/rnaplot.py +6 -2
- biotite/database/rcsb/query.py +6 -6
- biotite/database/uniprot/check.py +20 -15
- biotite/database/uniprot/download.py +1 -1
- biotite/database/uniprot/query.py +1 -1
- biotite/sequence/align/alignment.py +16 -3
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +5 -5
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +17 -0
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +52 -42
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +273 -55
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +3 -0
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +86 -4
- biotite/sequence/seqtypes.py +124 -3
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +4 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +156 -43
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +72 -21
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/filter.py +1 -1
- biotite/structure/geometry.py +60 -113
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +13 -13
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -32
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +63 -17
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -21
- biotite/structure/info/standardize.py +3 -2
- biotite/structure/io/mol/sdf.py +41 -40
- biotite/structure/io/pdb/convert.py +2 -0
- biotite/structure/io/pdb/file.py +74 -3
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +32 -8
- biotite/structure/io/pdbx/cif.py +148 -107
- biotite/structure/io/pdbx/component.py +9 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +227 -68
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +16 -16
- biotite/structure/molecules.py +141 -141
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/segments.py +1 -2
- biotite/structure/util.py +73 -1
- biotite/version.py +2 -2
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Conversion of structures into the *Protein Blocks* structural alphabet.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure.alphabet"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
15
|
+
from biotite.sequence.sequence import Sequence
|
|
16
|
+
from biotite.structure.chains import get_chain_starts
|
|
17
|
+
from biotite.structure.geometry import dihedral_backbone
|
|
18
|
+
|
|
19
|
+
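# PB reference angles in degrees, adapted from PBxplore.
# Each row is one Protein Block (rows 0-15 correspond to the symbols 'a'-'p').
# The columns are psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1),
# psi(i+1), phi(i+2) for the central residue i, i.e. the same layout that
# `_to_protein_blocks()` builds for the measured angles.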
|
|
20
|
+
PB_ANGLES = np.array(
|
|
21
|
+
[
|
|
22
|
+
[41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
|
|
23
|
+
[108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
|
|
24
|
+
[-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
|
|
25
|
+
[141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
|
|
26
|
+
[133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
|
|
27
|
+
[116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
|
|
28
|
+
[0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
|
|
29
|
+
[119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
|
|
30
|
+
[130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
|
|
31
|
+
[114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
|
|
32
|
+
[117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
|
|
33
|
+
[139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
|
|
34
|
+
[-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
|
|
35
|
+
[-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
|
|
36
|
+
[-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
|
|
37
|
+
[-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
|
|
38
|
+
]
|
|
39
|
+
) # fmt: skip
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::

    """

    # 16 Protein Blocks ('a'-'p') plus 'z' as placeholder for undefined blocks
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet contains lower case letters only, hence both kinds of
        # input are normalized to lower case before they are encoded
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        filtered_code = self.code[self.code != undefined_code]
        # Assign the code directly to avoid decoding and re-encoding symbols
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def to_protein_blocks(atoms):
    """
    Encode each chain in the given structure to the *Protein Blocks* structural
    alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # Due to the additional exclusive stop, each pair of consecutive entries
    # delimits exactly one chain
    boundaries = get_chain_starts(atoms, add_exclusive_stop=True)
    sequences = [
        _to_protein_blocks(atoms[start:stop])
        for start, stop in zip(boundaries[:-1], boundaries[1:])
    ]
    # The exclusive stop is not a chain start and hence is not returned
    return sequences, boundaries[:-1]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _to_protein_blocks(chain):
    """
    Encode a single chain into a :class:`ProteinBlocksSequence`.

    For each residue the eight backbone dihedral angles of the surrounding
    five-residue window are compared with every row of ``PB_ANGLES`` and the
    best matching row index is used as symbol code.
    Residues where any of these angles is NaN get the undefined symbol.
    """
    unknown_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )

    phi, psi, _ = dihedral_backbone(chain)

    # Source array and slice for each of the eight columns:
    # psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
    windows = (
        (psi, slice(None, -4)),
        (phi, slice(1, -3)),
        (psi, slice(1, -3)),
        (phi, slice(2, -2)),
        (psi, slice(2, -2)),
        (phi, slice(3, -1)),
        (psi, slice(3, -1)),
        (phi, slice(4, None)),
    )
    # The first and last two residues have no complete window and stay NaN
    pb_angles = np.full((len(phi), 8), np.nan)
    for column, (angles, window) in enumerate(windows):
        pb_angles[2:-2, column] = angles[window]
    pb_angles = np.rad2deg(pb_angles)

    # Squared angular deviation of all reference angles to all actual angles,
    # wrapped into [-180, 180) and summed over the eight angles.
    # The square root and mean are omitted, as only the minimum is of interest
    deviation = PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :]
    wrapped = (deviation + 180) % 360 - 180
    rmsda = np.sum(wrapped**2, axis=-1)

    # NaN deviations (missing atoms/residues or chain ends) keep the
    # undefined symbol
    defined_mask = ~np.isnan(rmsda).any(axis=0)
    codes = np.full(len(pb_angles), unknown_code, dtype=np.uint8)
    # Choose the PB with the lowest deviation to the reference angles;
    # the row index of the chosen PB is directly used as the symbol code
    codes[defined_mask] = np.argmin(rmsda[:, defined_mask], axis=0)

    # Wrap the symbol codes into an actual sequence object
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = codes
    return pb_sequence
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Parser for extracting weights from Keras files.
|
|
7
|
+
|
|
8
|
+
Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__name__ = "biotite.structure.alphabet"
|
|
12
|
+
__author__ = "Martin Larralde"
|
|
13
|
+
__all__ = ["load_kerasify"]
|
|
14
|
+
|
|
15
|
+
import enum
|
|
16
|
+
import functools
|
|
17
|
+
import itertools
|
|
18
|
+
import struct
|
|
19
|
+
import numpy as np
|
|
20
|
+
from biotite.structure.alphabet.layers import DenseLayer, Layer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LayerType(enum.IntEnum):
    """
    Integer tags identifying the type of a serialized layer.

    :class:`KerasifyParser` only implements ``DENSE``; every other member
    makes it raise :class:`NotImplementedError`.
    """

    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ActivationType(enum.IntEnum):
    """
    Integer tags identifying the activation function of a serialized layer.

    :class:`KerasifyParser` only accepts ``LINEAR`` and ``RELU``; every other
    member makes it raise :class:`NotImplementedError`.
    """

    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class KerasifyParser:
    """An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator: each iteration step reads and returns the
    next layer from the file, until the number of layers announced in the
    file header has been read.

    Parameters
    ----------
    file : binary file-like object
        The opened file to read from.
        It must support ``readinto()``.

    Raises
    ------
    EOFError
        If the file ends before the requested data could be read.
    NotImplementedError
        If the file contains an unsupported layer or activation type.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer that is reused for each read and grown on demand
        self.buffer = bytearray(1024)
        # The file starts with the number of layers
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Read the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The parsed layer or ``None``, if all layers have been read.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type == LayerType.DENSE:
            (w0,) = self._get("I")
            (w1,) = self._get("I")
            (b0,) = self._get("I")
            # The returned views alias the reused scratch buffer,
            # so the values need to be copied before the next read
            weights = (
                np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
                .reshape(w0, w1)
                .copy()
            )
            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
            activation = ActivationType(self._get("I")[0])
            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
                raise NotImplementedError(
                    f"Unsupported activation type: {activation!r}"
                )
            return DenseLayer(weights, biases, activation == ActivationType.RELU)
        else:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

    def __iter__(self):
        return self

    def __next__(self) -> Layer:
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes as the given ``struct`` format requires into the
        scratch buffer and return a view on them.

        The view is only valid until the next call of this method.
        """
        n = struct.calcsize(format)
        missing = n - len(self.buffer)
        if missing > 0:
            self.buffer.extend(bytes(missing))
        v = memoryview(self.buffer)[:n]
        n_read = self.file.readinto(v)  # type: ignore
        if n_read != n:
            # Without this check a truncated file would silently be parsed
            # from stale bytes of a previous read
            raise EOFError(
                f"Unexpected end of file: expected {n} bytes, but got {n_read}"
            )
        return v

    def _get(self, format: str):
        """
        Read and unpack values according to the given ``struct`` format.
        """
        v = self._read(format)
        return struct.unpack(format, v)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached per `file_path`, so each file is parsed only once.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as file:
        parser = KerasifyParser(file)
        # Exhaust the parser while the file is still open
        layers = tuple(parser)
    return layers
|
biotite/structure/atoms.py
CHANGED
|
@@ -13,6 +13,7 @@ __all__ = [
|
|
|
13
13
|
"Atom",
|
|
14
14
|
"AtomArray",
|
|
15
15
|
"AtomArrayStack",
|
|
16
|
+
"concatenate",
|
|
16
17
|
"array",
|
|
17
18
|
"stack",
|
|
18
19
|
"repeat",
|
|
@@ -22,6 +23,7 @@ __all__ = [
|
|
|
22
23
|
|
|
23
24
|
import abc
|
|
24
25
|
import numbers
|
|
26
|
+
from collections.abc import Sequence
|
|
25
27
|
import numpy as np
|
|
26
28
|
from biotite.copyable import Copyable
|
|
27
29
|
from biotite.structure.bonds import BondList
|
|
@@ -99,9 +101,24 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
99
101
|
See Also
|
|
100
102
|
--------
|
|
101
103
|
set_annotation
|
|
104
|
+
|
|
105
|
+
Notes
|
|
106
|
+
-----
|
|
107
|
+
If the annotation category already exists, a compatible dtype is chosen,
|
|
108
|
+
that is also able to represent the old values.
|
|
102
109
|
"""
|
|
103
110
|
if category not in self._annot:
|
|
104
111
|
self._annot[str(category)] = np.zeros(self._array_length, dtype=dtype)
|
|
112
|
+
elif np.can_cast(self._annot[str(category)].dtype, dtype):
|
|
113
|
+
self._annot[str(category)] = self._annot[str(category)].astype(dtype)
|
|
114
|
+
elif np.can_cast(dtype, self._annot[str(category)].dtype):
|
|
115
|
+
# The existing dtype is more general
|
|
116
|
+
pass
|
|
117
|
+
else:
|
|
118
|
+
raise ValueError(
|
|
119
|
+
f"Cannot cast '{str(category)}' "
|
|
120
|
+
f"with dtype '{self._annot[str(category)].dtype}' into '{dtype}'"
|
|
121
|
+
)
|
|
105
122
|
|
|
106
123
|
def del_annotation(self, category):
|
|
107
124
|
"""
|
|
@@ -142,19 +159,28 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
142
159
|
----------
|
|
143
160
|
category : str
|
|
144
161
|
The annotation category to be set.
|
|
145
|
-
array : ndarray
|
|
162
|
+
array : ndarray
|
|
146
163
|
The new value of the annotation category. The size of the
|
|
147
164
|
array must be the same as the array length.
|
|
165
|
+
|
|
166
|
+
Notes
|
|
167
|
+
-----
|
|
168
|
+
If the annotation category already exists, a compatible dtype is chosen,
|
|
169
|
+
that is able to represent the old and new array values.
|
|
148
170
|
"""
|
|
171
|
+
array = np.asarray(array)
|
|
149
172
|
if len(array) != self._array_length:
|
|
150
173
|
raise IndexError(
|
|
151
174
|
f"Expected array length {self._array_length}, " f"but got {len(array)}"
|
|
152
175
|
)
|
|
153
176
|
if category in self._annot:
|
|
154
|
-
#
|
|
155
|
-
self._annot[category] =
|
|
177
|
+
# If the annotation already exists, find the compatible dtype
|
|
178
|
+
self._annot[category] = array.astype(
|
|
179
|
+
dtype=np.promote_types(self._annot[category].dtype, array.dtype),
|
|
180
|
+
copy=False,
|
|
181
|
+
)
|
|
156
182
|
else:
|
|
157
|
-
self._annot[category] =
|
|
183
|
+
self._annot[category] = array
|
|
158
184
|
|
|
159
185
|
def get_annotation_categories(self):
|
|
160
186
|
"""
|
|
@@ -209,7 +235,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
209
235
|
else:
|
|
210
236
|
raise TypeError(f"Index must be integer, not '{type(index).__name__}'")
|
|
211
237
|
|
|
212
|
-
def equal_annotations(self, item):
|
|
238
|
+
def equal_annotations(self, item, equal_nan=True):
|
|
213
239
|
"""
|
|
214
240
|
Check, if this object shares equal annotation arrays with the
|
|
215
241
|
given :class:`AtomArray` or :class:`AtomArrayStack`.
|
|
@@ -218,6 +244,8 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
218
244
|
----------
|
|
219
245
|
item : AtomArray or AtomArrayStack
|
|
220
246
|
The object to compare the annotation arrays with.
|
|
247
|
+
equal_nan: bool
|
|
248
|
+
Whether to count `nan` values as equal. Default: True.
|
|
221
249
|
|
|
222
250
|
Returns
|
|
223
251
|
-------
|
|
@@ -229,7 +257,18 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
229
257
|
if not self.equal_annotation_categories(item):
|
|
230
258
|
return False
|
|
231
259
|
for name in self._annot:
|
|
232
|
-
|
|
260
|
+
# ... allowing `nan` values causes type-casting, which is
|
|
261
|
+
# only possible for floating-point arrays
|
|
262
|
+
allow_nan = (
|
|
263
|
+
equal_nan
|
|
264
|
+
if np.issubdtype(self._annot[name].dtype, np.floating)
|
|
265
|
+
else False
|
|
266
|
+
)
|
|
267
|
+
if not np.array_equal(
|
|
268
|
+
self._annot[name],
|
|
269
|
+
item._annot[name],
|
|
270
|
+
equal_nan=allow_nan,
|
|
271
|
+
):
|
|
233
272
|
return False
|
|
234
273
|
return True
|
|
235
274
|
|
|
@@ -383,42 +422,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
383
422
|
return self._array_length
|
|
384
423
|
|
|
385
424
|
def __add__(self, array):
|
|
386
|
-
|
|
387
|
-
raise TypeError("Can only concatenate two arrays or two stacks")
|
|
388
|
-
# Create either new array or stack, depending of the own type
|
|
389
|
-
if isinstance(self, AtomArray):
|
|
390
|
-
concat = AtomArray(length=self._array_length + array._array_length)
|
|
391
|
-
if isinstance(self, AtomArrayStack):
|
|
392
|
-
concat = AtomArrayStack(
|
|
393
|
-
self.stack_depth(), self._array_length + array._array_length
|
|
394
|
-
)
|
|
395
|
-
|
|
396
|
-
concat._coord = np.concatenate((self._coord, array.coord), axis=-2)
|
|
397
|
-
|
|
398
|
-
# Transfer only annotations,
|
|
399
|
-
# which are existent in both operands
|
|
400
|
-
arr_categories = list(array._annot.keys())
|
|
401
|
-
for category in self._annot.keys():
|
|
402
|
-
if category in arr_categories:
|
|
403
|
-
annot = self._annot[category]
|
|
404
|
-
arr_annot = array._annot[category]
|
|
405
|
-
concat._annot[category] = np.concatenate((annot, arr_annot))
|
|
406
|
-
|
|
407
|
-
# Concatenate bonds lists,
|
|
408
|
-
# if at least one of them contains bond information
|
|
409
|
-
if self._bonds is not None or array._bonds is not None:
|
|
410
|
-
bonds1 = self._bonds
|
|
411
|
-
bonds2 = array._bonds
|
|
412
|
-
if bonds1 is None:
|
|
413
|
-
bonds1 = BondList(self._array_length)
|
|
414
|
-
if bonds2 is None:
|
|
415
|
-
bonds2 = BondList(array._array_length)
|
|
416
|
-
concat._bonds = bonds1 + bonds2
|
|
417
|
-
|
|
418
|
-
# Copy box
|
|
419
|
-
if self._box is not None:
|
|
420
|
-
concat._box = np.copy(self._box)
|
|
421
|
-
return concat
|
|
425
|
+
return concatenate([self, array])
|
|
422
426
|
|
|
423
427
|
def __copy_fill__(self, clone):
|
|
424
428
|
super().__copy_fill__(clone)
|
|
@@ -582,6 +586,7 @@ class AtomArray(_AtomArrayBase):
|
|
|
582
586
|
:class:`AtomArray` is done with the '+' operator.
|
|
583
587
|
Only the annotation categories, which are existing in both arrays,
|
|
584
588
|
are transferred to the new array.
|
|
589
|
+
For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
|
|
585
590
|
|
|
586
591
|
Optionally, an :class:`AtomArray` can store chemical bond
|
|
587
592
|
information via a :class:`BondList` object.
|
|
@@ -854,7 +859,9 @@ class AtomArrayStack(_AtomArrayBase):
|
|
|
854
859
|
:class:`AtomArray` instance.
|
|
855
860
|
|
|
856
861
|
Concatenation of atoms for each array in the stack is done using the
|
|
857
|
-
'+' operator.
|
|
862
|
+
'+' operator.
|
|
863
|
+
For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
|
|
864
|
+
For addition of atom arrays onto the stack use the
|
|
858
865
|
:func:`stack()` method.
|
|
859
866
|
|
|
860
867
|
The :attr:`box` attribute has the shape *m x 3 x 3*, as the cell
|
|
@@ -1268,6 +1275,112 @@ def stack(arrays):
|
|
|
1268
1275
|
return array_stack
|
|
1269
1276
|
|
|
1270
1277
|
|
|
1278
|
+
def concatenate(atoms):
    """
    Concatenate multiple :class:`AtomArray` or :class:`AtomArrayStack` objects into
    a single :class:`AtomArray` or :class:`AtomArrayStack`, respectively.

    Parameters
    ----------
    atoms : iterable object of AtomArray or AtomArrayStack
        The atoms to be concatenated.
        :class:`AtomArray` cannot be mixed with :class:`AtomArrayStack`.

    Returns
    -------
    concatenated_atoms : AtomArray or AtomArrayStack
        The concatenated atoms, i.e. its ``array_length()`` is the sum of the
        ``array_length()`` of the input ``atoms``.

    Raises
    ------
    IndexError
        If `atoms` is empty or the stack depths are not equal.
    TypeError
        If the elements have different types or are neither
        :class:`AtomArray` nor :class:`AtomArrayStack`.

    Notes
    -----
    The following rules apply:

    - Only the annotation categories that exist in all elements are transferred.
    - The box of the first element that has a box is transferred, if any.
    - The bonds of all elements are concatenated, if any element has associated bonds.
      For elements without a :class:`BondList` an empty :class:`BondList` is assumed.

    Examples
    --------

    >>> atoms1 = array([
    ...     Atom([1,2,3], res_id=1, atom_name="N"),
    ...     Atom([4,5,6], res_id=1, atom_name="CA"),
    ...     Atom([7,8,9], res_id=1, atom_name="C")
    ... ])
    >>> atoms2 = array([
    ...     Atom([1,2,3], res_id=2, atom_name="N"),
    ...     Atom([4,5,6], res_id=2, atom_name="CA"),
    ...     Atom([7,8,9], res_id=2, atom_name="C")
    ... ])
    >>> print(concatenate([atoms1, atoms2]))
                1      N                1.000    2.000    3.000
                1      CA               4.000    5.000    6.000
                1      C                7.000    8.000    9.000
                2      N                1.000    2.000    3.000
                2      CA               4.000    5.000    6.000
                2      C                7.000    8.000    9.000
    """
    # Ensure that the atoms can be iterated over multiple times
    if not isinstance(atoms, Sequence):
        atoms = list(atoms)
    if len(atoms) == 0:
        # Same exception type as the former failing 'atoms[0]' access,
        # but with a message that names the actual problem
        raise IndexError("Cannot concatenate an empty sequence of atoms")

    length = 0
    depth = None
    element_type = None
    common_categories = set(atoms[0].get_annotation_categories())
    box = None
    has_bonds = False
    for element in atoms:
        if element_type is None:
            element_type = type(element)
        else:
            if not isinstance(element, element_type):
                raise TypeError(
                    f"Cannot concatenate '{type(element).__name__}' "
                    f"with '{element_type.__name__}'"
                )
        length += element.array_length()
        if isinstance(element, AtomArrayStack):
            if depth is None:
                depth = element.stack_depth()
            else:
                if element.stack_depth() != depth:
                    raise IndexError("The stack depths are not equal")
        common_categories &= set(element.get_annotation_categories())
        if element.box is not None and box is None:
            box = element.box
        if element.bonds is not None:
            has_bonds = True

    # 'issubclass()' instead of '==' so that subclasses of the two container
    # types are accepted as well, instead of leaving the result undefined
    if issubclass(element_type, AtomArrayStack):
        concat_atoms = AtomArrayStack(depth, length)
    elif issubclass(element_type, AtomArray):
        concat_atoms = AtomArray(length)
    else:
        raise TypeError(
            f"Cannot concatenate objects of type '{element_type.__name__}'"
        )
    # The atom dimension is the second last one for both arrays and stacks
    concat_atoms.coord = np.concatenate([element.coord for element in atoms], axis=-2)
    for category in common_categories:
        concat_atoms.set_annotation(
            category,
            np.concatenate(
                [element.get_annotation(category) for element in atoms], axis=0
            ),
        )
    concat_atoms.box = box
    if has_bonds:
        # Concatenate bonds of all elements
        concat_atoms.bonds = BondList.concatenate(
            [
                element.bonds
                if element.bonds is not None
                else BondList(element.array_length())
                for element in atoms
            ]
        )

    return concat_atoms
|
|
1382
|
+
|
|
1383
|
+
|
|
1271
1384
|
def repeat(atoms, coord):
|
|
1272
1385
|
"""
|
|
1273
1386
|
Repeat atoms (:class:`AtomArray` or :class:`AtomArrayStack`)
|
|
Binary file
|