biotite 0.41.1__cp311-cp311-win_amd64.whl → 0.41.2__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/__init__.py +35 -9
- biotite/application/application.py +2 -1
- biotite/sequence/__init__.py +13 -2
- biotite/sequence/align/__init__.py +158 -4
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +34 -34
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +63 -63
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/sequence.py +52 -50
- biotite/structure/atoms.py +8 -8
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +59 -68
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/info/ccd.py +17 -2
- biotite/structure/info/groups.py +9 -12
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/bcif.py +0 -8
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/version.py +2 -2
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/METADATA +2 -2
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/RECORD +42 -42
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/WHEEL +1 -1
- {biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/sequence.py
CHANGED
|
@@ -25,7 +25,7 @@ _size_uint32 = np.iinfo(np.uint32).max +1
|
|
|
25
25
|
class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
26
26
|
"""
|
|
27
27
|
The abstract base class for all sequence types.
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
A :class:`Sequence` can be seen as a succession of symbols, that are
|
|
30
30
|
elements in the allowed set of symbols, the :class:`Alphabet`.
|
|
31
31
|
Internally, a :class:`Sequence` object uses a *NumPy*
|
|
@@ -36,35 +36,35 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
36
36
|
:class:`Sequence`, into an integer. These integer values are called
|
|
37
37
|
symbol code, the encoding of an entire sequence of symbols is
|
|
38
38
|
called sequence code.
|
|
39
|
-
|
|
40
|
-
The size of the symbol code type in the array is determined by the
|
|
39
|
+
|
|
40
|
+
The size of the symbol code type in the array is determined by the
|
|
41
41
|
size of the :class:`Alphabet`:
|
|
42
42
|
If the :class:`Alphabet` contains 256 symbols or less, one byte is
|
|
43
43
|
used per array element; if the :class:`Alphabet` contains
|
|
44
44
|
between 257 and 65536 symbols, two bytes are used, and so on.
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
Two :class:`Sequence` objects are equal if they are instances of the
|
|
47
47
|
same class, have the same :class:`Alphabet` and have equal sequence
|
|
48
48
|
codes.
|
|
49
49
|
Comparison with a string or list of symbols evaluates always to
|
|
50
50
|
false.
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
A :class:`Sequence` can be indexed by any 1-D index a
|
|
53
53
|
:class:`ndarray` accepts.
|
|
54
54
|
If the index is a single integer, the decoded symbol at that
|
|
55
55
|
position is returned, otherwise a subsequence is returned.
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
Individual symbols of the sequence can also be exchanged in indexed
|
|
58
58
|
form: If an integer is used as index, the item is treated as a
|
|
59
59
|
symbol. Any other index (slice, index list, boolean mask) expects
|
|
60
60
|
multiple symbols, either as list of symbols, as :class:`ndarray`
|
|
61
61
|
containing a sequence code or another :class:`Sequence` instance.
|
|
62
62
|
Concatenation of two sequences is achieved with the '+' operator.
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
Each subclass of :class:`Sequence` needs to override the abstract
|
|
65
65
|
method :func:`get_alphabet()`, which specifies the alphabet the
|
|
66
66
|
:class:`Sequence` uses.
|
|
67
|
-
|
|
67
|
+
|
|
68
68
|
Parameters
|
|
69
69
|
----------
|
|
70
70
|
sequence : iterable object, optional
|
|
@@ -72,7 +72,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
72
72
|
For alphabets containing single letter strings, this parameter
|
|
73
73
|
may also be a :class:`str` object.
|
|
74
74
|
By default the sequence is empty.
|
|
75
|
-
|
|
75
|
+
|
|
76
76
|
Attributes
|
|
77
77
|
----------
|
|
78
78
|
code : ndarray
|
|
@@ -85,12 +85,12 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
85
85
|
alphabet : Alphabet
|
|
86
86
|
The alphabet of this sequence. Cannot be set.
|
|
87
87
|
Equal to `get_alphabet()`.
|
|
88
|
-
|
|
88
|
+
|
|
89
89
|
Examples
|
|
90
90
|
--------
|
|
91
91
|
Create a DNA sequence from a string and print the symbols and the
|
|
92
92
|
code:
|
|
93
|
-
|
|
93
|
+
|
|
94
94
|
>>> dna_seq = NucleotideSequence("ACGTA")
|
|
95
95
|
>>> print(dna_seq)
|
|
96
96
|
ACGTA
|
|
@@ -100,18 +100,18 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
100
100
|
['A' 'C' 'G' 'T' 'A']
|
|
101
101
|
>>> print(list(dna_seq))
|
|
102
102
|
['A', 'C', 'G', 'T', 'A']
|
|
103
|
-
|
|
103
|
+
|
|
104
104
|
Sequence indexing:
|
|
105
|
-
|
|
105
|
+
|
|
106
106
|
>>> print(dna_seq[1:3])
|
|
107
107
|
CG
|
|
108
108
|
>>> print(dna_seq[[0,2,4]])
|
|
109
109
|
AGA
|
|
110
110
|
>>> print(dna_seq[np.array([False,False,True,True,True])])
|
|
111
111
|
GTA
|
|
112
|
-
|
|
112
|
+
|
|
113
113
|
Sequence manipulation:
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
>>> dna_copy = dna_seq.copy()
|
|
116
116
|
>>> dna_copy[2] = "C"
|
|
117
117
|
>>> print(dna_copy)
|
|
@@ -134,28 +134,28 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
134
134
|
>>> dna_seq_rev = dna_seq.reverse()
|
|
135
135
|
>>> print(dna_seq_rev)
|
|
136
136
|
ATGCA
|
|
137
|
-
|
|
137
|
+
|
|
138
138
|
Concatenate the two sequences:
|
|
139
|
-
|
|
139
|
+
|
|
140
140
|
>>> dna_seq_concat = dna_seq + dna_seq_rev
|
|
141
141
|
>>> print(dna_seq_concat)
|
|
142
142
|
ACGTAATGCA
|
|
143
|
-
|
|
143
|
+
|
|
144
144
|
"""
|
|
145
|
-
|
|
145
|
+
|
|
146
146
|
def __init__(self, sequence=()):
|
|
147
147
|
self.symbols = sequence
|
|
148
148
|
|
|
149
149
|
def copy(self, new_seq_code=None):
|
|
150
150
|
"""
|
|
151
151
|
Copy the object.
|
|
152
|
-
|
|
152
|
+
|
|
153
153
|
Parameters
|
|
154
154
|
----------
|
|
155
155
|
new_seq_code : ndarray, optional
|
|
156
156
|
If this parameter is set, the sequence code is set to this
|
|
157
157
|
value, rather than the original sequence code.
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
Returns
|
|
160
160
|
-------
|
|
161
161
|
copy
|
|
@@ -171,51 +171,51 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
171
171
|
clone.code = new_seq_code
|
|
172
172
|
self.__copy_fill__(clone)
|
|
173
173
|
return clone
|
|
174
|
-
|
|
174
|
+
|
|
175
175
|
@property
|
|
176
176
|
def symbols(self):
|
|
177
177
|
return self.get_alphabet().decode_multiple(self.code)
|
|
178
|
-
|
|
178
|
+
|
|
179
179
|
@symbols.setter
|
|
180
180
|
def symbols(self, value):
|
|
181
181
|
alph = self.get_alphabet()
|
|
182
182
|
dtype = Sequence.dtype(len(alph))
|
|
183
183
|
self._seq_code = alph.encode_multiple(value, dtype)
|
|
184
|
-
|
|
184
|
+
|
|
185
185
|
@property
|
|
186
186
|
def code(self):
|
|
187
187
|
return self._seq_code
|
|
188
|
-
|
|
188
|
+
|
|
189
189
|
@code.setter
|
|
190
190
|
def code(self, value):
|
|
191
191
|
dtype = Sequence.dtype(len(self.get_alphabet()))
|
|
192
192
|
if not isinstance(value, np.ndarray):
|
|
193
193
|
raise TypeError("Sequence code must be an integer ndarray")
|
|
194
194
|
self._seq_code = value.astype(dtype, copy=False)
|
|
195
|
-
|
|
195
|
+
|
|
196
196
|
@property
|
|
197
197
|
def alphabet(self):
|
|
198
198
|
return self.get_alphabet()
|
|
199
|
-
|
|
199
|
+
|
|
200
200
|
@abc.abstractmethod
|
|
201
201
|
def get_alphabet(self):
|
|
202
202
|
"""
|
|
203
203
|
Get the :class:`Alphabet` of the :class:`Sequence`.
|
|
204
|
-
|
|
204
|
+
|
|
205
205
|
This method must be overridden when subclassing
|
|
206
206
|
:class:`Sequence`.
|
|
207
|
-
|
|
207
|
+
|
|
208
208
|
Returns
|
|
209
209
|
-------
|
|
210
210
|
alphabet : Alphabet
|
|
211
211
|
:class:`Sequence` alphabet.
|
|
212
212
|
"""
|
|
213
213
|
pass
|
|
214
|
-
|
|
214
|
+
|
|
215
215
|
def reverse(self, copy=True):
|
|
216
216
|
"""
|
|
217
217
|
Reverse the :class:`Sequence`.
|
|
218
|
-
|
|
218
|
+
|
|
219
219
|
Parameters
|
|
220
220
|
----------
|
|
221
221
|
copy : bool, optional
|
|
@@ -225,15 +225,15 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
225
225
|
In this case, manipulations on the returned sequence would
|
|
226
226
|
also affect this object.
|
|
227
227
|
Otherwise, the sequence code is copied.
|
|
228
|
-
|
|
228
|
+
|
|
229
229
|
Returns
|
|
230
230
|
-------
|
|
231
231
|
reversed : Sequence
|
|
232
232
|
The reversed :class:`Sequence`.
|
|
233
|
-
|
|
233
|
+
|
|
234
234
|
Examples
|
|
235
235
|
--------
|
|
236
|
-
|
|
236
|
+
|
|
237
237
|
>>> dna_seq = NucleotideSequence("ACGTA")
|
|
238
238
|
>>> dna_seq_rev = dna_seq.reverse()
|
|
239
239
|
>>> print(dna_seq_rev)
|
|
@@ -243,33 +243,33 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
243
243
|
if copy:
|
|
244
244
|
reversed_code = np.copy(reversed_code)
|
|
245
245
|
return self.copy(reversed_code)
|
|
246
|
-
|
|
246
|
+
|
|
247
247
|
def is_valid(self):
|
|
248
248
|
"""
|
|
249
249
|
Check, if the sequence contains a valid sequence code.
|
|
250
|
-
|
|
250
|
+
|
|
251
251
|
A sequence code is valid, if at each sequence position the
|
|
252
252
|
code is smaller than the size of the alphabet.
|
|
253
|
-
|
|
253
|
+
|
|
254
254
|
Invalid code means that the code cannot be decoded into
|
|
255
255
|
symbols. Furthermore invalid code can lead to serious
|
|
256
256
|
errors in alignments, since the substitution matrix
|
|
257
257
|
is indexed with an invalid index.
|
|
258
|
-
|
|
258
|
+
|
|
259
259
|
Returns
|
|
260
260
|
-------
|
|
261
261
|
valid : bool
|
|
262
262
|
True, if the sequence is valid, false otherwise.
|
|
263
263
|
"""
|
|
264
264
|
return (self.code < len(self.get_alphabet())).all()
|
|
265
|
-
|
|
265
|
+
|
|
266
266
|
def get_symbol_frequency(self):
|
|
267
267
|
"""
|
|
268
268
|
Get the number of occurrences of each symbol in the sequence.
|
|
269
|
-
|
|
269
|
+
|
|
270
270
|
If a symbol does not occur in the sequence, but it is in the
|
|
271
271
|
alphabet, its number of occurrences is 0.
|
|
272
|
-
|
|
272
|
+
|
|
273
273
|
Returns
|
|
274
274
|
-------
|
|
275
275
|
frequency : dict
|
|
@@ -284,7 +284,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
284
284
|
symbol: count for symbol, count
|
|
285
285
|
in zip(self.get_alphabet().get_symbols(), counts)
|
|
286
286
|
}
|
|
287
|
-
|
|
287
|
+
|
|
288
288
|
def __getitem__(self, index):
|
|
289
289
|
alph = self.get_alphabet()
|
|
290
290
|
sub_seq = self._seq_code.__getitem__(index)
|
|
@@ -292,7 +292,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
292
292
|
return self.copy(sub_seq)
|
|
293
293
|
else:
|
|
294
294
|
return alph.decode(sub_seq)
|
|
295
|
-
|
|
295
|
+
|
|
296
296
|
def __setitem__(self, index, item):
|
|
297
297
|
alph = self.get_alphabet()
|
|
298
298
|
if isinstance(index, numbers.Integral):
|
|
@@ -308,32 +308,34 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
308
308
|
# Default: item is iterable object of symbols
|
|
309
309
|
code = alph.encode_multiple(item)
|
|
310
310
|
self._seq_code.__setitem__(index, code)
|
|
311
|
-
|
|
311
|
+
|
|
312
312
|
def __len__(self):
|
|
313
313
|
return len(self._seq_code)
|
|
314
|
-
|
|
314
|
+
|
|
315
315
|
def __iter__(self):
|
|
316
316
|
alph = self.get_alphabet()
|
|
317
317
|
i = 0
|
|
318
318
|
while i < len(self):
|
|
319
319
|
yield alph.decode(self._seq_code[i])
|
|
320
320
|
i += 1
|
|
321
|
-
|
|
321
|
+
|
|
322
322
|
def __eq__(self, item):
|
|
323
323
|
if not isinstance(item, type(self)):
|
|
324
324
|
return False
|
|
325
325
|
if self.get_alphabet() != item.get_alphabet():
|
|
326
326
|
return False
|
|
327
327
|
return np.array_equal(self._seq_code, item._seq_code)
|
|
328
|
-
|
|
328
|
+
|
|
329
329
|
def __str__(self):
|
|
330
330
|
alph = self.get_alphabet()
|
|
331
331
|
if isinstance(alph, LetterAlphabet):
|
|
332
332
|
return alph.decode_multiple(self._seq_code, as_bytes=True)\
|
|
333
333
|
.tobytes().decode("ASCII")
|
|
334
334
|
else:
|
|
335
|
-
return "".join(
|
|
336
|
-
|
|
335
|
+
return ", ".join(
|
|
336
|
+
[str(e) for e in alph.decode_multiple(self._seq_code)]
|
|
337
|
+
)
|
|
338
|
+
|
|
337
339
|
def __add__(self, sequence):
|
|
338
340
|
if self.get_alphabet().extends(sequence.get_alphabet()):
|
|
339
341
|
new_code = np.concatenate((self._seq_code, sequence._seq_code))
|
|
@@ -356,7 +358,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
356
358
|
----------
|
|
357
359
|
alphabet_size : int
|
|
358
360
|
The size of the alphabet.
|
|
359
|
-
|
|
361
|
+
|
|
360
362
|
Returns
|
|
361
363
|
-------
|
|
362
364
|
dtype
|
biotite/structure/atoms.py
CHANGED
|
@@ -498,14 +498,14 @@ class Atom(Copyable):
|
|
|
498
498
|
|
|
499
499
|
def __repr__(self):
|
|
500
500
|
"""Represent Atom as a string for debugging."""
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
annot =
|
|
508
|
-
return f'Atom(np.{np.array_repr(self.coord)}, {annot})'
|
|
501
|
+
# print out key-value pairs and format strings in quotation marks
|
|
502
|
+
annot_parts = [
|
|
503
|
+
f'{key}="{value}"' if isinstance(value, str) else f'{key}={value}'
|
|
504
|
+
for key, value in self._annot.items()
|
|
505
|
+
]
|
|
506
|
+
|
|
507
|
+
annot = ', '.join(annot_parts)
|
|
508
|
+
return f'Atom(np.{np.array_repr(self.coord)}, {annot})'
|
|
509
509
|
|
|
510
510
|
@property
|
|
511
511
|
def shape(self):
|
|
Binary file
|
biotite/structure/bonds.pyx
CHANGED
|
@@ -1012,7 +1012,6 @@ class BondList(Copyable):
|
|
|
1012
1012
|
def __getitem__(self, index):
|
|
1013
1013
|
## Variables for both, integer and boolean index arrays
|
|
1014
1014
|
cdef uint32[:,:] all_bonds_v
|
|
1015
|
-
cdef int32 new_index
|
|
1016
1015
|
cdef int i
|
|
1017
1016
|
cdef uint32* index1_ptr
|
|
1018
1017
|
cdef uint32* index2_ptr
|
|
@@ -1020,7 +1019,7 @@ class BondList(Copyable):
|
|
|
1020
1019
|
cdef uint8[:] removal_filter_v
|
|
1021
1020
|
|
|
1022
1021
|
## Variables for integer arrays
|
|
1023
|
-
cdef int32[:]
|
|
1022
|
+
cdef int32[:] inverse_index_v
|
|
1024
1023
|
cdef int32 new_index1, new_index2
|
|
1025
1024
|
|
|
1026
1025
|
## Variables for boolean mask
|
|
@@ -1035,54 +1034,13 @@ class BondList(Copyable):
|
|
|
1035
1034
|
## Handle single index
|
|
1036
1035
|
return self.get_bonds(index)
|
|
1037
1036
|
|
|
1038
|
-
elif isinstance(index, np.ndarray)
|
|
1039
|
-
|
|
1040
|
-
## Handle index array
|
|
1041
|
-
copy = self.copy()
|
|
1042
|
-
all_bonds_v = copy._bonds
|
|
1043
|
-
|
|
1044
|
-
index = _to_positive_index_array(index, self._atom_count)
|
|
1045
|
-
# The inverse index is required to efficiently obtain
|
|
1046
|
-
# the new index of an atom in case of an unsorted index
|
|
1047
|
-
# array
|
|
1048
|
-
inverse_index_v = _invert_index(index, self._atom_count)
|
|
1049
|
-
removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
|
|
1050
|
-
removal_filter_v = removal_filter
|
|
1051
|
-
for i in range(all_bonds_v.shape[0]):
|
|
1052
|
-
# Usage of pointer to increase performance
|
|
1053
|
-
# as redundant indexing is avoided
|
|
1054
|
-
index1_ptr = &all_bonds_v[i,0]
|
|
1055
|
-
index2_ptr = &all_bonds_v[i,1]
|
|
1056
|
-
new_index1 = inverse_index_v[index1_ptr[0]]
|
|
1057
|
-
new_index2 = inverse_index_v[index2_ptr[0]]
|
|
1058
|
-
if new_index1 != -1 and new_index2 != -1:
|
|
1059
|
-
# Both atoms involved in bond are included
|
|
1060
|
-
# by index array
|
|
1061
|
-
# -> assign new atom indices
|
|
1062
|
-
index1_ptr[0] = <int32>new_index1
|
|
1063
|
-
index2_ptr[0] = <int32>new_index2
|
|
1064
|
-
else:
|
|
1065
|
-
# At least one atom in bond is not included
|
|
1066
|
-
# -> remove bond
|
|
1067
|
-
removal_filter_v[i] = False
|
|
1068
|
-
|
|
1069
|
-
copy._bonds = copy._bonds[
|
|
1070
|
-
removal_filter.astype(bool, copy=False)
|
|
1071
|
-
]
|
|
1072
|
-
# Again, sort indices per bond
|
|
1073
|
-
# as the correct order is not guaranteed anymore
|
|
1074
|
-
# for unsorted index arrays
|
|
1075
|
-
copy._bonds[:,:2] = np.sort(copy._bonds[:,:2], axis=1)
|
|
1076
|
-
copy._atom_count = len(index)
|
|
1077
|
-
copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
|
|
1078
|
-
return copy
|
|
1079
|
-
|
|
1080
|
-
else:
|
|
1081
|
-
## Handle all other arrays as boolean mask
|
|
1037
|
+
elif isinstance(index, np.ndarray) and index.dtype == bool:
|
|
1038
|
+
## Handle boolean masks
|
|
1082
1039
|
copy = self.copy()
|
|
1083
1040
|
all_bonds_v = copy._bonds
|
|
1041
|
+
# Use 'uint8' instead of 'bool' for memory view
|
|
1042
|
+
mask = np.frombuffer(index, dtype=np.uint8)
|
|
1084
1043
|
|
|
1085
|
-
mask = _to_bool_mask(index, length=copy._atom_count)
|
|
1086
1044
|
# Each time an atom is missing in the mask,
|
|
1087
1045
|
# the offset is increased by one
|
|
1088
1046
|
offsets = np.cumsum(
|
|
@@ -1118,6 +1076,48 @@ class BondList(Copyable):
|
|
|
1118
1076
|
copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
|
|
1119
1077
|
return copy
|
|
1120
1078
|
|
|
1079
|
+
else:
|
|
1080
|
+
## Convert any other type of index into index array, as it preserves order
|
|
1081
|
+
copy = self.copy()
|
|
1082
|
+
all_bonds_v = copy._bonds
|
|
1083
|
+
index = _to_index_array(index, self._atom_count)
|
|
1084
|
+
index = _to_positive_index_array(index, self._atom_count)
|
|
1085
|
+
|
|
1086
|
+
# The inverse index is required to efficiently obtain
|
|
1087
|
+
# the new index of an atom in case of an unsorted index
|
|
1088
|
+
# array
|
|
1089
|
+
inverse_index_v = _invert_index(index, self._atom_count)
|
|
1090
|
+
removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
|
|
1091
|
+
removal_filter_v = removal_filter
|
|
1092
|
+
for i in range(all_bonds_v.shape[0]):
|
|
1093
|
+
# Usage of pointer to increase performance
|
|
1094
|
+
# as redundant indexing is avoided
|
|
1095
|
+
index1_ptr = &all_bonds_v[i,0]
|
|
1096
|
+
index2_ptr = &all_bonds_v[i,1]
|
|
1097
|
+
new_index1 = inverse_index_v[index1_ptr[0]]
|
|
1098
|
+
new_index2 = inverse_index_v[index2_ptr[0]]
|
|
1099
|
+
if new_index1 != -1 and new_index2 != -1:
|
|
1100
|
+
# Both atoms involved in bond are included
|
|
1101
|
+
# by index array
|
|
1102
|
+
# -> assign new atom indices
|
|
1103
|
+
index1_ptr[0] = <int32>new_index1
|
|
1104
|
+
index2_ptr[0] = <int32>new_index2
|
|
1105
|
+
else:
|
|
1106
|
+
# At least one atom in bond is not included
|
|
1107
|
+
# -> remove bond
|
|
1108
|
+
removal_filter_v[i] = False
|
|
1109
|
+
|
|
1110
|
+
copy._bonds = copy._bonds[
|
|
1111
|
+
removal_filter.astype(bool, copy=False)
|
|
1112
|
+
]
|
|
1113
|
+
# Again, sort indices per bond
|
|
1114
|
+
# as the correct order is not guaranteed anymore
|
|
1115
|
+
# for unsorted index arrays
|
|
1116
|
+
copy._bonds[:,:2] = np.sort(copy._bonds[:,:2], axis=1)
|
|
1117
|
+
copy._atom_count = len(index)
|
|
1118
|
+
copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
|
|
1119
|
+
return copy
|
|
1120
|
+
|
|
1121
1121
|
def __iter__(self):
|
|
1122
1122
|
raise TypeError("'BondList' object is not iterable")
|
|
1123
1123
|
|
|
@@ -1266,6 +1266,18 @@ def _to_positive_index_array(index_array, length):
|
|
|
1266
1266
|
return index_array.reshape(orig_shape)
|
|
1267
1267
|
|
|
1268
1268
|
|
|
1269
|
+
def _to_index_array(object index, uint32 length):
|
|
1270
|
+
"""
|
|
1271
|
+
Convert an index of arbitrary type into an index array.
|
|
1272
|
+
"""
|
|
1273
|
+
if isinstance(index, np.ndarray) and np.issubdtype(index.dtype, np.integer):
|
|
1274
|
+
return index
|
|
1275
|
+
else:
|
|
1276
|
+
# Convert into index array
|
|
1277
|
+
all_indices = np.arange(length, dtype=np.uint32)
|
|
1278
|
+
return all_indices[index]
|
|
1279
|
+
|
|
1280
|
+
|
|
1269
1281
|
cdef inline bint _in_array(uint32* array, uint32 atom_index, int array_length):
|
|
1270
1282
|
"""
|
|
1271
1283
|
Test whether a value (`atom_index`) is in a C-array `array`.
|
|
@@ -1316,27 +1328,6 @@ def _invert_index(IndexType[:] index_v, uint32 length):
|
|
|
1316
1328
|
return inverse_index
|
|
1317
1329
|
|
|
1318
1330
|
|
|
1319
|
-
def _to_bool_mask(object index, uint32 length):
|
|
1320
|
-
"""
|
|
1321
|
-
Convert an index of arbitrary type into a boolean mask
|
|
1322
|
-
with given length.
|
|
1323
|
-
"""
|
|
1324
|
-
if isinstance(index, np.ndarray) and index.dtype == bool:
|
|
1325
|
-
# Index is already boolean mask -> simply return as uint8
|
|
1326
|
-
if len(index) != length:
|
|
1327
|
-
raise IndexError(
|
|
1328
|
-
f"Boolean mask has length {len(index)}, expected {length}"
|
|
1329
|
-
)
|
|
1330
|
-
# Use 'uint8' instead of 'bool' for memory view
|
|
1331
|
-
return index.astype(np.uint8, copy=False)
|
|
1332
|
-
else:
|
|
1333
|
-
# Use 'uint8' instead of 'bool' for memory view
|
|
1334
|
-
mask = np.zeros(length, dtype=np.uint8)
|
|
1335
|
-
# 1 -> True
|
|
1336
|
-
mask[index] = 1
|
|
1337
|
-
return mask
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
1331
|
|
|
1341
1332
|
|
|
1342
1333
|
_DEFAULT_DISTANCE_RANGE = {
|
|
Binary file
|
|
Binary file
|
biotite/structure/info/ccd.py
CHANGED
|
@@ -24,12 +24,20 @@ _residue_index = {}
|
|
|
24
24
|
|
|
25
25
|
def get_ccd():
|
|
26
26
|
"""
|
|
27
|
-
Get the
|
|
27
|
+
Get the internal subset of the PDB
|
|
28
|
+
*Chemical Component Dictionary* (CCD).
|
|
29
|
+
:footcite:`Westbrook2015`
|
|
28
30
|
|
|
29
31
|
Returns
|
|
30
32
|
-------
|
|
31
33
|
ccd : BinaryCIFFile
|
|
32
34
|
The CCD.
|
|
35
|
+
|
|
36
|
+
References
|
|
37
|
+
----------
|
|
38
|
+
|
|
39
|
+
.. footbibliography::
|
|
40
|
+
|
|
33
41
|
"""
|
|
34
42
|
# Avoid circular import
|
|
35
43
|
from ..io.pdbx.bcif import BinaryCIFFile
|
|
@@ -44,7 +52,8 @@ def get_ccd():
|
|
|
44
52
|
def get_from_ccd(category_name, comp_id, column_name=None):
|
|
45
53
|
"""
|
|
46
54
|
Get the rows for the given residue in the given category from the
|
|
47
|
-
PDB *Chemical Component Dictionary* (CCD).
|
|
55
|
+
internal subset of the PDB *Chemical Component Dictionary* (CCD).
|
|
56
|
+
:footcite:`Westbrook2015`
|
|
48
57
|
|
|
49
58
|
Parameters
|
|
50
59
|
----------
|
|
@@ -62,6 +71,12 @@ def get_from_ccd(category_name, comp_id, column_name=None):
|
|
|
62
71
|
value : ndarray or dict or None
|
|
63
72
|
The array of the given column or all columns as dictionary.
|
|
64
73
|
``None`` if the `comp_id` is not found in the category.
|
|
74
|
+
|
|
75
|
+
References
|
|
76
|
+
----------
|
|
77
|
+
|
|
78
|
+
.. footbibliography::
|
|
79
|
+
|
|
65
80
|
"""
|
|
66
81
|
global _residue_index
|
|
67
82
|
ccd = get_ccd()
|
biotite/structure/info/groups.py
CHANGED
|
@@ -19,7 +19,8 @@ group_lists = {}
|
|
|
19
19
|
def amino_acid_names():
|
|
20
20
|
"""
|
|
21
21
|
Get a tuple of amino acid three-letter codes according to the
|
|
22
|
-
PDB *Chemical Component Dictionary
|
|
22
|
+
PDB *Chemical Component Dictionary*.
|
|
23
|
+
:footcite:`Westbrook2015`
|
|
23
24
|
|
|
24
25
|
Returns
|
|
25
26
|
-------
|
|
@@ -27,13 +28,11 @@ def amino_acid_names():
|
|
|
27
28
|
A list of three-letter-codes containing residues that are
|
|
28
29
|
peptide monomers.
|
|
29
30
|
|
|
30
|
-
Notes
|
|
31
|
-
-----
|
|
32
|
-
|
|
33
31
|
References
|
|
34
32
|
----------
|
|
35
33
|
|
|
36
34
|
.. footbibliography::
|
|
35
|
+
|
|
37
36
|
"""
|
|
38
37
|
return _get_group_members("amino_acids")
|
|
39
38
|
|
|
@@ -41,7 +40,8 @@ def amino_acid_names():
|
|
|
41
40
|
def nucleotide_names():
|
|
42
41
|
"""
|
|
43
42
|
Get a tuple of nucleotide three-letter codes according to the
|
|
44
|
-
PDB *Chemical Component Dictionary
|
|
43
|
+
PDB *Chemical Component Dictionary*.
|
|
44
|
+
:footcite:`Westbrook2015`
|
|
45
45
|
|
|
46
46
|
Returns
|
|
47
47
|
-------
|
|
@@ -49,13 +49,11 @@ def nucleotide_names():
|
|
|
49
49
|
A list of three-letter-codes containing residues that are
|
|
50
50
|
DNA/RNA monomers.
|
|
51
51
|
|
|
52
|
-
Notes
|
|
53
|
-
-----
|
|
54
|
-
|
|
55
52
|
References
|
|
56
53
|
----------
|
|
57
54
|
|
|
58
55
|
.. footbibliography::
|
|
56
|
+
|
|
59
57
|
"""
|
|
60
58
|
return _get_group_members("nucleotides")
|
|
61
59
|
|
|
@@ -63,7 +61,8 @@ def nucleotide_names():
|
|
|
63
61
|
def carbohydrate_names():
|
|
64
62
|
"""
|
|
65
63
|
Get a tuple of carbohydrate three-letter codes according to the
|
|
66
|
-
PDB *Chemical Component Dictionary
|
|
64
|
+
PDB *Chemical Component Dictionary*.
|
|
65
|
+
:footcite:`Westbrook2015`
|
|
67
66
|
|
|
68
67
|
Returns
|
|
69
68
|
-------
|
|
@@ -71,13 +70,11 @@ def carbohydrate_names():
|
|
|
71
70
|
A list of three-letter-codes containing residues that are
|
|
72
71
|
saccharide monomers.
|
|
73
72
|
|
|
74
|
-
Notes
|
|
75
|
-
-----
|
|
76
|
-
|
|
77
73
|
References
|
|
78
74
|
----------
|
|
79
75
|
|
|
80
76
|
.. footbibliography::
|
|
77
|
+
|
|
81
78
|
"""
|
|
82
79
|
return _get_group_members("carbohydrates")
|
|
83
80
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -389,14 +389,6 @@ class BinaryCIFCategory(_HierarchicalContainer):
|
|
|
389
389
|
def supercomponent_class():
|
|
390
390
|
return BinaryCIFBlock
|
|
391
391
|
|
|
392
|
-
def filter(self, index):
|
|
393
|
-
return BinaryCIFCategory(
|
|
394
|
-
{key: column.filter(index) for key, column in self.items()},
|
|
395
|
-
# Create placeholder array just to check how many elements
|
|
396
|
-
# remain after filtering
|
|
397
|
-
len(np.empty(self.row_count, dtype=bool)[index]),
|
|
398
|
-
)
|
|
399
|
-
|
|
400
392
|
@staticmethod
|
|
401
393
|
def deserialize(content):
|
|
402
394
|
return BinaryCIFCategory(
|
|
Binary file
|
|
Binary file
|
biotite/version.py
CHANGED