biotite 0.41.2__cp311-cp311-macosx_11_0_arm64.whl → 1.0.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/alphabet.py
CHANGED
|
@@ -4,14 +4,18 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.sequence"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = [
|
|
8
|
-
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Alphabet",
|
|
9
|
+
"LetterAlphabet",
|
|
10
|
+
"AlphabetMapper",
|
|
11
|
+
"AlphabetError",
|
|
12
|
+
"common_alphabet",
|
|
13
|
+
]
|
|
9
14
|
|
|
10
|
-
import copy
|
|
11
|
-
from numbers import Integral
|
|
12
15
|
import string
|
|
16
|
+
from numbers import Integral
|
|
13
17
|
import numpy as np
|
|
14
|
-
from .codec import
|
|
18
|
+
from biotite.sequence.codec import decode_to_chars, encode_chars, map_sequence_code
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
class Alphabet(object):
|
|
@@ -100,14 +104,14 @@ class Alphabet(object):
|
|
|
100
104
|
def __init__(self, symbols):
|
|
101
105
|
if len(symbols) == 0:
|
|
102
106
|
raise ValueError("Symbol list is empty")
|
|
103
|
-
self._symbols =
|
|
107
|
+
self._symbols = tuple(symbols)
|
|
104
108
|
self._symbol_dict = {}
|
|
105
109
|
for i, symbol in enumerate(symbols):
|
|
106
110
|
self._symbol_dict[symbol] = i
|
|
107
111
|
|
|
108
112
|
def __repr__(self):
|
|
109
113
|
"""Represent Alphabet as a string for debugging."""
|
|
110
|
-
return f
|
|
114
|
+
return f"Alphabet({self._symbols})"
|
|
111
115
|
|
|
112
116
|
def get_symbols(self):
|
|
113
117
|
"""
|
|
@@ -115,10 +119,10 @@ class Alphabet(object):
|
|
|
115
119
|
|
|
116
120
|
Returns
|
|
117
121
|
-------
|
|
118
|
-
symbols :
|
|
119
|
-
|
|
122
|
+
symbols : tuple
|
|
123
|
+
The symbols.
|
|
120
124
|
"""
|
|
121
|
-
return
|
|
125
|
+
return self._symbols
|
|
122
126
|
|
|
123
127
|
def extends(self, alphabet):
|
|
124
128
|
"""
|
|
@@ -139,8 +143,7 @@ class Alphabet(object):
|
|
|
139
143
|
elif len(alphabet) > len(self):
|
|
140
144
|
return False
|
|
141
145
|
else:
|
|
142
|
-
return alphabet.get_symbols()
|
|
143
|
-
== self.get_symbols()[:len(alphabet)]
|
|
146
|
+
return alphabet.get_symbols() == self.get_symbols()[: len(alphabet)]
|
|
144
147
|
|
|
145
148
|
def encode(self, symbol):
|
|
146
149
|
"""
|
|
@@ -164,9 +167,7 @@ class Alphabet(object):
|
|
|
164
167
|
try:
|
|
165
168
|
return self._symbol_dict[symbol]
|
|
166
169
|
except KeyError:
|
|
167
|
-
raise AlphabetError(
|
|
168
|
-
f"Symbol {repr(symbol)} is not in the alphabet"
|
|
169
|
-
)
|
|
170
|
+
raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")
|
|
170
171
|
|
|
171
172
|
def decode(self, code):
|
|
172
173
|
"""
|
|
@@ -238,12 +239,11 @@ class Alphabet(object):
|
|
|
238
239
|
have length 1 and are printable.
|
|
239
240
|
"""
|
|
240
241
|
for symbol in self:
|
|
241
|
-
if not isinstance(symbol, (str, bytes))
|
|
242
|
-
|
|
243
|
-
return False
|
|
242
|
+
if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
|
|
243
|
+
return False
|
|
244
244
|
if isinstance(symbol, str):
|
|
245
245
|
symbol = symbol.encode("ASCII")
|
|
246
|
-
if symbol not in LetterAlphabet.
|
|
246
|
+
if symbol not in LetterAlphabet.PRINTABLES:
|
|
247
247
|
return False
|
|
248
248
|
return True
|
|
249
249
|
|
|
@@ -260,7 +260,11 @@ class Alphabet(object):
|
|
|
260
260
|
return symbol in self.get_symbols()
|
|
261
261
|
|
|
262
262
|
def __hash__(self):
|
|
263
|
-
|
|
263
|
+
symbols = self.get_symbols()
|
|
264
|
+
if isinstance(symbols, tuple):
|
|
265
|
+
return hash(symbols)
|
|
266
|
+
else:
|
|
267
|
+
return hash(tuple(symbols))
|
|
264
268
|
|
|
265
269
|
def __eq__(self, item):
|
|
266
270
|
if item is self:
|
|
@@ -292,8 +296,9 @@ class LetterAlphabet(Alphabet):
|
|
|
292
296
|
in this list.
|
|
293
297
|
"""
|
|
294
298
|
|
|
295
|
-
|
|
296
|
-
|
|
299
|
+
PRINTABLES = (string.digits + string.ascii_letters + string.punctuation).encode(
|
|
300
|
+
"ASCII"
|
|
301
|
+
)
|
|
297
302
|
|
|
298
303
|
def __init__(self, symbols):
|
|
299
304
|
if len(symbols) == 0:
|
|
@@ -304,7 +309,7 @@ class LetterAlphabet(Alphabet):
|
|
|
304
309
|
raise ValueError(f"Symbol '{symbol}' is not a single letter")
|
|
305
310
|
if isinstance(symbol, str):
|
|
306
311
|
symbol = symbol.encode("ASCII")
|
|
307
|
-
if symbol not in LetterAlphabet.
|
|
312
|
+
if symbol not in LetterAlphabet.PRINTABLES:
|
|
308
313
|
raise ValueError(
|
|
309
314
|
f"Symbol {repr(symbol)} is not printable or whitespace"
|
|
310
315
|
)
|
|
@@ -312,47 +317,33 @@ class LetterAlphabet(Alphabet):
|
|
|
312
317
|
# Direct 'astype' conversion is not allowed by numpy
|
|
313
318
|
# -> frombuffer()
|
|
314
319
|
self._symbols = np.frombuffer(
|
|
315
|
-
np.array(self._symbols, dtype="|S1"),
|
|
316
|
-
dtype=np.ubyte
|
|
320
|
+
np.array(self._symbols, dtype="|S1"), dtype=np.ubyte
|
|
317
321
|
)
|
|
318
322
|
|
|
319
323
|
def __repr__(self):
|
|
320
324
|
"""Represent LetterAlphabet as a string for debugging."""
|
|
321
|
-
return f
|
|
325
|
+
return f"LetterAlphabet({self.get_symbols()})"
|
|
322
326
|
|
|
323
327
|
def extends(self, alphabet):
|
|
324
328
|
if alphabet is self:
|
|
325
329
|
return True
|
|
326
|
-
elif
|
|
330
|
+
elif isinstance(alphabet, LetterAlphabet):
|
|
327
331
|
if len(alphabet._symbols) > len(self._symbols):
|
|
328
332
|
return False
|
|
329
|
-
return np.all(
|
|
330
|
-
alphabet._symbols == self._symbols[:len(alphabet._symbols)]
|
|
331
|
-
)
|
|
333
|
+
return np.all(alphabet._symbols == self._symbols[: len(alphabet._symbols)])
|
|
332
334
|
else:
|
|
333
335
|
return super().extends(alphabet)
|
|
334
336
|
|
|
335
337
|
def get_symbols(self):
|
|
336
|
-
""
|
|
337
|
-
Get the symbols in the alphabet.
|
|
338
|
-
|
|
339
|
-
Returns
|
|
340
|
-
-------
|
|
341
|
-
symbols : list
|
|
342
|
-
Copy of the internal list of symbols.
|
|
343
|
-
"""
|
|
344
|
-
return [symbol.decode("ASCII") for symbol
|
|
345
|
-
in self._symbols_as_bytes()]
|
|
338
|
+
return tuple([symbol.decode("ASCII") for symbol in self._symbols_as_bytes()])
|
|
346
339
|
|
|
347
340
|
def encode(self, symbol):
|
|
348
341
|
if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
|
|
349
342
|
raise AlphabetError(f"Symbol '{symbol}' is not a single letter")
|
|
350
343
|
indices = np.where(self._symbols == ord(symbol))[0]
|
|
351
344
|
if len(indices) == 0:
|
|
352
|
-
raise AlphabetError(
|
|
353
|
-
|
|
354
|
-
)
|
|
355
|
-
return indices[0]
|
|
345
|
+
raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")
|
|
346
|
+
return indices[0].item()
|
|
356
347
|
|
|
357
348
|
def decode(self, code, as_bytes=False):
|
|
358
349
|
if code < 0 or code >= len(self._symbols):
|
|
@@ -382,13 +373,10 @@ class LetterAlphabet(Alphabet):
|
|
|
382
373
|
elif isinstance(symbols, bytes):
|
|
383
374
|
symbols = np.frombuffer(symbols, dtype=np.ubyte)
|
|
384
375
|
elif isinstance(symbols, np.ndarray):
|
|
385
|
-
symbols = np.frombuffer(
|
|
386
|
-
symbols.astype(dtype="|S1"), dtype=np.ubyte
|
|
387
|
-
)
|
|
376
|
+
symbols = np.frombuffer(symbols.astype(dtype="|S1"), dtype=np.ubyte)
|
|
388
377
|
else:
|
|
389
378
|
symbols = np.frombuffer(
|
|
390
|
-
np.array(list(symbols), dtype="|S1"),
|
|
391
|
-
dtype=np.ubyte
|
|
379
|
+
np.array(list(symbols), dtype="|S1"), dtype=np.ubyte
|
|
392
380
|
)
|
|
393
381
|
return encode_chars(alphabet=self._symbols, symbols=symbols)
|
|
394
382
|
|
|
@@ -435,7 +423,6 @@ class LetterAlphabet(Alphabet):
|
|
|
435
423
|
return np.frombuffer(self._symbols, dtype="|S1")
|
|
436
424
|
|
|
437
425
|
|
|
438
|
-
|
|
439
426
|
class AlphabetMapper(object):
|
|
440
427
|
"""
|
|
441
428
|
This class is used for symbol code conversion from a source
|
|
@@ -486,8 +473,7 @@ class AlphabetMapper(object):
|
|
|
486
473
|
else:
|
|
487
474
|
self._necessary_mapping = True
|
|
488
475
|
self._mapper = np.zeros(
|
|
489
|
-
len(source_alphabet),
|
|
490
|
-
dtype=AlphabetMapper._dtype(len(target_alphabet))
|
|
476
|
+
len(source_alphabet), dtype=AlphabetMapper._dtype(len(target_alphabet))
|
|
491
477
|
)
|
|
492
478
|
for old_code in range(len(source_alphabet)):
|
|
493
479
|
symbol = source_alphabet.decode(old_code)
|
|
@@ -500,26 +486,25 @@ class AlphabetMapper(object):
|
|
|
500
486
|
return self._mapper[code]
|
|
501
487
|
else:
|
|
502
488
|
return code
|
|
503
|
-
if not isinstance(code, np.ndarray)
|
|
504
|
-
|
|
505
|
-
|
|
489
|
+
if not isinstance(code, np.ndarray) or code.dtype not in (
|
|
490
|
+
np.uint8,
|
|
491
|
+
np.uint16,
|
|
492
|
+
np.uint32,
|
|
493
|
+
np.uint64,
|
|
494
|
+
):
|
|
495
|
+
code = np.array(code, dtype=np.uint64)
|
|
506
496
|
if self._necessary_mapping:
|
|
507
497
|
mapped_code = np.empty(len(code), dtype=self._mapper.dtype)
|
|
508
|
-
map_sequence_code(
|
|
509
|
-
self._mapper,
|
|
510
|
-
code,
|
|
511
|
-
mapped_code
|
|
512
|
-
)
|
|
498
|
+
map_sequence_code(self._mapper, code, mapped_code)
|
|
513
499
|
return mapped_code
|
|
514
500
|
else:
|
|
515
501
|
return code
|
|
516
502
|
|
|
517
|
-
|
|
518
503
|
@staticmethod
|
|
519
504
|
def _dtype(alphabet_size):
|
|
520
|
-
_size_uint8
|
|
521
|
-
_size_uint16 = np.iinfo(np.uint16).max +1
|
|
522
|
-
_size_uint32 = np.iinfo(np.uint32).max +1
|
|
505
|
+
_size_uint8 = np.iinfo(np.uint8).max + 1
|
|
506
|
+
_size_uint16 = np.iinfo(np.uint16).max + 1
|
|
507
|
+
_size_uint32 = np.iinfo(np.uint32).max + 1
|
|
523
508
|
if alphabet_size <= _size_uint8:
|
|
524
509
|
return np.uint8
|
|
525
510
|
elif alphabet_size <= _size_uint16:
|
|
@@ -535,6 +520,7 @@ class AlphabetError(Exception):
|
|
|
535
520
|
This exception is raised, when a code or a symbol is not in an
|
|
536
521
|
:class:`Alphabet`.
|
|
537
522
|
"""
|
|
523
|
+
|
|
538
524
|
pass
|
|
539
525
|
|
|
540
526
|
|
|
@@ -563,4 +549,4 @@ def common_alphabet(alphabets):
|
|
|
563
549
|
common_alphabet = alphabet
|
|
564
550
|
else:
|
|
565
551
|
return None
|
|
566
|
-
return common_alphabet
|
|
552
|
+
return common_alphabet
|
biotite/sequence/annotation.py
CHANGED
|
@@ -6,17 +6,15 @@ __name__ = "biotite.sequence"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["Location", "Feature", "Annotation", "AnnotatedSequence"]
|
|
8
8
|
|
|
9
|
-
import numbers
|
|
10
9
|
import copy
|
|
10
|
+
import numbers
|
|
11
11
|
import sys
|
|
12
|
-
from enum import
|
|
12
|
+
from enum import Enum, Flag, auto
|
|
13
13
|
import numpy as np
|
|
14
|
-
from .
|
|
15
|
-
from ..copyable import Copyable
|
|
16
|
-
from .seqtypes import NucleotideSequence
|
|
14
|
+
from biotite.copyable import Copyable
|
|
17
15
|
|
|
18
16
|
|
|
19
|
-
class Location
|
|
17
|
+
class Location:
|
|
20
18
|
"""
|
|
21
19
|
A :class:`Location` defines at which base(s)/residue(s) a feature is
|
|
22
20
|
located.
|
|
@@ -63,24 +61,25 @@ class Location():
|
|
|
63
61
|
- **BETWEEN** - The position is between to consecutive
|
|
64
62
|
bases/residues.
|
|
65
63
|
"""
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
64
|
+
|
|
65
|
+
NONE = 0
|
|
66
|
+
MISS_LEFT = auto()
|
|
67
|
+
MISS_RIGHT = auto()
|
|
68
|
+
BEYOND_LEFT = auto()
|
|
70
69
|
BEYOND_RIGHT = auto()
|
|
71
|
-
UNK_LOC
|
|
72
|
-
BETWEEN
|
|
70
|
+
UNK_LOC = auto()
|
|
71
|
+
BETWEEN = auto()
|
|
73
72
|
|
|
74
73
|
class Strand(Enum):
|
|
75
74
|
"""
|
|
76
75
|
This enum type describes the strand of the feature location.
|
|
77
76
|
This is not relevant for protein sequence features.
|
|
78
77
|
"""
|
|
78
|
+
|
|
79
79
|
FORWARD = auto()
|
|
80
80
|
REVERSE = auto()
|
|
81
81
|
|
|
82
|
-
def __init__(self, first, last, strand=Strand.FORWARD,
|
|
83
|
-
defect=Defect.NONE):
|
|
82
|
+
def __init__(self, first, last, strand=Strand.FORWARD, defect=Defect.NONE):
|
|
84
83
|
if first > last:
|
|
85
84
|
raise ValueError(
|
|
86
85
|
"The first position cannot be higher than the last position"
|
|
@@ -92,8 +91,10 @@ class Location():
|
|
|
92
91
|
|
|
93
92
|
def __repr__(self):
|
|
94
93
|
"""Represent Location as a string for debugging."""
|
|
95
|
-
return
|
|
96
|
-
|
|
94
|
+
return (
|
|
95
|
+
f'Location({self._first}, {self._last}, strand={"Location." + str(self._strand)}, '
|
|
96
|
+
f'defect={"Location." + str(self._defect)})'
|
|
97
|
+
)
|
|
97
98
|
|
|
98
99
|
@property
|
|
99
100
|
def first(self):
|
|
@@ -122,10 +123,12 @@ class Location():
|
|
|
122
123
|
def __eq__(self, item):
|
|
123
124
|
if not isinstance(item, Location):
|
|
124
125
|
return False
|
|
125
|
-
return (
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
126
|
+
return (
|
|
127
|
+
self.first == item.first
|
|
128
|
+
and self.last == item.last
|
|
129
|
+
and self.strand == item.strand
|
|
130
|
+
and self.defect == item.defect
|
|
131
|
+
)
|
|
129
132
|
|
|
130
133
|
def __hash__(self):
|
|
131
134
|
return hash((self._first, self._last, self._strand, self._defect))
|
|
@@ -208,9 +211,11 @@ class Feature(Copyable):
|
|
|
208
211
|
def __eq__(self, item):
|
|
209
212
|
if not isinstance(item, Feature):
|
|
210
213
|
return False
|
|
211
|
-
return (
|
|
212
|
-
|
|
213
|
-
|
|
214
|
+
return (
|
|
215
|
+
self._key == item._key
|
|
216
|
+
and self._locs == item._locs
|
|
217
|
+
and self._qual == item._qual
|
|
218
|
+
)
|
|
214
219
|
|
|
215
220
|
def __lt__(self, item):
|
|
216
221
|
if not isinstance(item, Feature):
|
|
@@ -223,7 +228,7 @@ class Feature(Copyable):
|
|
|
223
228
|
return True
|
|
224
229
|
elif first > it_first:
|
|
225
230
|
return False
|
|
226
|
-
else:
|
|
231
|
+
else: # First is equal
|
|
227
232
|
return last > it_last
|
|
228
233
|
|
|
229
234
|
def __gt__(self, item):
|
|
@@ -237,7 +242,7 @@ class Feature(Copyable):
|
|
|
237
242
|
return True
|
|
238
243
|
elif first < it_first:
|
|
239
244
|
return False
|
|
240
|
-
else:
|
|
245
|
+
else: # First is equal
|
|
241
246
|
return last < it_last
|
|
242
247
|
|
|
243
248
|
@property
|
|
@@ -253,7 +258,7 @@ class Feature(Copyable):
|
|
|
253
258
|
return copy.copy(self._qual)
|
|
254
259
|
|
|
255
260
|
def __hash__(self):
|
|
256
|
-
return hash((
|
|
261
|
+
return hash((self._key, self._locs, frozenset(self._qual.items())))
|
|
257
262
|
|
|
258
263
|
|
|
259
264
|
class Annotation(Copyable):
|
|
@@ -337,7 +342,7 @@ class Annotation(Copyable):
|
|
|
337
342
|
... gene = f.qual["gene"]
|
|
338
343
|
... loc_str = "".join([f"{loc} {loc.defect}" for loc in f.locs])
|
|
339
344
|
... print(gene, loc_str)
|
|
340
|
-
test5 40-149 > Defect.MISS_RIGHT
|
|
345
|
+
test5 40-149 > Defect.MISS_LEFT|MISS_RIGHT
|
|
341
346
|
test2 40-50 > Defect.MISS_LEFT
|
|
342
347
|
test3 100-130 > Defect.NONE
|
|
343
348
|
"""
|
|
@@ -350,7 +355,9 @@ class Annotation(Copyable):
|
|
|
350
355
|
|
|
351
356
|
def __repr__(self):
|
|
352
357
|
"""Represent Annotation as a string for debugging."""
|
|
353
|
-
return
|
|
358
|
+
return (
|
|
359
|
+
f'Annotation([{", ".join([feat.__repr__() for feat in self._features])}])'
|
|
360
|
+
)
|
|
354
361
|
|
|
355
362
|
def __copy_create__(self):
|
|
356
363
|
return Annotation(self._features)
|
|
@@ -403,7 +410,7 @@ class Annotation(Copyable):
|
|
|
403
410
|
if loc.last > last:
|
|
404
411
|
last = loc.last
|
|
405
412
|
# Exclusive stop -> +1
|
|
406
|
-
return first, last+1
|
|
413
|
+
return first, last + 1
|
|
407
414
|
|
|
408
415
|
def del_feature(self, feature):
|
|
409
416
|
"""
|
|
@@ -475,9 +482,7 @@ class Annotation(Copyable):
|
|
|
475
482
|
if loc.last > i_last:
|
|
476
483
|
defect |= Location.Defect.MISS_RIGHT
|
|
477
484
|
last = i_last
|
|
478
|
-
locs_in_scope.append(Location(
|
|
479
|
-
first, last, loc.strand, defect
|
|
480
|
-
))
|
|
485
|
+
locs_in_scope.append(Location(first, last, loc.strand, defect))
|
|
481
486
|
if len(locs_in_scope) > 0:
|
|
482
487
|
# The feature is present in the new annotation
|
|
483
488
|
# if any of the original locations is in the new
|
|
@@ -488,15 +493,12 @@ class Annotation(Copyable):
|
|
|
488
493
|
sub_annot.add_feature(new_feature)
|
|
489
494
|
return sub_annot
|
|
490
495
|
else:
|
|
491
|
-
raise TypeError(
|
|
492
|
-
f"'{type(index).__name__}' instances are invalid indices"
|
|
493
|
-
)
|
|
496
|
+
raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
|
|
494
497
|
|
|
495
498
|
def __delitem__(self, item):
|
|
496
499
|
if not isinstance(item, Feature):
|
|
497
500
|
raise TypeError(
|
|
498
|
-
f"Only 'Feature' objects are supported, "
|
|
499
|
-
f"not {type(item).__name__}"
|
|
501
|
+
f"Only 'Feature' objects are supported, " f"not {type(item).__name__}"
|
|
500
502
|
)
|
|
501
503
|
self.del_feature(item)
|
|
502
504
|
|
|
@@ -626,8 +628,10 @@ class AnnotatedSequence(Copyable):
|
|
|
626
628
|
|
|
627
629
|
def __repr__(self):
|
|
628
630
|
"""Represent AnnotatedSequence as a string for debugging."""
|
|
629
|
-
return
|
|
630
|
-
|
|
631
|
+
return (
|
|
632
|
+
f"AnnotatedSequence({self._annotation.__repr__()}, {self._sequence.__repr__()}, "
|
|
633
|
+
f"sequence_start={self._seqstart})"
|
|
634
|
+
)
|
|
631
635
|
|
|
632
636
|
@property
|
|
633
637
|
def sequence_start(self):
|
|
@@ -643,7 +647,8 @@ class AnnotatedSequence(Copyable):
|
|
|
643
647
|
|
|
644
648
|
def __copy_create__(self):
|
|
645
649
|
return AnnotatedSequence(
|
|
646
|
-
self._annotation.copy(), self._sequence.copy, self._seqstart
|
|
650
|
+
self._annotation.copy(), self._sequence.copy, self._seqstart
|
|
651
|
+
)
|
|
647
652
|
|
|
648
653
|
def reverse_complement(self, sequence_start=1):
|
|
649
654
|
"""
|
|
@@ -676,10 +681,12 @@ class AnnotatedSequence(Copyable):
|
|
|
676
681
|
# (seq_len-1) -> last sequence index
|
|
677
682
|
# (loc.last-self._seqstart) -> location to index
|
|
678
683
|
# ... + rev_seqstart -> index to location
|
|
679
|
-
rev_loc_first
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
684
|
+
rev_loc_first = (
|
|
685
|
+
(seq_len - 1) - (loc.last - self._seqstart) + rev_seqstart
|
|
686
|
+
)
|
|
687
|
+
rev_loc_last = (
|
|
688
|
+
(seq_len - 1) - (loc.first - self._seqstart) + rev_seqstart
|
|
689
|
+
)
|
|
683
690
|
|
|
684
691
|
if loc.strand == Location.Strand.FORWARD:
|
|
685
692
|
rev_loc_strand = Location.Strand.REVERSE
|
|
@@ -700,17 +707,14 @@ class AnnotatedSequence(Copyable):
|
|
|
700
707
|
if loc.defect & Location.Defect.BETWEEN:
|
|
701
708
|
rev_loc_defect |= Location.Defect.BETWEEN
|
|
702
709
|
|
|
703
|
-
rev_locs.append(
|
|
704
|
-
|
|
705
|
-
rev_loc_strand, rev_loc_defect
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
))
|
|
710
|
+
rev_locs.append(
|
|
711
|
+
Location(
|
|
712
|
+
rev_loc_first, rev_loc_last, rev_loc_strand, rev_loc_defect
|
|
713
|
+
)
|
|
714
|
+
)
|
|
715
|
+
rev_features.append(Feature(feature.key, rev_locs, feature.qual))
|
|
710
716
|
|
|
711
|
-
return AnnotatedSequence(
|
|
712
|
-
Annotation(rev_features), rev_sequence, rev_seqstart
|
|
713
|
-
)
|
|
717
|
+
return AnnotatedSequence(Annotation(rev_features), rev_sequence, rev_seqstart)
|
|
714
718
|
|
|
715
719
|
def __getitem__(self, index):
|
|
716
720
|
if isinstance(index, Feature):
|
|
@@ -730,24 +734,20 @@ class AnnotatedSequence(Copyable):
|
|
|
730
734
|
pass
|
|
731
735
|
elif strand is None:
|
|
732
736
|
strand = loc.strand
|
|
733
|
-
else:
|
|
737
|
+
else: # loc.strand != strand
|
|
734
738
|
raise ValueError(
|
|
735
739
|
"All locations of the feature must have the same "
|
|
736
740
|
"strand direction"
|
|
737
741
|
)
|
|
738
742
|
if strand == Location.Strand.FORWARD:
|
|
739
|
-
sorted_locs = sorted(
|
|
740
|
-
locs, key=lambda loc: loc.first
|
|
741
|
-
)
|
|
743
|
+
sorted_locs = sorted(locs, key=lambda loc: loc.first)
|
|
742
744
|
else:
|
|
743
|
-
sorted_locs = sorted(
|
|
744
|
-
locs, key=lambda loc: loc.last, reverse=True
|
|
745
|
-
)
|
|
745
|
+
sorted_locs = sorted(locs, key=lambda loc: loc.last, reverse=True)
|
|
746
746
|
# Merge the sequences corresponding to the ordered locations
|
|
747
747
|
for loc in sorted_locs:
|
|
748
748
|
slice_start = loc.first - self._seqstart
|
|
749
749
|
# +1 due to exclusive stop
|
|
750
|
-
slice_stop = loc.last - self._seqstart +1
|
|
750
|
+
slice_stop = loc.last - self._seqstart + 1
|
|
751
751
|
add_seq = self._sequence[slice_start:slice_stop]
|
|
752
752
|
if loc.strand == Location.Strand.REVERSE:
|
|
753
753
|
add_seq = add_seq.reverse().complement()
|
|
@@ -775,17 +775,17 @@ class AnnotatedSequence(Copyable):
|
|
|
775
775
|
rel_seq_start = self._seqstart
|
|
776
776
|
else:
|
|
777
777
|
rel_seq_start = index.start
|
|
778
|
-
return AnnotatedSequence(
|
|
779
|
-
|
|
780
|
-
|
|
778
|
+
return AnnotatedSequence(
|
|
779
|
+
self._annotation[index],
|
|
780
|
+
self._sequence[seq_start:seq_stop],
|
|
781
|
+
rel_seq_start,
|
|
782
|
+
)
|
|
781
783
|
|
|
782
784
|
elif isinstance(index, numbers.Integral):
|
|
783
785
|
return self._sequence[index - self._seqstart]
|
|
784
786
|
|
|
785
787
|
else:
|
|
786
|
-
raise TypeError(
|
|
787
|
-
f"'{type(index).__name__}' instances are invalid indices"
|
|
788
|
-
)
|
|
788
|
+
raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
|
|
789
789
|
|
|
790
790
|
def __setitem__(self, index, item):
|
|
791
791
|
if isinstance(index, Feature):
|
|
@@ -796,10 +796,11 @@ class AnnotatedSequence(Copyable):
|
|
|
796
796
|
for loc in index.locs:
|
|
797
797
|
slice_start = loc.first - self._seqstart
|
|
798
798
|
# +1 due to exclusive stop
|
|
799
|
-
slice_stop = loc.last - self._seqstart +1
|
|
799
|
+
slice_stop = loc.last - self._seqstart + 1
|
|
800
800
|
interval_size = slice_stop - slice_start
|
|
801
|
-
self._sequence[slice_start:slice_stop]
|
|
802
|
-
|
|
801
|
+
self._sequence[slice_start:slice_stop] = sub_seq[
|
|
802
|
+
sub_seq_i : sub_seq_i + interval_size
|
|
803
|
+
]
|
|
803
804
|
sub_seq_i += interval_size
|
|
804
805
|
elif isinstance(index, slice):
|
|
805
806
|
# Sequence start correction
|
|
@@ -817,13 +818,13 @@ class AnnotatedSequence(Copyable):
|
|
|
817
818
|
# Item is a symbol
|
|
818
819
|
self._sequence[index - self._seqstart] = item
|
|
819
820
|
else:
|
|
820
|
-
raise TypeError(
|
|
821
|
-
f"'{type(index).__name__}' instances are invalid indices"
|
|
822
|
-
)
|
|
821
|
+
raise TypeError(f"'{type(index).__name__}' instances are invalid indices")
|
|
823
822
|
|
|
824
823
|
def __eq__(self, item):
|
|
825
824
|
if not isinstance(item, AnnotatedSequence):
|
|
826
825
|
return False
|
|
827
|
-
return (
|
|
828
|
-
|
|
829
|
-
|
|
826
|
+
return (
|
|
827
|
+
self.annotation == item.annotation
|
|
828
|
+
and self.sequence == item.sequence
|
|
829
|
+
and self._seqstart == item._seqstart
|
|
830
|
+
)
|
|
Binary file
|