biotite 0.41.1__cp312-cp312-win_amd64.whl → 1.0.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +36 -10
- biotite/application/application.py +22 -11
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +16 -5
- biotite/sequence/align/__init__.py +160 -6
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +35 -35
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +112 -126
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +64 -64
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +226 -240
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +88 -100
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +21 -7
- biotite/structure/info/groups.py +10 -15
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -52
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
- biotite-1.0.0.dist-info/RECORD +322 -0
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.1.dist-info/RECORD +0 -340
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/sequence.py
CHANGED
|
@@ -10,22 +10,21 @@ __name__ = "biotite.sequence"
|
|
|
10
10
|
__author__ = "Patrick Kunzmann"
|
|
11
11
|
__all__ = ["Sequence"]
|
|
12
12
|
|
|
13
|
-
import numbers
|
|
14
13
|
import abc
|
|
14
|
+
import numbers
|
|
15
15
|
import numpy as np
|
|
16
|
-
from .
|
|
17
|
-
from
|
|
16
|
+
from biotite.copyable import Copyable
|
|
17
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
_size_uint32 = np.iinfo(np.uint32).max +1
|
|
19
|
+
_size_uint8 = np.iinfo(np.uint8).max + 1
|
|
20
|
+
_size_uint16 = np.iinfo(np.uint16).max + 1
|
|
21
|
+
_size_uint32 = np.iinfo(np.uint32).max + 1
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
26
25
|
"""
|
|
27
26
|
The abstract base class for all sequence types.
|
|
28
|
-
|
|
27
|
+
|
|
29
28
|
A :class:`Sequence` can be seen as a succession of symbols, that are
|
|
30
29
|
elements in the allowed set of symbols, the :class:`Alphabet`.
|
|
31
30
|
Internally, a :class:`Sequence` object uses a *NumPy*
|
|
@@ -36,35 +35,35 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
36
35
|
:class:`Sequence`, into an integer. These integer values are called
|
|
37
36
|
symbol code, the encoding of an entire sequence of symbols is
|
|
38
37
|
called sequence code.
|
|
39
|
-
|
|
40
|
-
The size of the symbol code type in the array is determined by the
|
|
38
|
+
|
|
39
|
+
The size of the symbol code type in the array is determined by the
|
|
41
40
|
size of the :class:`Alphabet`:
|
|
42
41
|
If the :class:`Alphabet` contains 256 symbols or less, one byte is
|
|
43
42
|
used per array element; if the :class:`Alphabet` contains
|
|
44
43
|
between 257 and 65536 symbols, two bytes are used, and so on.
|
|
45
|
-
|
|
44
|
+
|
|
46
45
|
Two :class:`Sequence` objects are equal if they are instances of the
|
|
47
46
|
same class, have the same :class:`Alphabet` and have equal sequence
|
|
48
47
|
codes.
|
|
49
48
|
Comparison with a string or list of symbols evaluates always to
|
|
50
49
|
false.
|
|
51
|
-
|
|
50
|
+
|
|
52
51
|
A :class:`Sequence` can be indexed by any 1-D index a
|
|
53
52
|
:class:`ndarray` accepts.
|
|
54
53
|
If the index is a single integer, the decoded symbol at that
|
|
55
54
|
position is returned, otherwise a subsequence is returned.
|
|
56
|
-
|
|
55
|
+
|
|
57
56
|
Individual symbols of the sequence can also be exchanged in indexed
|
|
58
57
|
form: If an integer is used as index, the item is treated as a
|
|
59
58
|
symbol. Any other index (slice, index list, boolean mask) expects
|
|
60
59
|
multiple symbols, either as list of symbols, as :class:`ndarray`
|
|
61
60
|
containing a sequence code or another :class:`Sequence` instance.
|
|
62
61
|
Concatenation of two sequences is achieved with the '+' operator.
|
|
63
|
-
|
|
62
|
+
|
|
64
63
|
Each subclass of :class:`Sequence` needs to overwrite the abstract
|
|
65
64
|
method :func:`get_alphabet()`, which specifies the alphabet the
|
|
66
65
|
:class:`Sequence` uses.
|
|
67
|
-
|
|
66
|
+
|
|
68
67
|
Parameters
|
|
69
68
|
----------
|
|
70
69
|
sequence : iterable object, optional
|
|
@@ -72,7 +71,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
72
71
|
For alphabets containing single letter strings, this parameter
|
|
73
72
|
may also be a :class:`str` object.
|
|
74
73
|
By default the sequence is empty.
|
|
75
|
-
|
|
74
|
+
|
|
76
75
|
Attributes
|
|
77
76
|
----------
|
|
78
77
|
code : ndarray
|
|
@@ -85,12 +84,12 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
85
84
|
alphabet : Alphabet
|
|
86
85
|
The alphabet of this sequence. Cannot be set.
|
|
87
86
|
Equal to `get_alphabet()`.
|
|
88
|
-
|
|
87
|
+
|
|
89
88
|
Examples
|
|
90
89
|
--------
|
|
91
90
|
Creating a DNA sequence from string and print the symbols and the
|
|
92
91
|
code:
|
|
93
|
-
|
|
92
|
+
|
|
94
93
|
>>> dna_seq = NucleotideSequence("ACGTA")
|
|
95
94
|
>>> print(dna_seq)
|
|
96
95
|
ACGTA
|
|
@@ -100,18 +99,18 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
100
99
|
['A' 'C' 'G' 'T' 'A']
|
|
101
100
|
>>> print(list(dna_seq))
|
|
102
101
|
['A', 'C', 'G', 'T', 'A']
|
|
103
|
-
|
|
102
|
+
|
|
104
103
|
Sequence indexing:
|
|
105
|
-
|
|
104
|
+
|
|
106
105
|
>>> print(dna_seq[1:3])
|
|
107
106
|
CG
|
|
108
107
|
>>> print(dna_seq[[0,2,4]])
|
|
109
108
|
AGA
|
|
110
109
|
>>> print(dna_seq[np.array([False,False,True,True,True])])
|
|
111
110
|
GTA
|
|
112
|
-
|
|
111
|
+
|
|
113
112
|
Sequence manipulation:
|
|
114
|
-
|
|
113
|
+
|
|
115
114
|
>>> dna_copy = dna_seq.copy()
|
|
116
115
|
>>> dna_copy[2] = "C"
|
|
117
116
|
>>> print(dna_copy)
|
|
@@ -134,28 +133,28 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
134
133
|
>>> dna_seq_rev = dna_seq.reverse()
|
|
135
134
|
>>> print(dna_seq_rev)
|
|
136
135
|
ATGCA
|
|
137
|
-
|
|
136
|
+
|
|
138
137
|
Concatenate the two sequences:
|
|
139
|
-
|
|
138
|
+
|
|
140
139
|
>>> dna_seq_concat = dna_seq + dna_seq_rev
|
|
141
140
|
>>> print(dna_seq_concat)
|
|
142
141
|
ACGTAATGCA
|
|
143
|
-
|
|
142
|
+
|
|
144
143
|
"""
|
|
145
|
-
|
|
144
|
+
|
|
146
145
|
def __init__(self, sequence=()):
|
|
147
146
|
self.symbols = sequence
|
|
148
147
|
|
|
149
148
|
def copy(self, new_seq_code=None):
|
|
150
149
|
"""
|
|
151
150
|
Copy the object.
|
|
152
|
-
|
|
151
|
+
|
|
153
152
|
Parameters
|
|
154
153
|
----------
|
|
155
154
|
new_seq_code : ndarray, optional
|
|
156
155
|
If this parameter is set, the sequence code is set to this
|
|
157
156
|
value, rather than the original sequence code.
|
|
158
|
-
|
|
157
|
+
|
|
159
158
|
Returns
|
|
160
159
|
-------
|
|
161
160
|
copy
|
|
@@ -171,51 +170,51 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
171
170
|
clone.code = new_seq_code
|
|
172
171
|
self.__copy_fill__(clone)
|
|
173
172
|
return clone
|
|
174
|
-
|
|
173
|
+
|
|
175
174
|
@property
|
|
176
175
|
def symbols(self):
|
|
177
176
|
return self.get_alphabet().decode_multiple(self.code)
|
|
178
|
-
|
|
177
|
+
|
|
179
178
|
@symbols.setter
|
|
180
179
|
def symbols(self, value):
|
|
181
180
|
alph = self.get_alphabet()
|
|
182
181
|
dtype = Sequence.dtype(len(alph))
|
|
183
182
|
self._seq_code = alph.encode_multiple(value, dtype)
|
|
184
|
-
|
|
183
|
+
|
|
185
184
|
@property
|
|
186
185
|
def code(self):
|
|
187
186
|
return self._seq_code
|
|
188
|
-
|
|
187
|
+
|
|
189
188
|
@code.setter
|
|
190
189
|
def code(self, value):
|
|
191
190
|
dtype = Sequence.dtype(len(self.get_alphabet()))
|
|
192
191
|
if not isinstance(value, np.ndarray):
|
|
193
192
|
raise TypeError("Sequence code must be an integer ndarray")
|
|
194
193
|
self._seq_code = value.astype(dtype, copy=False)
|
|
195
|
-
|
|
194
|
+
|
|
196
195
|
@property
|
|
197
196
|
def alphabet(self):
|
|
198
197
|
return self.get_alphabet()
|
|
199
|
-
|
|
198
|
+
|
|
200
199
|
@abc.abstractmethod
|
|
201
200
|
def get_alphabet(self):
|
|
202
201
|
"""
|
|
203
202
|
Get the :class:`Alphabet` of the :class:`Sequence`.
|
|
204
|
-
|
|
203
|
+
|
|
205
204
|
This method must be overwritten, when subclassing
|
|
206
205
|
:class:`Sequence`.
|
|
207
|
-
|
|
206
|
+
|
|
208
207
|
Returns
|
|
209
208
|
-------
|
|
210
209
|
alphabet : Alphabet
|
|
211
210
|
:class:`Sequence` alphabet.
|
|
212
211
|
"""
|
|
213
212
|
pass
|
|
214
|
-
|
|
213
|
+
|
|
215
214
|
def reverse(self, copy=True):
|
|
216
215
|
"""
|
|
217
216
|
Reverse the :class:`Sequence`.
|
|
218
|
-
|
|
217
|
+
|
|
219
218
|
Parameters
|
|
220
219
|
----------
|
|
221
220
|
copy : bool, optional
|
|
@@ -225,15 +224,15 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
225
224
|
In this case, manipulations on the returned sequence would
|
|
226
225
|
also affect this object.
|
|
227
226
|
Otherwise, the sequence code is copied.
|
|
228
|
-
|
|
227
|
+
|
|
229
228
|
Returns
|
|
230
229
|
-------
|
|
231
230
|
reversed : Sequence
|
|
232
231
|
The reversed :class:`Sequence`.
|
|
233
|
-
|
|
232
|
+
|
|
234
233
|
Examples
|
|
235
234
|
--------
|
|
236
|
-
|
|
235
|
+
|
|
237
236
|
>>> dna_seq = NucleotideSequence("ACGTA")
|
|
238
237
|
>>> dna_seq_rev = dna_seq.reverse()
|
|
239
238
|
>>> print(dna_seq_rev)
|
|
@@ -243,33 +242,33 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
243
242
|
if copy:
|
|
244
243
|
reversed_code = np.copy(reversed_code)
|
|
245
244
|
return self.copy(reversed_code)
|
|
246
|
-
|
|
245
|
+
|
|
247
246
|
def is_valid(self):
|
|
248
247
|
"""
|
|
249
248
|
Check, if the sequence contains a valid sequence code.
|
|
250
|
-
|
|
249
|
+
|
|
251
250
|
A sequence code is valid, if at each sequence position the
|
|
252
251
|
code is smaller than the size of the alphabet.
|
|
253
|
-
|
|
252
|
+
|
|
254
253
|
Invalid code means that the code cannot be decoded into
|
|
255
254
|
symbols. Furthermore invalid code can lead to serious
|
|
256
255
|
errors in alignments, since the substitution matrix
|
|
257
256
|
is indexed with an invalid index.
|
|
258
|
-
|
|
257
|
+
|
|
259
258
|
Returns
|
|
260
259
|
-------
|
|
261
260
|
valid : bool
|
|
262
261
|
True, if the sequence is valid, false otherwise.
|
|
263
262
|
"""
|
|
264
263
|
return (self.code < len(self.get_alphabet())).all()
|
|
265
|
-
|
|
264
|
+
|
|
266
265
|
def get_symbol_frequency(self):
|
|
267
266
|
"""
|
|
268
267
|
Get the number of occurrences of each symbol in the sequence.
|
|
269
|
-
|
|
268
|
+
|
|
270
269
|
If a symbol does not occur in the sequence, but it is in the
|
|
271
270
|
alphabet, its number of occurrences is 0.
|
|
272
|
-
|
|
271
|
+
|
|
273
272
|
Returns
|
|
274
273
|
-------
|
|
275
274
|
frequency : dict
|
|
@@ -277,14 +276,12 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
277
276
|
corresponding number of occurrences in the sequence as
|
|
278
277
|
values.
|
|
279
278
|
"""
|
|
280
|
-
counts = np.bincount(
|
|
281
|
-
self._seq_code, minlength=len(self.get_alphabet())
|
|
282
|
-
)
|
|
279
|
+
counts = np.bincount(self._seq_code, minlength=len(self.get_alphabet()))
|
|
283
280
|
return {
|
|
284
|
-
symbol: count
|
|
285
|
-
in zip(self.get_alphabet().get_symbols(), counts)
|
|
281
|
+
symbol: count
|
|
282
|
+
for symbol, count in zip(self.get_alphabet().get_symbols(), counts)
|
|
286
283
|
}
|
|
287
|
-
|
|
284
|
+
|
|
288
285
|
def __getitem__(self, index):
|
|
289
286
|
alph = self.get_alphabet()
|
|
290
287
|
sub_seq = self._seq_code.__getitem__(index)
|
|
@@ -292,7 +289,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
292
289
|
return self.copy(sub_seq)
|
|
293
290
|
else:
|
|
294
291
|
return alph.decode(sub_seq)
|
|
295
|
-
|
|
292
|
+
|
|
296
293
|
def __setitem__(self, index, item):
|
|
297
294
|
alph = self.get_alphabet()
|
|
298
295
|
if isinstance(index, numbers.Integral):
|
|
@@ -308,32 +305,35 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
308
305
|
# Default: item is iterable object of symbols
|
|
309
306
|
code = alph.encode_multiple(item)
|
|
310
307
|
self._seq_code.__setitem__(index, code)
|
|
311
|
-
|
|
308
|
+
|
|
312
309
|
def __len__(self):
|
|
313
310
|
return len(self._seq_code)
|
|
314
|
-
|
|
311
|
+
|
|
315
312
|
def __iter__(self):
|
|
316
313
|
alph = self.get_alphabet()
|
|
317
314
|
i = 0
|
|
318
315
|
while i < len(self):
|
|
319
316
|
yield alph.decode(self._seq_code[i])
|
|
320
317
|
i += 1
|
|
321
|
-
|
|
318
|
+
|
|
322
319
|
def __eq__(self, item):
|
|
323
320
|
if not isinstance(item, type(self)):
|
|
324
321
|
return False
|
|
325
322
|
if self.get_alphabet() != item.get_alphabet():
|
|
326
323
|
return False
|
|
327
324
|
return np.array_equal(self._seq_code, item._seq_code)
|
|
328
|
-
|
|
325
|
+
|
|
329
326
|
def __str__(self):
|
|
330
327
|
alph = self.get_alphabet()
|
|
331
328
|
if isinstance(alph, LetterAlphabet):
|
|
332
|
-
return
|
|
333
|
-
|
|
329
|
+
return (
|
|
330
|
+
alph.decode_multiple(self._seq_code, as_bytes=True)
|
|
331
|
+
.tobytes()
|
|
332
|
+
.decode("ASCII")
|
|
333
|
+
)
|
|
334
334
|
else:
|
|
335
|
-
return "".join(alph.decode_multiple(self._seq_code))
|
|
336
|
-
|
|
335
|
+
return ", ".join([str(e) for e in alph.decode_multiple(self._seq_code)])
|
|
336
|
+
|
|
337
337
|
def __add__(self, sequence):
|
|
338
338
|
if self.get_alphabet().extends(sequence.get_alphabet()):
|
|
339
339
|
new_code = np.concatenate((self._seq_code, sequence._seq_code))
|
|
@@ -356,7 +356,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
356
356
|
----------
|
|
357
357
|
alphabet_size : int
|
|
358
358
|
The size of the alphabet.
|
|
359
|
-
|
|
359
|
+
|
|
360
360
|
Returns
|
|
361
361
|
-------
|
|
362
362
|
dtype
|
biotite/structure/__init__.py
CHANGED
|
@@ -104,9 +104,11 @@ __name__ = "biotite.structure"
|
|
|
104
104
|
__author__ = "Patrick Kunzmann"
|
|
105
105
|
|
|
106
106
|
from .atoms import *
|
|
107
|
+
from .basepairs import *
|
|
107
108
|
from .bonds import *
|
|
108
109
|
from .box import *
|
|
109
110
|
from .celllist import *
|
|
111
|
+
from .chains import *
|
|
110
112
|
from .charges import *
|
|
111
113
|
from .compare import *
|
|
112
114
|
from .density import *
|
|
@@ -122,11 +124,9 @@ from .pseudoknots import *
|
|
|
122
124
|
from .rdf import *
|
|
123
125
|
from .repair import *
|
|
124
126
|
from .residues import *
|
|
125
|
-
from .chains import *
|
|
126
127
|
from .sasa import *
|
|
127
128
|
from .sequence import *
|
|
128
129
|
from .sse import *
|
|
129
130
|
from .superimpose import *
|
|
130
131
|
from .transform import *
|
|
131
|
-
|
|
132
|
-
# util and resutil are used internally
|
|
132
|
+
# util and segments are used internally
|