biotite 0.41.1__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +36 -10
- biotite/application/application.py +22 -11
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +16 -5
- biotite/sequence/align/__init__.py +160 -6
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +35 -35
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +112 -126
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +64 -64
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +226 -240
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +88 -100
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +21 -7
- biotite/structure/info/groups.py +10 -15
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -52
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
- biotite-1.0.0.dist-info/RECORD +322 -0
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.1.dist-info/RECORD +0 -340
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -6,10 +6,9 @@ __name__ = "biotite.database.uniprot"
|
|
|
6
6
|
__author__ = "Maximilian Greil"
|
|
7
7
|
__all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
|
|
8
8
|
|
|
9
|
-
import requests
|
|
10
9
|
import abc
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
import requests
|
|
11
|
+
from biotite.database.uniprot.check import assert_valid_response
|
|
13
12
|
|
|
14
13
|
_base_url = "https://rest.uniprot.org/uniprotkb/search/"
|
|
15
14
|
|
|
@@ -122,22 +121,114 @@ class SimpleQuery(Query):
|
|
|
122
121
|
# Field identifiers are taken from
|
|
123
122
|
# https://www.uniprot.org/help/query-fields
|
|
124
123
|
_fields = [
|
|
125
|
-
"accession",
|
|
126
|
-
"
|
|
127
|
-
"
|
|
128
|
-
"
|
|
129
|
-
"
|
|
130
|
-
"
|
|
131
|
-
"
|
|
132
|
-
"
|
|
133
|
-
"
|
|
134
|
-
"
|
|
135
|
-
"
|
|
136
|
-
"
|
|
137
|
-
"
|
|
138
|
-
"
|
|
139
|
-
"
|
|
140
|
-
"
|
|
124
|
+
"accession",
|
|
125
|
+
"active",
|
|
126
|
+
"ft_init_met",
|
|
127
|
+
"ft_signal",
|
|
128
|
+
"ft_transit",
|
|
129
|
+
"ft_propep",
|
|
130
|
+
"ft_chain",
|
|
131
|
+
"ft_peptide",
|
|
132
|
+
"ft_topo_dom",
|
|
133
|
+
"ft_transmem",
|
|
134
|
+
"ft_intramem",
|
|
135
|
+
"ft_domain",
|
|
136
|
+
"ft_repeat",
|
|
137
|
+
"ft_zn_fing",
|
|
138
|
+
"ft_dna_bind",
|
|
139
|
+
"ft_region",
|
|
140
|
+
"ft_coiled",
|
|
141
|
+
"ft_motif",
|
|
142
|
+
"ft_compbias",
|
|
143
|
+
"ft_act_site",
|
|
144
|
+
"ft_binding",
|
|
145
|
+
"ft_site",
|
|
146
|
+
"ft_non_std",
|
|
147
|
+
"ft_mod_res",
|
|
148
|
+
"ft_lipid",
|
|
149
|
+
"ft_carbohyd",
|
|
150
|
+
"ft_disulfid",
|
|
151
|
+
"ft_crosslnk",
|
|
152
|
+
"ft_var_seq",
|
|
153
|
+
"ft_variant",
|
|
154
|
+
"ft_mutagen",
|
|
155
|
+
"ft_unsure",
|
|
156
|
+
"ft_conflict",
|
|
157
|
+
"ft_non_cons",
|
|
158
|
+
"ft_non_ter",
|
|
159
|
+
"ft_helix",
|
|
160
|
+
"ft_turn",
|
|
161
|
+
"ft_strand",
|
|
162
|
+
"lit_author",
|
|
163
|
+
"protein_name",
|
|
164
|
+
"chebi",
|
|
165
|
+
"citation",
|
|
166
|
+
"uniref_cluster_90",
|
|
167
|
+
"xrefcount_pdb",
|
|
168
|
+
"date_created",
|
|
169
|
+
"database",
|
|
170
|
+
"xref",
|
|
171
|
+
"ec",
|
|
172
|
+
"cc_function",
|
|
173
|
+
"cc_catalytic_activity",
|
|
174
|
+
"cc_cofactor",
|
|
175
|
+
"cc_activity_regulation",
|
|
176
|
+
"cc_biophysicochemical_properties",
|
|
177
|
+
"cc_subunit",
|
|
178
|
+
"cc_pathway",
|
|
179
|
+
"cc_scl_term",
|
|
180
|
+
"cc_tissue_specificity",
|
|
181
|
+
"cc_developmental_stage",
|
|
182
|
+
"cc_induction",
|
|
183
|
+
"cc_domain",
|
|
184
|
+
"cc_ptm cc_rna_editing",
|
|
185
|
+
"cc_mass_spectrometry",
|
|
186
|
+
"cc_polymorphism",
|
|
187
|
+
"cc_disease",
|
|
188
|
+
"cc_disruption_phenotype",
|
|
189
|
+
"cc_allergen",
|
|
190
|
+
"cc_toxic_dose",
|
|
191
|
+
"cc_biotechnology",
|
|
192
|
+
"cc_pharmaceutical",
|
|
193
|
+
"cc_miscellaneous",
|
|
194
|
+
"cc_similarity",
|
|
195
|
+
"cc_caution",
|
|
196
|
+
"cc_sequence_caution",
|
|
197
|
+
"existence",
|
|
198
|
+
"family",
|
|
199
|
+
"fragment",
|
|
200
|
+
"gene",
|
|
201
|
+
"gene_exact",
|
|
202
|
+
"go",
|
|
203
|
+
"virus_host_name",
|
|
204
|
+
"virus_host_id",
|
|
205
|
+
"accession_id",
|
|
206
|
+
"inchikey",
|
|
207
|
+
"protein_name",
|
|
208
|
+
"interactor",
|
|
209
|
+
"keyword",
|
|
210
|
+
"length",
|
|
211
|
+
"lineage",
|
|
212
|
+
"mass",
|
|
213
|
+
"cc_mass_spectrometry",
|
|
214
|
+
"date_modified",
|
|
215
|
+
"protein_name",
|
|
216
|
+
"organelle",
|
|
217
|
+
"organism_name",
|
|
218
|
+
"organism_id",
|
|
219
|
+
"plasmid",
|
|
220
|
+
"proteome",
|
|
221
|
+
"proteomecomponent",
|
|
222
|
+
"sec_acc",
|
|
223
|
+
"reviewed",
|
|
224
|
+
"scope",
|
|
225
|
+
"sequence",
|
|
226
|
+
"date_sequence_modified",
|
|
227
|
+
"strain",
|
|
228
|
+
"taxonomy_name",
|
|
229
|
+
"taxonomy_id",
|
|
230
|
+
"tissue",
|
|
231
|
+
"cc_webresource",
|
|
141
232
|
]
|
|
142
233
|
|
|
143
234
|
def __init__(self, field, term):
|
|
@@ -146,14 +237,11 @@ class SimpleQuery(Query):
|
|
|
146
237
|
raise ValueError(f"Unknown field identifier '{field}'")
|
|
147
238
|
if not _check_brackets(term):
|
|
148
239
|
raise ValueError(
|
|
149
|
-
|
|
240
|
+
"Query term contains illegal number of round brackets ( ) and/or square brackets [ ]"
|
|
150
241
|
)
|
|
151
|
-
for invalid_string in \
|
|
152
|
-
['"', "AND", "OR", "NOT", "\t", "\n"]:
|
|
242
|
+
for invalid_string in ['"', "AND", "OR", "NOT", "\t", "\n"]:
|
|
153
243
|
if invalid_string in term:
|
|
154
|
-
raise ValueError(
|
|
155
|
-
f"Query contains illegal term {invalid_string}"
|
|
156
|
-
)
|
|
244
|
+
raise ValueError(f"Query contains illegal term {invalid_string}")
|
|
157
245
|
if " " in term:
|
|
158
246
|
term = f'"{term}"'
|
|
159
247
|
self._field = field
|
|
@@ -198,12 +286,8 @@ def search(query, number=500):
|
|
|
198
286
|
['P12345']
|
|
199
287
|
"""
|
|
200
288
|
|
|
201
|
-
params = {
|
|
202
|
-
'query': str(query),
|
|
203
|
-
'format': 'list',
|
|
204
|
-
'size': str(number)
|
|
205
|
-
}
|
|
289
|
+
params = {"query": str(query), "format": "list", "size": str(number)}
|
|
206
290
|
r = requests.get(_base_url, params=params)
|
|
207
291
|
content = r.text
|
|
208
292
|
assert_valid_response(r.status_code)
|
|
209
|
-
return content.split(
|
|
293
|
+
return content.split("\n")[:-1]
|
biotite/file.py
CHANGED
|
@@ -4,16 +4,19 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = [
|
|
8
|
-
|
|
7
|
+
__all__ = [
|
|
8
|
+
"File",
|
|
9
|
+
"TextFile",
|
|
10
|
+
"InvalidFileError",
|
|
11
|
+
"SerializationError",
|
|
12
|
+
"DeserializationError",
|
|
13
|
+
]
|
|
9
14
|
|
|
10
15
|
import abc
|
|
16
|
+
import copy
|
|
11
17
|
import io
|
|
12
|
-
import warnings
|
|
13
18
|
from os import PathLike
|
|
14
|
-
|
|
15
|
-
from .copyable import Copyable
|
|
16
|
-
import copy
|
|
19
|
+
from biotite.copyable import Copyable
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
class File(Copyable, metaclass=abc.ABCMeta):
|
|
@@ -27,13 +30,6 @@ class File(Copyable, metaclass=abc.ABCMeta):
|
|
|
27
30
|
:func:`write()` method is used.
|
|
28
31
|
"""
|
|
29
32
|
|
|
30
|
-
def __init__(self):
|
|
31
|
-
# Support for deprecated instance method 'read()':
|
|
32
|
-
# When creating an instance, the 'read()' class method is
|
|
33
|
-
# replaced by the instance method, so that subsequent
|
|
34
|
-
# 'read()' calls are delegated to the instance method
|
|
35
|
-
self.read = self._deprecated_read
|
|
36
|
-
|
|
37
33
|
@classmethod
|
|
38
34
|
@abc.abstractmethod
|
|
39
35
|
def read(cls, file):
|
|
@@ -54,23 +50,6 @@ class File(Copyable, metaclass=abc.ABCMeta):
|
|
|
54
50
|
"""
|
|
55
51
|
pass
|
|
56
52
|
|
|
57
|
-
def _deprecated_read(self, file, *args, **kwargs):
|
|
58
|
-
"""
|
|
59
|
-
Support for deprecated instance method :func:`read()`.
|
|
60
|
-
|
|
61
|
-
Internally this calls the :func:`read()` class method and
|
|
62
|
-
replaces the data in `self` with the data from the newly created
|
|
63
|
-
:class:`File` object
|
|
64
|
-
"""
|
|
65
|
-
warnings.warn(
|
|
66
|
-
"Instance method 'read()' is deprecated, "
|
|
67
|
-
"use class method instead",
|
|
68
|
-
DeprecationWarning
|
|
69
|
-
)
|
|
70
|
-
cls = type(self)
|
|
71
|
-
new_file = cls.read(file, *args, **kwargs)
|
|
72
|
-
self.__dict__.update(new_file.__dict__)
|
|
73
|
-
|
|
74
53
|
@abc.abstractmethod
|
|
75
54
|
def write(self, file):
|
|
76
55
|
"""
|
|
@@ -209,12 +188,14 @@ class InvalidFileError(Exception):
|
|
|
209
188
|
either because the file does not contain the required data or
|
|
210
189
|
because the file is malformed.
|
|
211
190
|
"""
|
|
191
|
+
|
|
212
192
|
pass
|
|
213
193
|
|
|
214
194
|
|
|
215
195
|
class SerializationError(Exception):
|
|
216
196
|
pass
|
|
217
197
|
|
|
198
|
+
|
|
218
199
|
class DeserializationError(Exception):
|
|
219
200
|
pass
|
|
220
201
|
|
|
@@ -229,7 +210,7 @@ def wrap_string(text, width):
|
|
|
229
210
|
"""
|
|
230
211
|
lines = []
|
|
231
212
|
for i in range(0, len(text), width):
|
|
232
|
-
lines.append(text[i : i+width])
|
|
213
|
+
lines.append(text[i : i + width])
|
|
233
214
|
return lines
|
|
234
215
|
|
|
235
216
|
|
biotite/sequence/__init__.py
CHANGED
|
@@ -24,7 +24,15 @@ For example, ``'A'``, ``'C'``, ``'G'`` and ``'T'`` would be encoded into
|
|
|
24
24
|
These integer values are called *symbol code*, the encoding of an entire
|
|
25
25
|
sequence of symbols is called *sequence code*.
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
.. figure:: /static/assets/figures/symbol_encoding.png
|
|
28
|
+
:alt: Symbol encoding in Biotite
|
|
29
|
+
:scale: 50%
|
|
30
|
+
|
|
31
|
+
Taken from
|
|
32
|
+
`Kunzmann & Hamacher 2018 <https://doi.org/10.1186/s12859-018-2367-z>`_
|
|
33
|
+
licensed under `CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_.
|
|
34
|
+
|
|
35
|
+
The size of the symbol code type in the array is determined by the
|
|
28
36
|
size of the :class:`Alphabet`:
|
|
29
37
|
If the :class:`Alphabet` contains 256 symbols or less, one byte is used
|
|
30
38
|
per array element, between 257 and 65536 symbols, two bytes are used,
|
|
@@ -41,6 +49,7 @@ This approach has multiple advantages:
|
|
|
41
49
|
indifferent to the actual type of sequence.
|
|
42
50
|
- Symbol codes are directly indices for substitution matrices in
|
|
43
51
|
alignments
|
|
52
|
+
- *k-mers* can be computed fast
|
|
44
53
|
|
|
45
54
|
The abstract :class:`Sequence` superclass cannot be instantiated
|
|
46
55
|
directly, as it does not define an :class:`Alphabet` by itself.
|
|
@@ -55,19 +64,21 @@ The class :class:`GeneralSequence` allows the usage of a custom
|
|
|
55
64
|
Additionally, this subpackage provides support for sequence features,
|
|
56
65
|
as used in e.g. GenBank or GFF files.
|
|
57
66
|
A :class:`Feature` stores its key name, its qualifiers and locations.
|
|
58
|
-
An :class:`Annotation` is a group of multiple :class:`
|
|
67
|
+
An :class:`Annotation` is a group of multiple :class:`Feature` objects
|
|
59
68
|
and offers convenient location based indexing.
|
|
60
69
|
An :class:`AnnotatedSequence` combines an :class:`Annotation` and a
|
|
61
70
|
:class:`Sequence`.
|
|
71
|
+
|
|
72
|
+
Sequence profiles can be created with the :class:`SequenceProfile` class.
|
|
62
73
|
"""
|
|
63
74
|
|
|
64
75
|
__name__ = "biotite.sequence"
|
|
65
76
|
__author__ = "Patrick Kunzmann"
|
|
66
77
|
|
|
67
78
|
from .alphabet import *
|
|
79
|
+
from .annotation import *
|
|
80
|
+
from .codon import *
|
|
81
|
+
from .profile import *
|
|
68
82
|
from .search import *
|
|
69
83
|
from .seqtypes import *
|
|
70
84
|
from .sequence import *
|
|
71
|
-
from .codon import *
|
|
72
|
-
from .annotation import *
|
|
73
|
-
from .profile import *
|
|
@@ -22,11 +22,165 @@ These objects contain the original sequences and a trace, that describe
|
|
|
22
22
|
which positions (indices) in the sequences are aligned.
|
|
23
23
|
Optionally they also contain the similarity score.
|
|
24
24
|
|
|
25
|
-
The aligning functions
|
|
26
|
-
|
|
25
|
+
The aligning functions :func:`align_optimal()` and
|
|
26
|
+
:func:`align_multiple()` cover most use cases for pairwise and multiple
|
|
27
|
+
sequence alignments respectively.
|
|
28
|
+
|
|
29
|
+
However, *Biotite* also provides a modular system to build performant
|
|
30
|
+
heuristic alignment search methods, e.g. for finding homologies in a sequence
|
|
31
|
+
database or mapping reads to a genome.
|
|
32
|
+
The table below summarizes those provided functionalities.
|
|
33
|
+
The typical stages in alignment search, where those functionalities are used,
|
|
34
|
+
are arranged from top to bottom.
|
|
35
|
+
|
|
36
|
+
.. grid::
|
|
37
|
+
:gutter: 0
|
|
38
|
+
:class-container: sd-text-center
|
|
39
|
+
|
|
40
|
+
.. grid-item::
|
|
41
|
+
:padding: 2
|
|
42
|
+
:outline:
|
|
43
|
+
:columns: 3
|
|
44
|
+
|
|
45
|
+
**Entire k-mer set**
|
|
46
|
+
|
|
47
|
+
.. grid-item::
|
|
48
|
+
:padding: 2
|
|
49
|
+
:outline:
|
|
50
|
+
:columns: 9
|
|
51
|
+
|
|
52
|
+
.. grid::
|
|
53
|
+
:margin: 0
|
|
54
|
+
|
|
55
|
+
.. grid-item::
|
|
56
|
+
:padding: 2
|
|
57
|
+
:columns: 12
|
|
58
|
+
|
|
59
|
+
**k-mer subset selection**
|
|
60
|
+
|
|
61
|
+
.. grid-item::
|
|
62
|
+
:padding: 2
|
|
63
|
+
:columns: 4
|
|
64
|
+
|
|
65
|
+
Minimizers
|
|
66
|
+
|
|
67
|
+
:class:`MinimizerSelector`
|
|
68
|
+
|
|
69
|
+
.. grid-item::
|
|
70
|
+
:padding: 2
|
|
71
|
+
:columns: 4
|
|
72
|
+
|
|
73
|
+
Syncmers
|
|
74
|
+
|
|
75
|
+
:class:`SyncmerSelector`
|
|
76
|
+
|
|
77
|
+
:class:`CachedSyncmerSelector`
|
|
78
|
+
|
|
79
|
+
.. grid-item::
|
|
80
|
+
:padding: 2
|
|
81
|
+
:columns: 4
|
|
82
|
+
|
|
83
|
+
Mincode
|
|
84
|
+
|
|
85
|
+
:class:`MincodeSelector`
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
.. grid-item::
|
|
89
|
+
:padding: 2
|
|
90
|
+
:outline:
|
|
91
|
+
:columns: 12
|
|
92
|
+
|
|
93
|
+
.. grid::
|
|
94
|
+
:margin: 0
|
|
95
|
+
|
|
96
|
+
.. grid-item::
|
|
97
|
+
:padding: 2
|
|
98
|
+
:columns: 12
|
|
99
|
+
|
|
100
|
+
**k-mer indexing and matching**
|
|
101
|
+
|
|
102
|
+
.. grid-item::
|
|
103
|
+
:padding: 2
|
|
104
|
+
:columns: 6
|
|
105
|
+
|
|
106
|
+
Perfect hashing
|
|
107
|
+
|
|
108
|
+
:class:`KmerTable`
|
|
109
|
+
|
|
110
|
+
.. grid-item::
|
|
111
|
+
:padding: 2
|
|
112
|
+
:columns: 6
|
|
113
|
+
|
|
114
|
+
Space-efficient hashing
|
|
115
|
+
|
|
116
|
+
:class:`BucketKmerTable`
|
|
117
|
+
|
|
118
|
+
:func:`bucket_number()`
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
.. grid-item::
|
|
122
|
+
:padding: 2
|
|
123
|
+
:outline:
|
|
124
|
+
:columns: 12
|
|
125
|
+
|
|
126
|
+
.. grid::
|
|
127
|
+
:margin: 0
|
|
128
|
+
|
|
129
|
+
.. grid-item::
|
|
130
|
+
:padding: 2
|
|
131
|
+
:columns: 12
|
|
132
|
+
|
|
133
|
+
**Ungapped seed extension**
|
|
134
|
+
|
|
135
|
+
:func:`align_local_ungapped()`
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
.. grid-item::
|
|
139
|
+
:padding: 2
|
|
140
|
+
:outline:
|
|
141
|
+
:columns: 12
|
|
142
|
+
|
|
143
|
+
.. grid::
|
|
144
|
+
:margin: 0
|
|
145
|
+
|
|
146
|
+
.. grid-item::
|
|
147
|
+
:padding: 2
|
|
148
|
+
:columns: 12
|
|
149
|
+
|
|
150
|
+
**Gapped alignment**
|
|
151
|
+
|
|
152
|
+
.. grid-item::
|
|
153
|
+
:padding: 2
|
|
154
|
+
:columns: 6
|
|
155
|
+
|
|
156
|
+
Banded local/semiglobal alignment
|
|
157
|
+
|
|
158
|
+
:func:`align_banded()`
|
|
159
|
+
|
|
160
|
+
.. grid-item::
|
|
161
|
+
:padding: 2
|
|
162
|
+
:columns: 6
|
|
163
|
+
|
|
164
|
+
Local alignment (*X-drop*)
|
|
165
|
+
|
|
166
|
+
:func:`align_local_gapped()`
|
|
167
|
+
|
|
168
|
+
.. grid-item::
|
|
169
|
+
:padding: 2
|
|
170
|
+
:outline:
|
|
171
|
+
:columns: 12
|
|
172
|
+
|
|
173
|
+
.. grid::
|
|
174
|
+
:margin: 0
|
|
175
|
+
|
|
176
|
+
.. grid-item::
|
|
177
|
+
:padding: 2
|
|
178
|
+
:columns: 12
|
|
179
|
+
|
|
180
|
+
**Significance evaluation**
|
|
181
|
+
|
|
182
|
+
:class:`EValueEstimator`
|
|
27
183
|
|
|
28
|
-
This subpackage also contains functionality for finding *k-mer* matches
|
|
29
|
-
between two sequences, allowing fast heuristic pairwise alignments.
|
|
30
184
|
"""
|
|
31
185
|
|
|
32
186
|
__name__ = "biotite.sequence.align"
|
|
@@ -37,8 +191,8 @@ from .banded import *
|
|
|
37
191
|
from .buckets import *
|
|
38
192
|
from .cigar import *
|
|
39
193
|
from .kmeralphabet import *
|
|
40
|
-
from .kmertable import *
|
|
41
194
|
from .kmersimilarity import *
|
|
195
|
+
from .kmertable import *
|
|
42
196
|
from .localgapped import *
|
|
43
197
|
from .localungapped import *
|
|
44
198
|
from .matrix import *
|
|
@@ -46,4 +200,4 @@ from .multiple import *
|
|
|
46
200
|
from .pairwise import *
|
|
47
201
|
from .permutation import *
|
|
48
202
|
from .selector import *
|
|
49
|
-
from .statistics import *
|
|
203
|
+
from .statistics import *
|