biotite 1.1.0__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/localapp.py +2 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +3 -3
- biotite/application/muscle/app5.py +3 -3
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +21 -1
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +3 -1
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +2 -3
- biotite/database/uniprot/check.py +2 -2
- biotite/database/uniprot/download.py +2 -5
- biotite/database/uniprot/query.py +3 -4
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +16 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +198 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +15 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +71 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +33 -11
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +21 -21
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +2 -2
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +6 -6
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +12 -3
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +35 -35
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +2 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +19 -25
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +12 -5
- biotite/sequence/sequence.py +1 -2
- biotite/structure/__init__.py +2 -0
- biotite/structure/alphabet/i3d.py +1 -2
- biotite/structure/alphabet/pb.py +1 -2
- biotite/structure/alphabet/unkerasify.py +8 -2
- biotite/structure/atoms.py +35 -27
- biotite/structure/basepairs.py +26 -26
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +8 -5
- biotite/structure/box.py +19 -21
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -67
- biotite/structure/chains.py +5 -37
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +27 -28
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +15 -15
- biotite/structure/hbond.py +17 -19
- biotite/structure/info/atoms.py +11 -2
- biotite/structure/info/ccd.py +0 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +0 -3
- biotite/structure/info/misc.py +0 -1
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +1 -2
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +39 -13
- biotite/structure/io/pdb/convert.py +2 -3
- biotite/structure/io/pdb/file.py +11 -22
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +4 -4
- biotite/structure/io/pdbx/bcif.py +22 -7
- biotite/structure/io/pdbx/cif.py +20 -7
- biotite/structure/io/pdbx/component.py +6 -0
- biotite/structure/io/pdbx/compress.py +2 -2
- biotite/structure/io/pdbx/convert.py +222 -33
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +0 -15
- biotite/structure/pseudoknots.py +7 -13
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +13 -24
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +2 -1
- biotite/structure/segments.py +68 -9
- biotite/structure/sequence.py +0 -1
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +74 -62
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +12 -25
- biotite/structure/util.py +3 -3
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/METADATA +5 -3
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/RECORD +155 -135
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
- {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -9,7 +9,7 @@ __all__ = ["plot_sequence_logo"]
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
from biotite.sequence.alphabet import LetterAlphabet
|
|
11
11
|
from biotite.sequence.graphics.colorschemes import get_color_scheme
|
|
12
|
-
from biotite.visualize import
|
|
12
|
+
from biotite.visualize import plot_scaled_text
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
|
|
@@ -29,7 +29,7 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
|
|
|
29
29
|
----------
|
|
30
30
|
axes : Axes
|
|
31
31
|
The axes to draw the logo on.
|
|
32
|
-
profile: SequenceProfile
|
|
32
|
+
profile : SequenceProfile
|
|
33
33
|
The logo is created based on this profile.
|
|
34
34
|
scheme : str or list of (tuple or str)
|
|
35
35
|
Either a valid color scheme name
|
|
@@ -38,7 +38,8 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
|
|
|
38
38
|
The list length must be at least as long as the
|
|
39
39
|
length of the alphabet used by the `profile`.
|
|
40
40
|
**kwargs
|
|
41
|
-
Additional
|
|
41
|
+
Additional parameters for the :class:`matplotlib.font_manager.FontProperties`
|
|
42
|
+
of the text or the created :class:`matplotlib.patches.PathPatch`.
|
|
42
43
|
|
|
43
44
|
References
|
|
44
45
|
----------
|
|
@@ -69,23 +70,20 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
|
|
|
69
70
|
index_order = np.argsort(symbols_heights)
|
|
70
71
|
start_height = 0
|
|
71
72
|
for j in index_order[i]:
|
|
72
|
-
# Stack the symbols at position on top of the
|
|
73
|
+
# Stack the symbols at position on top of the preceding one
|
|
73
74
|
height = symbols_heights[i, j]
|
|
74
75
|
if height > 0:
|
|
75
76
|
symbol = alphabet.decode(j)
|
|
76
|
-
|
|
77
|
+
plot_scaled_text(
|
|
78
|
+
axes,
|
|
79
|
+
symbol,
|
|
77
80
|
i + 0.5,
|
|
78
81
|
start_height,
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
va="bottom",
|
|
82
|
+
width=1,
|
|
83
|
+
height=height,
|
|
82
84
|
color=colors[j],
|
|
83
|
-
# Best results are obtained with this font size
|
|
84
|
-
size=1,
|
|
85
85
|
**kwargs,
|
|
86
86
|
)
|
|
87
|
-
text.set_clip_on(True)
|
|
88
|
-
set_font_size_in_coord(text, width=1, height=height)
|
|
89
87
|
start_height += height
|
|
90
88
|
|
|
91
89
|
axes.set_xlim(0.5, len(profile.symbols) + 0.5)
|
|
@@ -275,8 +275,7 @@ def _process_nucleotide_sequence(x):
|
|
|
275
275
|
def _convert_to_string(sequence, as_rna):
|
|
276
276
|
if not isinstance(sequence.get_alphabet(), LetterAlphabet):
|
|
277
277
|
raise ValueError(
|
|
278
|
-
"Only sequences using single letter alphabets "
|
|
279
|
-
"can be stored in a FASTA file"
|
|
278
|
+
"Only sequences using single letter alphabets can be stored in a FASTA file"
|
|
280
279
|
)
|
|
281
280
|
if isinstance(sequence, NucleotideSequence) and as_rna:
|
|
282
281
|
return str(sequence).replace("T", "U")
|
|
@@ -102,7 +102,7 @@ class FastaFile(TextFile, MutableMapping):
|
|
|
102
102
|
if not isinstance(header, str):
|
|
103
103
|
raise IndexError("'FastaFile' only supports header strings as keys")
|
|
104
104
|
if not isinstance(seq_str, str):
|
|
105
|
-
raise TypeError("'FastaFile' only supports sequence strings
|
|
105
|
+
raise TypeError("'FastaFile' only supports sequence strings as values")
|
|
106
106
|
# Create lines for new header and sequence (with line breaks)
|
|
107
107
|
new_lines = [">" + header.replace("\n", "").strip()] + wrap_string(
|
|
108
108
|
seq_str, width=self._chars_per_line
|
|
@@ -302,10 +302,10 @@ class FastqFile(TextFile, MutableMapping):
|
|
|
302
302
|
else: # score_len > seq_len
|
|
303
303
|
raise InvalidFileError(
|
|
304
304
|
f"The amount of scores is not equal to the sequence "
|
|
305
|
-
f"length for the sequence in line {seq_start_i+1} "
|
|
305
|
+
f"length for the sequence in line {seq_start_i + 1} "
|
|
306
306
|
)
|
|
307
307
|
else:
|
|
308
|
-
raise InvalidFileError(f"Line {i+1} in FASTQ file is invalid")
|
|
308
|
+
raise InvalidFileError(f"Line {i + 1} in FASTQ file is invalid")
|
|
309
309
|
# At the end of the file, the last sequence or score block
|
|
310
310
|
# must have properly ended
|
|
311
311
|
if in_sequence or in_scores:
|
|
@@ -392,7 +392,7 @@ class FastqFile(TextFile, MutableMapping):
|
|
|
392
392
|
yield identifier, ("".join(seq_str_list), scores)
|
|
393
393
|
else: # score_len > seq_len
|
|
394
394
|
raise InvalidFileError(
|
|
395
|
-
"The amount of scores is not equal to the sequence
|
|
395
|
+
"The amount of scores is not equal to the sequence length"
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
else:
|
|
@@ -80,7 +80,7 @@ class GenBankFile(TextFile):
|
|
|
80
80
|
>>> print(content)
|
|
81
81
|
['One line', 'A second line']
|
|
82
82
|
>>> print(subfields)
|
|
83
|
-
OrderedDict(
|
|
83
|
+
OrderedDict({'SUBFIELD1': ['Single Line'], 'SUBFIELD2': ['Two', 'lines']})
|
|
84
84
|
|
|
85
85
|
Adding an additional field:
|
|
86
86
|
|
|
@@ -391,7 +391,7 @@ class GenBankFile(TextFile):
|
|
|
391
391
|
The field name.
|
|
392
392
|
content : list of str
|
|
393
393
|
The content lines.
|
|
394
|
-
|
|
394
|
+
subfields : dict of str -> str, optional
|
|
395
395
|
The subfields of the field.
|
|
396
396
|
The dictionary maps subfield names to the content lines of
|
|
397
397
|
the respective subfield.
|
|
@@ -432,7 +432,7 @@ class GenBankFile(TextFile):
|
|
|
432
432
|
The field name.
|
|
433
433
|
content : list of str
|
|
434
434
|
The content lines.
|
|
435
|
-
|
|
435
|
+
subfields : dict of str -> str, optional
|
|
436
436
|
The subfields of the field.
|
|
437
437
|
The dictionary maps subfield names to the content lines of
|
|
438
438
|
the respective subfield.
|
|
@@ -82,6 +82,8 @@ def get_annotated_sequence(gb_file, format="gb", include_only=None):
|
|
|
82
82
|
----------
|
|
83
83
|
gb_file : GenBankFile
|
|
84
84
|
The GenBank file to read the fields from.
|
|
85
|
+
format : {'gb', 'gp'}
|
|
86
|
+
Whether the file is a *GenBank* or *GenPept* file.
|
|
85
87
|
include_only : iterable object of str, optional
|
|
86
88
|
List of names of feature keys, which should be included
|
|
87
89
|
in the annotation. By default all features are included.
|
|
@@ -84,7 +84,7 @@ def set_annotation(gff_file, annotation, seqid=None, source=None, is_stranded=Tr
|
|
|
84
84
|
for feature in sorted(annotation):
|
|
85
85
|
if len(feature.locs) > 1 and "ID" not in feature.qual:
|
|
86
86
|
raise ValueError(
|
|
87
|
-
"The 'Id' qualifier is required
|
|
87
|
+
"The 'Id' qualifier is required for features with multiple locations"
|
|
88
88
|
)
|
|
89
89
|
## seqid ##
|
|
90
90
|
if seqid is not None and " " in seqid:
|
biotite/sequence/io/gff/file.py
CHANGED
|
@@ -303,8 +303,7 @@ class GFFFile(TextFile):
|
|
|
303
303
|
def __getitem__(self, index):
|
|
304
304
|
if (index >= 0 and index >= len(self)) or (index < 0 and -index > len(self)):
|
|
305
305
|
raise IndexError(
|
|
306
|
-
f"Index {index} is out of range for GFFFile with "
|
|
307
|
-
f"{len(self)} entries"
|
|
306
|
+
f"Index {index} is out of range for GFFFile with {len(self)} entries"
|
|
308
307
|
)
|
|
309
308
|
|
|
310
309
|
line_index = self._entries[index]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
biotite/sequence/profile.py
CHANGED
|
@@ -95,7 +95,7 @@ class SequenceProfile(object):
|
|
|
95
95
|
gaps : ndarray, dtype=int, shape=n
|
|
96
96
|
Array which indicates the number of gaps at each position.
|
|
97
97
|
alphabet : Alphabet, length=k
|
|
98
|
-
Alphabet of sequences of sequence profile
|
|
98
|
+
Alphabet of sequences of sequence profile.
|
|
99
99
|
|
|
100
100
|
Attributes
|
|
101
101
|
----------
|
|
@@ -264,15 +264,14 @@ class SequenceProfile(object):
|
|
|
264
264
|
alphabet : Alphabet, optional
|
|
265
265
|
This alphabet will be used when creating the SequenceProfile
|
|
266
266
|
object. If no alphabet is selected, the alphabet for this
|
|
267
|
-
SequenceProfile
|
|
267
|
+
:class:`SequenceProfile`.
|
|
268
268
|
object will be calculated from the sequences of object
|
|
269
269
|
Alignment.
|
|
270
|
-
(Default: None).
|
|
271
270
|
|
|
272
271
|
Returns
|
|
273
272
|
-------
|
|
274
273
|
profile: SequenceProfile
|
|
275
|
-
The created SequenceProfile object
|
|
274
|
+
The created :class:`SequenceProfile` object.
|
|
276
275
|
"""
|
|
277
276
|
sequences = get_codes(alignment)
|
|
278
277
|
if alphabet is None:
|
|
@@ -306,13 +305,12 @@ class SequenceProfile(object):
|
|
|
306
305
|
If true, returns consensus sequence as GeneralSequence
|
|
307
306
|
object.
|
|
308
307
|
Otherwise, the consensus sequence object type is chosen
|
|
309
|
-
based on the alphabet of this SequenceProfile object
|
|
310
|
-
(Default: False).
|
|
308
|
+
based on the alphabet of this SequenceProfile object.
|
|
311
309
|
|
|
312
310
|
Returns
|
|
313
311
|
-------
|
|
314
312
|
consensus: Sequence
|
|
315
|
-
The calculated consensus sequence
|
|
313
|
+
The calculated consensus sequence.
|
|
316
314
|
"""
|
|
317
315
|
# https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes
|
|
318
316
|
if as_general:
|
|
@@ -420,14 +418,13 @@ class SequenceProfile(object):
|
|
|
420
418
|
|
|
421
419
|
Parameters
|
|
422
420
|
----------
|
|
423
|
-
pseudocount: int, optional
|
|
421
|
+
pseudocount : int, optional
|
|
424
422
|
Amount added to the number of observed cases in order to
|
|
425
423
|
change the expected probability of the PPM.
|
|
426
|
-
(Default: 0)
|
|
427
424
|
|
|
428
425
|
Returns
|
|
429
426
|
-------
|
|
430
|
-
ppm: ndarray, dtype=float, shape=(n,k)
|
|
427
|
+
ppm : ndarray, dtype=float, shape=(n,k)
|
|
431
428
|
The calculated position probability matrix.
|
|
432
429
|
"""
|
|
433
430
|
if pseudocount < 0:
|
|
@@ -456,17 +453,16 @@ class SequenceProfile(object):
|
|
|
456
453
|
|
|
457
454
|
Parameters
|
|
458
455
|
----------
|
|
459
|
-
|
|
460
|
-
Amount added to the number of observed cases in order to change
|
|
461
|
-
the expected probability of the PPM.
|
|
462
|
-
(Default: 0)
|
|
463
|
-
background_frequencies: ndarray, shape=(k,), dtype=float, optional
|
|
456
|
+
background_frequencies : ndarray, shape=(k,), dtype=float, optional
|
|
464
457
|
The background frequencies for each symbol in the alphabet.
|
|
465
458
|
By default, a uniform distribution is assumed.
|
|
459
|
+
pseudocount : int, optional
|
|
460
|
+
Amount added to the number of observed cases in order to change
|
|
461
|
+
the expected probability of the PPM.
|
|
466
462
|
|
|
467
463
|
Returns
|
|
468
464
|
-------
|
|
469
|
-
pwm: ndarray, dtype=float, shape=(n,k)
|
|
465
|
+
pwm : ndarray, dtype=float, shape=(n,k)
|
|
470
466
|
The calculated position weight matrix.
|
|
471
467
|
"""
|
|
472
468
|
if background_frequencies is None:
|
|
@@ -490,14 +486,13 @@ class SequenceProfile(object):
|
|
|
490
486
|
----------
|
|
491
487
|
sequence : Sequence
|
|
492
488
|
The input sequence.
|
|
493
|
-
pseudocount: int, optional
|
|
489
|
+
pseudocount : int, optional
|
|
494
490
|
Amount added to the number of observed cases in order to change
|
|
495
491
|
the expected probability of the PPM.
|
|
496
|
-
(Default: 0)
|
|
497
492
|
|
|
498
493
|
Returns
|
|
499
494
|
-------
|
|
500
|
-
probability: float
|
|
495
|
+
probability : float
|
|
501
496
|
The calculated probability for the input sequence based on
|
|
502
497
|
the PPM.
|
|
503
498
|
"""
|
|
@@ -526,17 +521,16 @@ class SequenceProfile(object):
|
|
|
526
521
|
----------
|
|
527
522
|
sequence : Sequence
|
|
528
523
|
The input sequence.
|
|
529
|
-
|
|
530
|
-
Amount added to the number of observed cases in order to change
|
|
531
|
-
the expected probability of the PPM.
|
|
532
|
-
(Default: 0)
|
|
533
|
-
background_frequencies: ndarray, shape=(k,), dtype=float, optional
|
|
524
|
+
background_frequencies : ndarray, shape=(k,), dtype=float, optional
|
|
534
525
|
The background frequencies for each symbol in the alphabet.
|
|
535
526
|
By default a uniform distribution is assumed.
|
|
527
|
+
pseudocount : int, optional
|
|
528
|
+
Amount added to the number of observed cases in order to change
|
|
529
|
+
the expected probability of the PPM.
|
|
536
530
|
|
|
537
531
|
Returns
|
|
538
532
|
-------
|
|
539
|
-
score: float
|
|
533
|
+
score : float
|
|
540
534
|
The calculated score for the input sequence based on
|
|
541
535
|
the PWM.
|
|
542
536
|
"""
|
biotite/sequence/search.py
CHANGED
|
@@ -39,7 +39,6 @@ def find_subsequence(sequence, query):
|
|
|
39
39
|
>>> sub_seq = NucleotideSequence("TGA")
|
|
40
40
|
>>> print(find_subsequence(main_seq, sub_seq))
|
|
41
41
|
[2 6]
|
|
42
|
-
|
|
43
42
|
"""
|
|
44
43
|
if not sequence.get_alphabet().extends(query.get_alphabet()):
|
|
45
44
|
raise ValueError("The sequences alphabets are not equal")
|
biotite/sequence/seqtypes.py
CHANGED
|
@@ -200,7 +200,6 @@ class NucleotideSequence(Sequence):
|
|
|
200
200
|
TGCGAA
|
|
201
201
|
>>> print(dna_seq.reverse().complement())
|
|
202
202
|
AAGCGT
|
|
203
|
-
|
|
204
203
|
"""
|
|
205
204
|
# Interpreting the sequence code of this object in the
|
|
206
205
|
# complementary alphabet gives the complementary symbols
|
|
@@ -226,7 +225,7 @@ class NucleotideSequence(Sequence):
|
|
|
226
225
|
complete : bool, optional
|
|
227
226
|
If true, the complete sequence is translated. In this case
|
|
228
227
|
the sequence length must be a multiple of 3.
|
|
229
|
-
Otherwise all ORFs are translated.
|
|
228
|
+
Otherwise all ORFs are translated.
|
|
230
229
|
codon_table : CodonTable, optional
|
|
231
230
|
The codon table to be used. By default the default table
|
|
232
231
|
will be used
|
|
@@ -236,7 +235,6 @@ class NucleotideSequence(Sequence):
|
|
|
236
235
|
even if the start codon codes for another amino acid.
|
|
237
236
|
Otherwise the translation starts with the amino acid
|
|
238
237
|
the codon codes for. Only applies, if `complete` is false.
|
|
239
|
-
(Default: False)
|
|
240
238
|
|
|
241
239
|
Returns
|
|
242
240
|
-------
|
|
@@ -266,7 +264,6 @@ class NucleotideSequence(Sequence):
|
|
|
266
264
|
... print(seq)
|
|
267
265
|
MML*
|
|
268
266
|
ML*
|
|
269
|
-
|
|
270
267
|
"""
|
|
271
268
|
if self._alphabet != NucleotideSequence.alphabet_unamb:
|
|
272
269
|
raise AlphabetError("Translation requires unambiguous alphabet")
|
|
@@ -586,6 +583,11 @@ class ProteinSequence(Sequence):
|
|
|
586
583
|
in the protein and the average isotopic mass of one water
|
|
587
584
|
molecule.
|
|
588
585
|
|
|
586
|
+
Parameters
|
|
587
|
+
----------
|
|
588
|
+
monoisotopic : bool
|
|
589
|
+
Use the mass of the most common isotope.
|
|
590
|
+
|
|
589
591
|
Returns
|
|
590
592
|
-------
|
|
591
593
|
weight : float
|
|
@@ -599,7 +601,7 @@ class ProteinSequence(Sequence):
|
|
|
599
601
|
|
|
600
602
|
if np.isnan(weight):
|
|
601
603
|
raise ValueError(
|
|
602
|
-
"Sequence contains ambiguous amino acids,
|
|
604
|
+
"Sequence contains ambiguous amino acids, cannot calculate weight"
|
|
603
605
|
)
|
|
604
606
|
return weight
|
|
605
607
|
|
|
@@ -700,6 +702,11 @@ class PurePositionalSequence(Sequence):
|
|
|
700
702
|
This class is similar to :class:`PositionalSequence`, but the symbols are not
|
|
701
703
|
derived from an original sequence, but are the pure position.
|
|
702
704
|
Hence, there is no meaningful string representation of the sequence and its symbols.
|
|
705
|
+
|
|
706
|
+
Parameters
|
|
707
|
+
----------
|
|
708
|
+
length : int
|
|
709
|
+
The length of the sequence.
|
|
703
710
|
"""
|
|
704
711
|
|
|
705
712
|
def __init__(self, length):
|
biotite/sequence/sequence.py
CHANGED
|
@@ -139,7 +139,6 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
139
139
|
>>> dna_seq_concat = dna_seq + dna_seq_rev
|
|
140
140
|
>>> print(dna_seq_concat)
|
|
141
141
|
ACGTAATGCA
|
|
142
|
-
|
|
143
142
|
"""
|
|
144
143
|
|
|
145
144
|
def __init__(self, sequence=()):
|
|
@@ -354,7 +353,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
|
|
|
354
353
|
|
|
355
354
|
Parameters
|
|
356
355
|
----------
|
|
357
|
-
|
|
356
|
+
alphabet_size : int
|
|
358
357
|
The size of the alphabet.
|
|
359
358
|
|
|
360
359
|
Returns
|
biotite/structure/__init__.py
CHANGED
|
@@ -125,9 +125,11 @@ from .pseudoknots import *
|
|
|
125
125
|
from .rdf import *
|
|
126
126
|
from .repair import *
|
|
127
127
|
from .residues import *
|
|
128
|
+
from .rings import *
|
|
128
129
|
from .sasa import *
|
|
129
130
|
from .sequence import *
|
|
130
131
|
from .sse import *
|
|
131
132
|
from .superimpose import *
|
|
133
|
+
from .tm import *
|
|
132
134
|
from .transform import *
|
|
133
135
|
# util and segments are used internally
|
|
@@ -31,7 +31,7 @@ class I3DSequence(Sequence):
|
|
|
31
31
|
May take upper or lower case letters.
|
|
32
32
|
By default the sequence is empty.
|
|
33
33
|
|
|
34
|
-
See
|
|
34
|
+
See Also
|
|
35
35
|
--------
|
|
36
36
|
to_3di : Create 3Di sequences from a structure.
|
|
37
37
|
|
|
@@ -39,7 +39,6 @@ class I3DSequence(Sequence):
|
|
|
39
39
|
----------
|
|
40
40
|
|
|
41
41
|
.. footbibliography::
|
|
42
|
-
|
|
43
42
|
"""
|
|
44
43
|
|
|
45
44
|
alphabet = LetterAlphabet("acdefghiklmnpqrstvwy")
|
biotite/structure/alphabet/pb.py
CHANGED
|
@@ -52,7 +52,7 @@ class ProteinBlocksSequence(Sequence):
|
|
|
52
52
|
May take upper or lower case letters.
|
|
53
53
|
By default the sequence is empty.
|
|
54
54
|
|
|
55
|
-
See
|
|
55
|
+
See Also
|
|
56
56
|
--------
|
|
57
57
|
to_protein_blocks : Create *Protein Blocks* sequences from a structure.
|
|
58
58
|
|
|
@@ -60,7 +60,6 @@ class ProteinBlocksSequence(Sequence):
|
|
|
60
60
|
----------
|
|
61
61
|
|
|
62
62
|
.. footbibliography::
|
|
63
|
-
|
|
64
63
|
"""
|
|
65
64
|
|
|
66
65
|
alphabet = LetterAlphabet("abcdefghijklmnopz")
|
|
@@ -41,7 +41,13 @@ class ActivationType(enum.IntEnum):
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class KerasifyParser:
|
|
44
|
-
"""
|
|
44
|
+
"""
|
|
45
|
+
An incomplete parser for model files serialized with `kerasify`.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
file : file-like
|
|
50
|
+
The ``.kerasify`` file to parse.
|
|
45
51
|
|
|
46
52
|
Notes
|
|
47
53
|
-----
|
|
@@ -65,7 +71,7 @@ class KerasifyParser:
|
|
|
65
71
|
(w1,) = self._get("I")
|
|
66
72
|
(b0,) = self._get("I")
|
|
67
73
|
weights = (
|
|
68
|
-
np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
|
|
74
|
+
np.frombuffer(self._read(f"={w0 * w1}f"), dtype="f4")
|
|
69
75
|
.reshape(w0, w1)
|
|
70
76
|
.copy()
|
|
71
77
|
)
|
biotite/structure/atoms.py
CHANGED
|
@@ -35,6 +35,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
35
35
|
:class:`AtomArrayStack`.
|
|
36
36
|
It implements functionality for annotation arrays and also
|
|
37
37
|
rudimentarily for coordinates.
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
length : int
|
|
42
|
+
The amount of atoms in the structure.
|
|
38
43
|
"""
|
|
39
44
|
|
|
40
45
|
def __init__(self, length):
|
|
@@ -96,11 +101,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
96
101
|
The annotation category to be added.
|
|
97
102
|
dtype : type or str
|
|
98
103
|
A type instance or a valid *NumPy* *dtype* string.
|
|
99
|
-
Defines the type of the annotation
|
|
104
|
+
Defines the type of the annotation.
|
|
100
105
|
|
|
101
106
|
See Also
|
|
102
107
|
--------
|
|
103
|
-
set_annotation
|
|
108
|
+
set_annotation : Assign directly a value to an annotation.
|
|
104
109
|
|
|
105
110
|
Notes
|
|
106
111
|
-----
|
|
@@ -171,7 +176,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
171
176
|
array = np.asarray(array)
|
|
172
177
|
if len(array) != self._array_length:
|
|
173
178
|
raise IndexError(
|
|
174
|
-
f"Expected array length {self._array_length},
|
|
179
|
+
f"Expected array length {self._array_length}, but got {len(array)}"
|
|
175
180
|
)
|
|
176
181
|
if category in self._annot:
|
|
177
182
|
# If the annotation already exists, find the compatible dtype
|
|
@@ -244,7 +249,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
244
249
|
----------
|
|
245
250
|
item : AtomArray or AtomArrayStack
|
|
246
251
|
The object to compare the annotation arrays with.
|
|
247
|
-
equal_nan: bool
|
|
252
|
+
equal_nan : bool
|
|
248
253
|
Whether to count `nan` values as equal. Default: True.
|
|
249
254
|
|
|
250
255
|
Returns
|
|
@@ -323,17 +328,16 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
323
328
|
if isinstance(self, AtomArray):
|
|
324
329
|
if value.ndim != 2:
|
|
325
330
|
raise ValueError(
|
|
326
|
-
"A 2-dimensional ndarray is expected
|
|
331
|
+
"A 2-dimensional ndarray is expected for an AtomArray"
|
|
327
332
|
)
|
|
328
333
|
elif isinstance(self, AtomArrayStack):
|
|
329
334
|
if value.ndim != 3:
|
|
330
335
|
raise ValueError(
|
|
331
|
-
"A 3-dimensional ndarray is expected
|
|
336
|
+
"A 3-dimensional ndarray is expected for an AtomArrayStack"
|
|
332
337
|
)
|
|
333
338
|
if value.shape[-2] != self._array_length:
|
|
334
339
|
raise ValueError(
|
|
335
|
-
f"Expected array length {self._array_length}, "
|
|
336
|
-
f"but got {len(value)}"
|
|
340
|
+
f"Expected array length {self._array_length}, but got {len(value)}"
|
|
337
341
|
)
|
|
338
342
|
if value.shape[-1] != 3:
|
|
339
343
|
raise TypeError("Expected 3 coordinates for each atom")
|
|
@@ -358,13 +362,12 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
|
|
|
358
362
|
if isinstance(self, AtomArray):
|
|
359
363
|
if value.ndim != 2:
|
|
360
364
|
raise ValueError(
|
|
361
|
-
"A 2-dimensional ndarray is expected
|
|
365
|
+
"A 2-dimensional ndarray is expected for an AtomArray"
|
|
362
366
|
)
|
|
363
367
|
else: # AtomArrayStack
|
|
364
368
|
if value.ndim != 3:
|
|
365
369
|
raise ValueError(
|
|
366
|
-
"A 3-dimensional ndarray is expected "
|
|
367
|
-
"for an AtomArrayStack"
|
|
370
|
+
"A 3-dimensional ndarray is expected for an AtomArrayStack"
|
|
368
371
|
)
|
|
369
372
|
if value.shape[-2:] != (3, 3):
|
|
370
373
|
raise TypeError("Box must be a 3x3 matrix (three vectors)")
|
|
@@ -448,9 +451,9 @@ class Atom(Copyable):
|
|
|
448
451
|
|
|
449
452
|
Parameters
|
|
450
453
|
----------
|
|
451
|
-
coord: list or ndarray
|
|
454
|
+
coord : list or ndarray
|
|
452
455
|
The x, y and z coordinates.
|
|
453
|
-
kwargs
|
|
456
|
+
**kwargs
|
|
454
457
|
Atom annotations as key value pair.
|
|
455
458
|
|
|
456
459
|
Attributes
|
|
@@ -472,7 +475,6 @@ class Atom(Copyable):
|
|
|
472
475
|
CA
|
|
473
476
|
>>> print(atom.coord)
|
|
474
477
|
[1. 2. 3.]
|
|
475
|
-
|
|
476
478
|
"""
|
|
477
479
|
|
|
478
480
|
def __init__(self, coord, **kwargs):
|
|
@@ -632,6 +634,10 @@ class AtomArray(_AtomArrayBase):
|
|
|
632
634
|
The single value in the tuple is
|
|
633
635
|
the length of the atom array.
|
|
634
636
|
|
|
637
|
+
See Also
|
|
638
|
+
--------
|
|
639
|
+
AtomArrayStack : Representation of multiple structure models.
|
|
640
|
+
|
|
635
641
|
Examples
|
|
636
642
|
--------
|
|
637
643
|
Creating an atom array from atoms:
|
|
@@ -700,10 +706,6 @@ class AtomArray(_AtomArrayBase):
|
|
|
700
706
|
Shape of the array.
|
|
701
707
|
The single value in the tuple is
|
|
702
708
|
the :func:`array_length()`.
|
|
703
|
-
|
|
704
|
-
See Also
|
|
705
|
-
--------
|
|
706
|
-
array_length
|
|
707
709
|
"""
|
|
708
710
|
return (self.array_length(),)
|
|
709
711
|
|
|
@@ -895,9 +897,9 @@ class AtomArrayStack(_AtomArrayBase):
|
|
|
895
897
|
The numbers correspond to the stack depth
|
|
896
898
|
and array length, respectively.
|
|
897
899
|
|
|
898
|
-
See
|
|
900
|
+
See Also
|
|
899
901
|
--------
|
|
900
|
-
AtomArray
|
|
902
|
+
AtomArray : Representation of a single structure model.
|
|
901
903
|
|
|
902
904
|
Examples
|
|
903
905
|
--------
|
|
@@ -1195,9 +1197,18 @@ def array(atoms):
|
|
|
1195
1197
|
f"annotation categories as the atom at index 0"
|
|
1196
1198
|
)
|
|
1197
1199
|
array = AtomArray(len(atoms))
|
|
1200
|
+
|
|
1198
1201
|
# Add all (also optional) annotation categories
|
|
1199
1202
|
for name in names:
|
|
1200
|
-
|
|
1203
|
+
value = atoms[0]._annot[name]
|
|
1204
|
+
if isinstance(value, str):
|
|
1205
|
+
# Find maximum string length across all atoms for this annotation
|
|
1206
|
+
max_len = max(len(str(atom._annot[name])) for atom in atoms)
|
|
1207
|
+
dtype = f"<U{max_len}"
|
|
1208
|
+
else:
|
|
1209
|
+
dtype = type(value)
|
|
1210
|
+
array.add_annotation(name, dtype=dtype)
|
|
1211
|
+
|
|
1201
1212
|
# Add all atoms to AtomArray
|
|
1202
1213
|
for i in range(len(atoms)):
|
|
1203
1214
|
for name in names:
|
|
@@ -1443,8 +1454,7 @@ def repeat(atoms, coord):
|
|
|
1443
1454
|
if isinstance(atoms, AtomArray):
|
|
1444
1455
|
if coord.ndim != 3:
|
|
1445
1456
|
raise ValueError(
|
|
1446
|
-
f"Expected 3 dimensions for the coordinate array, "
|
|
1447
|
-
f"but got {coord.ndim}"
|
|
1457
|
+
f"Expected 3 dimensions for the coordinate array, but got {coord.ndim}"
|
|
1448
1458
|
)
|
|
1449
1459
|
repeated = AtomArray(new_length)
|
|
1450
1460
|
repeated.coord = coord.reshape((new_length, 3))
|
|
@@ -1452,16 +1462,14 @@ def repeat(atoms, coord):
|
|
|
1452
1462
|
elif isinstance(atoms, AtomArrayStack):
|
|
1453
1463
|
if coord.ndim != 4:
|
|
1454
1464
|
raise ValueError(
|
|
1455
|
-
f"Expected 4 dimensions for the coordinate array, "
|
|
1456
|
-
f"but got {coord.ndim}"
|
|
1465
|
+
f"Expected 4 dimensions for the coordinate array, but got {coord.ndim}"
|
|
1457
1466
|
)
|
|
1458
1467
|
repeated = AtomArrayStack(atoms.stack_depth(), new_length)
|
|
1459
1468
|
repeated.coord = coord.reshape((atoms.stack_depth(), new_length, 3))
|
|
1460
1469
|
|
|
1461
1470
|
else:
|
|
1462
1471
|
raise TypeError(
|
|
1463
|
-
f"Expected 'AtomArray' or 'AtomArrayStack', "
|
|
1464
|
-
f"but got {type(atoms).__name__}"
|
|
1472
|
+
f"Expected 'AtomArray' or 'AtomArrayStack', but got {type(atoms).__name__}"
|
|
1465
1473
|
)
|
|
1466
1474
|
|
|
1467
1475
|
for category in atoms.get_annotation_categories():
|