biotite 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.2.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +36 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +5 -18
- biotite/application/muscle/app5.py +5 -5
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +22 -2
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +9 -3
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +8 -9
- biotite/database/uniprot/check.py +22 -17
- biotite/database/uniprot/download.py +3 -6
- biotite/database/uniprot/query.py +4 -5
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +16 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +198 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +15 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +71 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +49 -14
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +26 -26
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +19 -2
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +58 -48
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +284 -57
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +35 -35
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/alphabet.py +5 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/sequence/profile.py +105 -29
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +136 -8
- biotite/sequence/sequence.py +1 -2
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +6 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +163 -66
- biotite/structure/basepairs.py +26 -26
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/bonds.pyx +79 -25
- biotite/structure/box.py +19 -21
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/celllist.pyx +83 -67
- biotite/structure/chains.py +5 -37
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +27 -28
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +74 -127
- biotite/structure/hbond.py +17 -19
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +24 -15
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -34
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +62 -19
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -22
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +4 -4
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +80 -53
- biotite/structure/io/pdb/convert.py +4 -3
- biotite/structure/io/pdb/file.py +85 -25
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +36 -36
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +54 -15
- biotite/structure/io/pdbx/cif.py +92 -66
- biotite/structure/io/pdbx/component.py +15 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +410 -75
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +141 -156
- biotite/structure/pseudoknots.py +7 -13
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +13 -24
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/sasa.pyx +2 -1
- biotite/structure/segments.py +69 -11
- biotite/structure/sequence.py +0 -1
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +74 -62
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +12 -25
- biotite/structure/util.py +76 -4
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -9,7 +9,6 @@ import numbers
|
|
|
9
9
|
import textwrap
|
|
10
10
|
from collections.abc import Sequence
|
|
11
11
|
import numpy as np
|
|
12
|
-
from biotite.sequence.alphabet import LetterAlphabet
|
|
13
12
|
|
|
14
13
|
__all__ = [
|
|
15
14
|
"Alignment",
|
|
@@ -20,6 +19,7 @@ __all__ = [
|
|
|
20
19
|
"score",
|
|
21
20
|
"find_terminal_gaps",
|
|
22
21
|
"remove_terminal_gaps",
|
|
22
|
+
"remove_gaps",
|
|
23
23
|
]
|
|
24
24
|
|
|
25
25
|
|
|
@@ -111,7 +111,7 @@ class Alignment(object):
|
|
|
111
111
|
for i in range(len(self.trace)):
|
|
112
112
|
j = self.trace[i][seq_index]
|
|
113
113
|
if j != -1:
|
|
114
|
-
seq_str += self.sequences[seq_index][j]
|
|
114
|
+
seq_str += str(self.sequences[seq_index][j])
|
|
115
115
|
else:
|
|
116
116
|
seq_str += "-"
|
|
117
117
|
return seq_str
|
|
@@ -133,7 +133,7 @@ class Alignment(object):
|
|
|
133
133
|
# has a non-single letter alphabet
|
|
134
134
|
all_single_letter = True
|
|
135
135
|
for seq in self.sequences:
|
|
136
|
-
if not
|
|
136
|
+
if not _is_single_letter(seq.alphabet):
|
|
137
137
|
all_single_letter = False
|
|
138
138
|
if all_single_letter:
|
|
139
139
|
# First dimension: sequence number,
|
|
@@ -304,7 +304,7 @@ def get_symbols(alignment):
|
|
|
304
304
|
|
|
305
305
|
See Also
|
|
306
306
|
--------
|
|
307
|
-
get_codes
|
|
307
|
+
get_codes : Get the sequence codes of the sequences in the alignment.
|
|
308
308
|
|
|
309
309
|
Examples
|
|
310
310
|
--------
|
|
@@ -362,9 +362,9 @@ def get_sequence_identity(alignment, mode="not_terminal"):
|
|
|
362
362
|
identity : float
|
|
363
363
|
The sequence identity, ranging between 0 and 1.
|
|
364
364
|
|
|
365
|
-
See
|
|
365
|
+
See Also
|
|
366
366
|
--------
|
|
367
|
-
get_pairwise_sequence_identity
|
|
367
|
+
get_pairwise_sequence_identity : Get sequence identity for each pair of alignment rows.
|
|
368
368
|
"""
|
|
369
369
|
codes = get_codes(alignment)
|
|
370
370
|
|
|
@@ -425,9 +425,9 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
|
|
|
425
425
|
identity : ndarray, dtype=float, shape=(n,n)
|
|
426
426
|
The pairwise sequence identity, ranging between 0 and 1.
|
|
427
427
|
|
|
428
|
-
See
|
|
428
|
+
See Also
|
|
429
429
|
--------
|
|
430
|
-
get_sequence_identity
|
|
430
|
+
get_sequence_identity : Get sequence identity over all alignment rows.
|
|
431
431
|
"""
|
|
432
432
|
codes = get_codes(alignment)
|
|
433
433
|
n_seq = len(codes)
|
|
@@ -490,10 +490,9 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
|
|
|
490
490
|
penalty is used. The first integer in the tuple is the gap
|
|
491
491
|
opening penalty, the second integer is the gap extension
|
|
492
492
|
penalty.
|
|
493
|
-
The values need to be negative.
|
|
493
|
+
The values need to be negative.
|
|
494
494
|
terminal_penalty : bool, optional
|
|
495
495
|
If true, gap penalties are applied to terminal gaps.
|
|
496
|
-
(Default: True)
|
|
497
496
|
|
|
498
497
|
Returns
|
|
499
498
|
-------
|
|
@@ -570,9 +569,9 @@ def find_terminal_gaps(alignment):
|
|
|
570
569
|
When these indices are used as slice index for an alignment or
|
|
571
570
|
trace, the index would remove terminal gaps.
|
|
572
571
|
|
|
573
|
-
See
|
|
572
|
+
See Also
|
|
574
573
|
--------
|
|
575
|
-
remove_terminal_gaps
|
|
574
|
+
remove_terminal_gaps : Remove terminal gap columns directly.
|
|
576
575
|
|
|
577
576
|
Examples
|
|
578
577
|
--------
|
|
@@ -628,9 +627,9 @@ def remove_terminal_gaps(alignment):
|
|
|
628
627
|
A shallow copy of the input `alignment` with a truncated trace,
|
|
629
628
|
that does not contain alignment columns with terminal gaps.
|
|
630
629
|
|
|
631
|
-
See
|
|
630
|
+
See Also
|
|
632
631
|
--------
|
|
633
|
-
find_terminal_gaps
|
|
632
|
+
find_terminal_gaps : Only find terminal gap columns.
|
|
634
633
|
|
|
635
634
|
Examples
|
|
636
635
|
--------
|
|
@@ -665,3 +664,39 @@ def remove_terminal_gaps(alignment):
|
|
|
665
664
|
"no overlap and the resulting alignment would be empty"
|
|
666
665
|
)
|
|
667
666
|
return alignment[start:stop]
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def remove_gaps(alignment):
|
|
670
|
+
"""
|
|
671
|
+
Remove all gap columns from an alignment.
|
|
672
|
+
|
|
673
|
+
Parameters
|
|
674
|
+
----------
|
|
675
|
+
alignment : Alignment
|
|
676
|
+
The alignment to be modified.
|
|
677
|
+
|
|
678
|
+
Returns
|
|
679
|
+
-------
|
|
680
|
+
truncated_alignment : Alignment
|
|
681
|
+
The alignment without gap columns.
|
|
682
|
+
|
|
683
|
+
See Also
|
|
684
|
+
--------
|
|
685
|
+
remove_terminal_gaps : Remove only terminal gap columns.
|
|
686
|
+
"""
|
|
687
|
+
non_gap_mask = (alignment.trace != -1).all(axis=1)
|
|
688
|
+
return alignment[non_gap_mask]
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
def _is_single_letter(alphabet):
|
|
692
|
+
"""
|
|
693
|
+
More relaxed version of :func:`biotite.sequence.alphabet.is_letter_alphabet()`:
|
|
694
|
+
It is sufficient that the string representation of each symbol is only
|
|
695
|
+
a single character.
|
|
696
|
+
"""
|
|
697
|
+
if alphabet.is_letter_alphabet():
|
|
698
|
+
return True
|
|
699
|
+
for symbol in alphabet:
|
|
700
|
+
if len(str(symbol)) != 1:
|
|
701
|
+
return False
|
|
702
|
+
return True
|
|
Binary file
|
|
@@ -54,7 +54,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
54
54
|
aligned to each other, if :math:`D_L \leq j - i \leq D_U`.
|
|
55
55
|
With increasing width of the diagonal band, the probability to find
|
|
56
56
|
the optimal alignment, but also the computation time increases.
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
Parameters
|
|
59
59
|
----------
|
|
60
60
|
seq1, seq2 : Sequence
|
|
@@ -84,15 +84,15 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
84
84
|
The maximum number of alignments returned.
|
|
85
85
|
When the number of branches exceeds this value in the traceback
|
|
86
86
|
step, no further branches are created.
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
Returns
|
|
89
89
|
-------
|
|
90
90
|
alignments : list of Alignment
|
|
91
91
|
The generated alignments.
|
|
92
92
|
Each alignment in the list has the same similarity score,
|
|
93
93
|
which is the maximum score possible within the defined band.
|
|
94
|
-
|
|
95
|
-
See
|
|
94
|
+
|
|
95
|
+
See Also
|
|
96
96
|
--------
|
|
97
97
|
align_optimal
|
|
98
98
|
Guarantees to find the optimal alignment at the cost of greater
|
|
@@ -110,7 +110,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
110
110
|
yield a more optimal alignment.
|
|
111
111
|
Considerations on how to find a suitable band width are discussed in
|
|
112
112
|
:footcite:`Gibrat2018`.
|
|
113
|
-
|
|
113
|
+
|
|
114
114
|
The restriction to a limited band is the central difference between
|
|
115
115
|
the banded alignment heuristic and the optimal alignment
|
|
116
116
|
algorithms :footcite:`Needleman1970, Smith1981`.
|
|
@@ -151,12 +151,12 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
151
151
|
Filled cells, i.e. cells within the band, are indicated by ``x``.
|
|
152
152
|
The shorter sequence is always represented by the first dimension
|
|
153
153
|
of the table in this implementation.
|
|
154
|
-
|
|
154
|
+
|
|
155
155
|
References
|
|
156
156
|
----------
|
|
157
|
-
|
|
157
|
+
|
|
158
158
|
.. footbibliography::
|
|
159
|
-
|
|
159
|
+
|
|
160
160
|
Examples
|
|
161
161
|
--------
|
|
162
162
|
|
|
@@ -203,7 +203,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
203
203
|
raise ValueError(
|
|
204
204
|
"Maximum number of returned alignments must be at least 1"
|
|
205
205
|
)
|
|
206
|
-
|
|
206
|
+
|
|
207
207
|
# The shorter sequence is the one on the left of the matrix
|
|
208
208
|
# -> shorter sequence is 'seq1'
|
|
209
209
|
if len(seq2) < len(seq1):
|
|
@@ -214,9 +214,6 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
214
214
|
else:
|
|
215
215
|
is_swapped = False
|
|
216
216
|
lower_diag, upper_diag = min(band), max(band)
|
|
217
|
-
band_width = upper_diag - lower_diag + 1
|
|
218
|
-
if band_width < 1:
|
|
219
|
-
raise ValueError("The width of the band is 0")
|
|
220
217
|
if len(seq1) + upper_diag <= 0 or lower_diag >= len(seq2):
|
|
221
218
|
raise ValueError(
|
|
222
219
|
"Alignment band is out of range, the band allows no overlap "
|
|
@@ -226,6 +223,9 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
226
223
|
# covers the search space of an unbanded alignment
|
|
227
224
|
lower_diag = max(lower_diag, -len(seq1)+1)
|
|
228
225
|
upper_diag = min(upper_diag, len(seq2)-1)
|
|
226
|
+
band_width = upper_diag - lower_diag + 1
|
|
227
|
+
if band_width < 1:
|
|
228
|
+
raise ValueError("The width of the band is 0")
|
|
229
229
|
|
|
230
230
|
# This implementation uses transposed tables in comparison
|
|
231
231
|
# to the common visualization
|
|
@@ -243,18 +243,18 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
243
243
|
trace_table = np.zeros((len(seq1)+1, band_width+2), dtype=np.uint8)
|
|
244
244
|
code1 = seq1.code
|
|
245
245
|
code2 = seq2.code
|
|
246
|
-
|
|
246
|
+
|
|
247
247
|
|
|
248
248
|
# Table filling
|
|
249
249
|
###############
|
|
250
|
-
|
|
250
|
+
|
|
251
251
|
# A score value that signals that the respective direction in the
|
|
252
|
-
# dynamic programming matrix should not be used since
|
|
252
|
+
# dynamic programming matrix should not be used, since it would be
|
|
253
253
|
# outside the band
|
|
254
254
|
# It is the 'worst' score available, so the trace table will never
|
|
255
255
|
# include such a direction
|
|
256
256
|
neg_inf = np.iinfo(np.int32).min
|
|
257
|
-
# Correct the 'negative infinity' integer, by making it more
|
|
257
|
+
# Correct the 'negative infinity' integer, by making it more positive
|
|
258
258
|
# This prevents an integer underflow when the gap penalty or
|
|
259
259
|
# match score is added to this value
|
|
260
260
|
neg_inf -= min(gap_penalty) if affine_penalty else gap_penalty
|
|
@@ -294,7 +294,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
294
294
|
code1, code2, matrix.score_matrix(), trace_table, score_table,
|
|
295
295
|
lower_diag, upper_diag, gap_penalty, local
|
|
296
296
|
)
|
|
297
|
-
|
|
297
|
+
|
|
298
298
|
|
|
299
299
|
# Traceback
|
|
300
300
|
###########
|
|
@@ -383,7 +383,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
383
383
|
state_list = np.full(
|
|
384
384
|
len(i_list), TraceState.NO_STATE, dtype=int
|
|
385
385
|
)
|
|
386
|
-
|
|
386
|
+
|
|
387
387
|
# Follow the traces specified in state and indices lists
|
|
388
388
|
cdef int curr_trace_count
|
|
389
389
|
for k in range(len(i_list)):
|
|
@@ -401,7 +401,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
|
|
|
401
401
|
curr_trace_count=&curr_trace_count, max_trace_count=max_number,
|
|
402
402
|
lower_diag=lower_diag, upper_diag=upper_diag
|
|
403
403
|
)
|
|
404
|
-
|
|
404
|
+
|
|
405
405
|
# Replace gap entries in trace with -1
|
|
406
406
|
for i, trace in enumerate(trace_list):
|
|
407
407
|
trace = np.flip(trace, axis=0)
|
|
@@ -459,7 +459,7 @@ def _fill_align_table(CodeType1[:] code1 not None,
|
|
|
459
459
|
local
|
|
460
460
|
Indicates, whether a local alignment should be performed.
|
|
461
461
|
"""
|
|
462
|
-
|
|
462
|
+
|
|
463
463
|
cdef int i, j
|
|
464
464
|
cdef int seq_i, seq_j
|
|
465
465
|
cdef int32 from_diag, from_left, from_top
|
|
@@ -488,7 +488,7 @@ def _fill_align_table(CodeType1[:] code1 not None,
|
|
|
488
488
|
from_top = score_table[i-1, j+1] + gap_penalty
|
|
489
489
|
|
|
490
490
|
trace = get_trace_linear(from_diag, from_left, from_top, &score)
|
|
491
|
-
|
|
491
|
+
|
|
492
492
|
# Local alignment specialty:
|
|
493
493
|
# If score is less than or equal to 0,
|
|
494
494
|
# then 0 is saved on the field and the trace ends here
|
|
@@ -541,7 +541,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
|
|
|
541
541
|
local
|
|
542
542
|
Indicates, whether a local alignment should be performed.
|
|
543
543
|
"""
|
|
544
|
-
|
|
544
|
+
|
|
545
545
|
cdef int i, j
|
|
546
546
|
cdef int seq_i, seq_j
|
|
547
547
|
cdef int32 mm_score, g1m_score, g2m_score
|
|
@@ -550,7 +550,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
|
|
|
550
550
|
cdef uint8 trace
|
|
551
551
|
cdef int32 m_score, g1_score, g2_score
|
|
552
552
|
cdef int32 similarity_score
|
|
553
|
-
|
|
553
|
+
|
|
554
554
|
# Starts at 1 since the first row and column are already filled
|
|
555
555
|
for seq_i in range(0, code1.shape[0]):
|
|
556
556
|
i = seq_i + 1
|
|
@@ -572,7 +572,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
|
|
|
572
572
|
g1g1_score = g1_table[i, j-1] + gap_ext
|
|
573
573
|
mg2_score = m_table[i-1, j+1] + gap_open
|
|
574
574
|
g2g2_score = g2_table[i-1, j+1] + gap_ext
|
|
575
|
-
|
|
575
|
+
|
|
576
576
|
trace = get_trace_affine(
|
|
577
577
|
mm_score, g1m_score, g2m_score,
|
|
578
578
|
mg1_score, g1g1_score,
|
|
@@ -600,7 +600,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
|
|
|
600
600
|
m_table[i,j] = m_score
|
|
601
601
|
if g1_score <= 0:
|
|
602
602
|
trace &= ~(
|
|
603
|
-
TraceDirectionAffine.MATCH_TO_GAP_LEFT |
|
|
603
|
+
TraceDirectionAffine.MATCH_TO_GAP_LEFT |
|
|
604
604
|
TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
|
|
605
605
|
)
|
|
606
606
|
# g1_table[i,j] remains negative infinity
|
|
@@ -623,7 +623,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
|
|
|
623
623
|
|
|
624
624
|
def get_global_trace_starts(seq1_len, seq2_len, lower_diag, upper_diag):
|
|
625
625
|
band_width = upper_diag - lower_diag + 1
|
|
626
|
-
|
|
626
|
+
|
|
627
627
|
j = np.arange(1, band_width + 1)
|
|
628
628
|
seq_j = j + (seq1_len-1) + lower_diag - 1
|
|
629
629
|
# Start from the end from the first (shorter) sequence,
|
biotite/sequence/align/cigar.py
CHANGED
|
@@ -86,7 +86,7 @@ def read_alignment_from_cigar(cigar, position, reference_sequence, segment_seque
|
|
|
86
86
|
|
|
87
87
|
See Also
|
|
88
88
|
--------
|
|
89
|
-
write_alignment_to_cigar
|
|
89
|
+
write_alignment_to_cigar : The reverse operation.
|
|
90
90
|
|
|
91
91
|
Notes
|
|
92
92
|
-----
|
|
@@ -253,7 +253,7 @@ def write_alignment_to_cigar(
|
|
|
253
253
|
|
|
254
254
|
See Also
|
|
255
255
|
--------
|
|
256
|
-
read_alignment_from_cigar
|
|
256
|
+
read_alignment_from_cigar : The reverse operation.
|
|
257
257
|
|
|
258
258
|
Notes
|
|
259
259
|
-----
|
|
Binary file
|
|
@@ -267,7 +267,7 @@ class KmerAlphabet(Alphabet):
|
|
|
267
267
|
kmer_codes : int or ndarray, dtype=np.int64, shape=(n,)
|
|
268
268
|
The fused *k-mer* code(s).
|
|
269
269
|
|
|
270
|
-
See
|
|
270
|
+
See Also
|
|
271
271
|
--------
|
|
272
272
|
split
|
|
273
273
|
The reverse operation.
|
|
@@ -319,7 +319,7 @@ class KmerAlphabet(Alphabet):
|
|
|
319
319
|
codes : ndarray, dtype=np.uint64, shape=(k,) or shape=(n,k)
|
|
320
320
|
The split symbol codes from the base alphabet.
|
|
321
321
|
|
|
322
|
-
See
|
|
322
|
+
See Also
|
|
323
323
|
--------
|
|
324
324
|
fuse
|
|
325
325
|
The reverse operation.
|
|
@@ -568,6 +568,23 @@ class KmerAlphabet(Alphabet):
|
|
|
568
568
|
return int(len(self._base_alph) ** self._k)
|
|
569
569
|
|
|
570
570
|
|
|
571
|
+
def __iter__(self):
|
|
572
|
+
# Creating all symbols is expensive
|
|
573
|
+
# -> Use a generator instead
|
|
574
|
+
if isinstance(self._base_alph, LetterAlphabet):
|
|
575
|
+
return ("".join(self.decode(code)) for code in range(len(self)))
|
|
576
|
+
else:
|
|
577
|
+
return (list(self.decode(code)) for code in range(len(self)))
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def __contains__(self, symbol):
|
|
581
|
+
try:
|
|
582
|
+
self.fuse(self._base_alph.encode_multiple(symbol))
|
|
583
|
+
return True
|
|
584
|
+
except AlphabetError:
|
|
585
|
+
return False
|
|
586
|
+
|
|
587
|
+
|
|
571
588
|
def _to_array_form(model_string):
|
|
572
589
|
"""
|
|
573
590
|
Convert the common string representation of a *k-mer* spacing
|
|
Binary file
|
|
Binary file
|
|
@@ -102,7 +102,7 @@ cdef class KmerTable:
|
|
|
102
102
|
k : int
|
|
103
103
|
The length of the *k-mers*.
|
|
104
104
|
|
|
105
|
-
See
|
|
105
|
+
See Also
|
|
106
106
|
--------
|
|
107
107
|
BucketKmerTable
|
|
108
108
|
|
|
@@ -308,7 +308,7 @@ cdef class KmerTable:
|
|
|
308
308
|
The number of *informative* positions must equal *k*.
|
|
309
309
|
Refer to :class:`KmerAlphabet` for more details.
|
|
310
310
|
|
|
311
|
-
See
|
|
311
|
+
See Also
|
|
312
312
|
--------
|
|
313
313
|
from_kmers : The same functionality based on already created *k-mers*
|
|
314
314
|
|
|
@@ -413,7 +413,7 @@ cdef class KmerTable:
|
|
|
413
413
|
is false, is not added to the table.
|
|
414
414
|
By default, all positions are added.
|
|
415
415
|
|
|
416
|
-
See
|
|
416
|
+
See Also
|
|
417
417
|
--------
|
|
418
418
|
from_sequences : The same functionality based on undecomposed sequences
|
|
419
419
|
|
|
@@ -1384,8 +1384,7 @@ cdef class KmerTable:
|
|
|
1384
1384
|
|
|
1385
1385
|
|
|
1386
1386
|
def __getstate__(self):
|
|
1387
|
-
|
|
1388
|
-
return _pickle_c_arrays(self._ptr_array, relevant_kmers)
|
|
1387
|
+
return _pickle_c_arrays(self._ptr_array)
|
|
1389
1388
|
|
|
1390
1389
|
|
|
1391
1390
|
def __setstate__(self, state):
|
|
@@ -1549,7 +1548,7 @@ cdef class BucketKmerTable:
|
|
|
1549
1548
|
n_buckets : int
|
|
1550
1549
|
The number of buckets, the *k-mers* are divided into.
|
|
1551
1550
|
|
|
1552
|
-
See
|
|
1551
|
+
See Also
|
|
1553
1552
|
--------
|
|
1554
1553
|
KmerTable
|
|
1555
1554
|
|
|
@@ -1775,7 +1774,7 @@ cdef class BucketKmerTable:
|
|
|
1775
1774
|
purpose.
|
|
1776
1775
|
By default, a load factor of approximately 0.8 is used.
|
|
1777
1776
|
|
|
1778
|
-
See
|
|
1777
|
+
See Also
|
|
1779
1778
|
--------
|
|
1780
1779
|
from_kmers : The same functionality based on already created *k-mers*
|
|
1781
1780
|
|
|
@@ -1893,7 +1892,7 @@ cdef class BucketKmerTable:
|
|
|
1893
1892
|
purpose.
|
|
1894
1893
|
By default, a load factor of approximately 0.8 is used.
|
|
1895
1894
|
|
|
1896
|
-
See
|
|
1895
|
+
See Also
|
|
1897
1896
|
--------
|
|
1898
1897
|
from_sequences : The same functionality based on undecomposed sequences
|
|
1899
1898
|
|
|
@@ -2836,12 +2835,7 @@ cdef class BucketKmerTable:
|
|
|
2836
2835
|
|
|
2837
2836
|
|
|
2838
2837
|
def __getstate__(self):
|
|
2839
|
-
|
|
2840
|
-
np.asarray(self._ptr_array) != 0
|
|
2841
|
-
)[0]
|
|
2842
|
-
return _pickle_c_arrays(self._ptr_array, relevant_buckets)
|
|
2843
|
-
|
|
2844
|
-
|
|
2838
|
+
return _pickle_c_arrays(self._ptr_array)
|
|
2845
2839
|
|
|
2846
2840
|
def __setstate__(self, state):
|
|
2847
2841
|
_unpickle_c_arrays(self._ptr_array, state)
|
|
@@ -3097,27 +3091,44 @@ def _append_entries(ptr[:] trg_ptr_array, ptr[:] src_ptr_array):
|
|
|
3097
3091
|
|
|
3098
3092
|
@cython.boundscheck(False)
|
|
3099
3093
|
@cython.wraparound(False)
|
|
3100
|
-
def _pickle_c_arrays(ptr[:] ptr_array
|
|
3094
|
+
def _pickle_c_arrays(ptr[:] ptr_array):
|
|
3101
3095
|
"""
|
|
3102
|
-
Pickle the
|
|
3103
|
-
|
|
3096
|
+
Pickle the C arrays into a single concatenated :class:`ndarray`.
|
|
3097
|
+
The length of each C-array within this concatenated array is saved as well.
|
|
3104
3098
|
"""
|
|
3105
|
-
cdef int64
|
|
3106
|
-
cdef int64 bucket
|
|
3099
|
+
cdef int64 pointer_i, bucket_i, concat_i
|
|
3107
3100
|
cdef int64 length
|
|
3108
3101
|
cdef uint32* bucket_ptr
|
|
3109
3102
|
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
for
|
|
3113
|
-
|
|
3114
|
-
bucket_ptr
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3103
|
+
# First pass: Count the total concatenated size
|
|
3104
|
+
cdef int64 total_length = 0
|
|
3105
|
+
for pointer_i in range(ptr_array.shape[0]):
|
|
3106
|
+
bucket_ptr = <uint32*>ptr_array[pointer_i]
|
|
3107
|
+
if bucket_ptr != NULL:
|
|
3108
|
+
# The first element of the C-array is the length
|
|
3109
|
+
# of the array
|
|
3110
|
+
total_length += (<int64*>bucket_ptr)[0]
|
|
3111
|
+
|
|
3112
|
+
# Second pass: Copy the C-arrays into a single concatenated array
|
|
3113
|
+
# and track the start position of each C-array
|
|
3114
|
+
cdef uint32[:] concatenated_array = np.empty(total_length, dtype=np.uint32)
|
|
3115
|
+
cdef int64[:] lengths = np.empty(ptr_array.shape[0], dtype=np.int64)
|
|
3116
|
+
concat_i = 0
|
|
3117
|
+
for pointer_i in range(ptr_array.shape[0]):
|
|
3118
|
+
bucket_ptr = <uint32*>ptr_array[pointer_i]
|
|
3119
|
+
if bucket_ptr != NULL:
|
|
3120
|
+
length = (<int64*>bucket_ptr)[0]
|
|
3121
|
+
lengths[pointer_i] = length
|
|
3122
|
+
memcpy(
|
|
3123
|
+
&concatenated_array[concat_i],
|
|
3124
|
+
bucket_ptr,
|
|
3125
|
+
length * sizeof(uint32),
|
|
3126
|
+
)
|
|
3127
|
+
concat_i += length
|
|
3128
|
+
else:
|
|
3129
|
+
lengths[pointer_i] = 0
|
|
3119
3130
|
|
|
3120
|
-
return np.asarray(
|
|
3131
|
+
return np.asarray(concatenated_array), np.asarray(lengths)
|
|
3121
3132
|
|
|
3122
3133
|
|
|
3123
3134
|
@cython.boundscheck(False)
|
|
@@ -3126,28 +3137,27 @@ def _unpickle_c_arrays(ptr[:] ptr_array, state):
|
|
|
3126
3137
|
"""
|
|
3127
3138
|
Unpickle the pickled `state` into the given `ptr_array`.
|
|
3128
3139
|
"""
|
|
3129
|
-
cdef int64
|
|
3130
|
-
cdef int64
|
|
3131
|
-
cdef int64 byte_length
|
|
3140
|
+
cdef int64 pointer_i, concat_i
|
|
3141
|
+
cdef int64 length
|
|
3132
3142
|
cdef uint32* bucket_ptr
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
cdef int64[:]
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
for
|
|
3139
|
-
|
|
3140
|
-
if
|
|
3141
|
-
|
|
3142
|
-
pickled_bytes = pickled_pointers[i]
|
|
3143
|
-
byte_length = len(pickled_bytes)
|
|
3144
|
-
if byte_length != 0:
|
|
3145
|
-
bucket_ptr = <uint32*>malloc(byte_length)
|
|
3143
|
+
|
|
3144
|
+
cdef uint32[:] concatenated_array = state[0]
|
|
3145
|
+
cdef int64[:] lengths = state[1]
|
|
3146
|
+
|
|
3147
|
+
concat_i = 0
|
|
3148
|
+
for pointer_i in range(ptr_array.shape[0]):
|
|
3149
|
+
length = lengths[pointer_i]
|
|
3150
|
+
if length != 0:
|
|
3151
|
+
bucket_ptr = <uint32*>malloc(length * sizeof(uint32))
|
|
3146
3152
|
if not bucket_ptr:
|
|
3147
3153
|
raise MemoryError
|
|
3148
|
-
|
|
3149
|
-
|
|
3150
|
-
|
|
3154
|
+
memcpy(
|
|
3155
|
+
bucket_ptr,
|
|
3156
|
+
&concatenated_array[concat_i],
|
|
3157
|
+
length * sizeof(uint32),
|
|
3158
|
+
)
|
|
3159
|
+
concat_i += length
|
|
3160
|
+
ptr_array[pointer_i] = <ptr>bucket_ptr
|
|
3151
3161
|
|
|
3152
3162
|
|
|
3153
3163
|
cdef inline void _deallocate_ptrs(ptr[:] ptrs):
|
|
Binary file
|