biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/profile.py
CHANGED
|
@@ -4,9 +4,13 @@
|
|
|
4
4
|
|
|
5
5
|
import warnings
|
|
6
6
|
import numpy as np
|
|
7
|
-
from .
|
|
8
|
-
from .alphabet import LetterAlphabet
|
|
9
|
-
from .
|
|
7
|
+
from biotite.sequence.align.alignment import get_codes
|
|
8
|
+
from biotite.sequence.alphabet import LetterAlphabet
|
|
9
|
+
from biotite.sequence.seqtypes import (
|
|
10
|
+
GeneralSequence,
|
|
11
|
+
NucleotideSequence,
|
|
12
|
+
ProteinSequence,
|
|
13
|
+
)
|
|
10
14
|
|
|
11
15
|
__name__ = "biotite.sequence"
|
|
12
16
|
__author__ = "Maximilian Greil"
|
|
@@ -73,7 +77,7 @@ class SequenceProfile(object):
|
|
|
73
77
|
be created from an indefinite number of aligned sequences.
|
|
74
78
|
|
|
75
79
|
With :meth:`sequence_probability_from_matrix()` the probability of a
|
|
76
|
-
sequence can be calculated based on the before calculated position
|
|
80
|
+
sequence can be calculated based on the before calculated position
|
|
77
81
|
probability matrix of this instance of object SequenceProfile.
|
|
78
82
|
|
|
79
83
|
With :meth:`sequence_score_from_matrix()` the score of a sequence
|
|
@@ -154,8 +158,10 @@ class SequenceProfile(object):
|
|
|
154
158
|
|
|
155
159
|
def __repr__(self):
|
|
156
160
|
"""Represent SequenceProfile as a string for debugging."""
|
|
157
|
-
return
|
|
158
|
-
|
|
161
|
+
return (
|
|
162
|
+
f"SequenceProfile(np.{np.array_repr(self.symbols)}, "
|
|
163
|
+
f"np.{np.array_repr(self.gaps)}, Alphabet({self.alphabet}))"
|
|
164
|
+
)
|
|
159
165
|
|
|
160
166
|
def __eq__(self, item):
|
|
161
167
|
if not isinstance(item, SequenceProfile):
|
|
@@ -204,16 +210,16 @@ class SequenceProfile(object):
|
|
|
204
210
|
for alph in (seq.alphabet for seq in alignment.sequences):
|
|
205
211
|
if not alphabet.extends(alph):
|
|
206
212
|
raise ValueError(
|
|
207
|
-
|
|
213
|
+
"The given alphabet is incompatible with a least one "
|
|
208
214
|
"alphabet of the given sequences"
|
|
209
215
|
)
|
|
210
216
|
symbols = np.zeros((len(sequences[0]), len(alphabet)), dtype=int)
|
|
211
217
|
gaps = np.zeros(len(sequences[0]), dtype=int)
|
|
212
218
|
sequences = np.transpose(sequences)
|
|
213
219
|
for i in range(len(sequences)):
|
|
214
|
-
row = np.where(sequences[i,
|
|
220
|
+
row = np.where(sequences[i,] == -1, len(alphabet), sequences[i,])
|
|
215
221
|
count = np.bincount(row, minlength=len(alphabet) + 1)
|
|
216
|
-
symbols[i,
|
|
222
|
+
symbols[i,] = count[0 : len(alphabet)]
|
|
217
223
|
gaps[i] = count[-1]
|
|
218
224
|
return SequenceProfile(symbols, gaps, alphabet)
|
|
219
225
|
|
|
@@ -248,10 +254,21 @@ class SequenceProfile(object):
|
|
|
248
254
|
|
|
249
255
|
def _dna_to_consensus(self):
|
|
250
256
|
codes = {
|
|
251
|
-
(0,):
|
|
252
|
-
(
|
|
253
|
-
(
|
|
254
|
-
(
|
|
257
|
+
(0,): "A",
|
|
258
|
+
(1,): "C",
|
|
259
|
+
(2,): "G",
|
|
260
|
+
(3,): "T",
|
|
261
|
+
(0, 2): "R",
|
|
262
|
+
(1, 3): "Y",
|
|
263
|
+
(1, 2): "S",
|
|
264
|
+
(0, 3): "W",
|
|
265
|
+
(2, 3): "K",
|
|
266
|
+
(0, 1): "M",
|
|
267
|
+
(1, 2, 3): "B",
|
|
268
|
+
(0, 2, 3): "D",
|
|
269
|
+
(0, 1, 3): "H",
|
|
270
|
+
(0, 1, 2): "V",
|
|
271
|
+
(0, 1, 2, 3): "N",
|
|
255
272
|
}
|
|
256
273
|
consensus = ""
|
|
257
274
|
maxes = np.max(self.symbols, axis=1)
|
|
@@ -261,10 +278,21 @@ class SequenceProfile(object):
|
|
|
261
278
|
|
|
262
279
|
def _rna_to_consensus(self):
|
|
263
280
|
codes = {
|
|
264
|
-
(0,):
|
|
265
|
-
(
|
|
266
|
-
(
|
|
267
|
-
(
|
|
281
|
+
(0,): "A",
|
|
282
|
+
(1,): "C",
|
|
283
|
+
(2,): "G",
|
|
284
|
+
(3,): "U",
|
|
285
|
+
(0, 2): "R",
|
|
286
|
+
(1, 3): "Y",
|
|
287
|
+
(1, 2): "S",
|
|
288
|
+
(0, 3): "W",
|
|
289
|
+
(2, 3): "K",
|
|
290
|
+
(0, 1): "M",
|
|
291
|
+
(1, 2, 3): "B",
|
|
292
|
+
(0, 2, 3): "D",
|
|
293
|
+
(0, 1, 3): "H",
|
|
294
|
+
(0, 1, 2): "V",
|
|
295
|
+
(0, 1, 2, 3): "N",
|
|
268
296
|
}
|
|
269
297
|
consensus = ""
|
|
270
298
|
maxes = np.max(self.symbols, axis=1)
|
|
@@ -307,7 +335,7 @@ class SequenceProfile(object):
|
|
|
307
335
|
.. math::
|
|
308
336
|
|
|
309
337
|
P(S) = \frac {C_S + \frac{c_p}{k}} {\sum_{i} C_i + c_p}
|
|
310
|
-
|
|
338
|
+
|
|
311
339
|
:math:`S`: The symbol.
|
|
312
340
|
|
|
313
341
|
:math:`C_S`: The count of symbol :math:`S` at the sequence
|
|
@@ -330,11 +358,10 @@ class SequenceProfile(object):
|
|
|
330
358
|
The calculated position probability matrix.
|
|
331
359
|
"""
|
|
332
360
|
if pseudocount < 0:
|
|
333
|
-
raise ValueError(
|
|
334
|
-
|
|
335
|
-
)
|
|
336
|
-
|
|
337
|
-
(np.sum(self.symbols, axis=1)[:, np.newaxis] + pseudocount)
|
|
361
|
+
raise ValueError("Pseudocount can not be smaller than zero.")
|
|
362
|
+
return (self.symbols + pseudocount / self.symbols.shape[1]) / (
|
|
363
|
+
np.sum(self.symbols, axis=1)[:, np.newaxis] + pseudocount
|
|
364
|
+
)
|
|
338
365
|
|
|
339
366
|
def log_odds_matrix(self, background_frequencies=None, pseudocount=0):
|
|
340
367
|
r"""
|
|
@@ -346,7 +373,7 @@ class SequenceProfile(object):
|
|
|
346
373
|
.. math::
|
|
347
374
|
|
|
348
375
|
W(S) = \log_2 \left( \frac{P(S)}{B_S} \right)
|
|
349
|
-
|
|
376
|
+
|
|
350
377
|
:math:`S`: The symbol.
|
|
351
378
|
|
|
352
379
|
:math:`P(S)`: The probability of symbol :math:`S` at the
|
|
@@ -363,7 +390,7 @@ class SequenceProfile(object):
|
|
|
363
390
|
background_frequencies: ndarray, shape=(k,), dtype=float, optional
|
|
364
391
|
The background frequencies for each symbol in the alphabet.
|
|
365
392
|
By default, a uniform distribution is assumed.
|
|
366
|
-
|
|
393
|
+
|
|
367
394
|
Returns
|
|
368
395
|
-------
|
|
369
396
|
pwm: ndarray, dtype=float, shape=(n,k)
|
|
@@ -383,7 +410,7 @@ class SequenceProfile(object):
|
|
|
383
410
|
Calculate probability of a sequence based on the
|
|
384
411
|
position probability matrix (PPM).
|
|
385
412
|
|
|
386
|
-
The sequence probability is the product of the probability of
|
|
413
|
+
The sequence probability is the product of the probability of
|
|
387
414
|
the respective symbol over all sequence positions.
|
|
388
415
|
|
|
389
416
|
Parameters
|
|
@@ -419,7 +446,7 @@ class SequenceProfile(object):
|
|
|
419
446
|
Calculate score of a sequence based on the
|
|
420
447
|
position weight matrix (PWM).
|
|
421
448
|
|
|
422
|
-
The score is the sum of weights (log-odds scores) of
|
|
449
|
+
The score is the sum of weights (log-odds scores) of
|
|
423
450
|
the respective symbol over all sequence positions.
|
|
424
451
|
|
|
425
452
|
Parameters
|
|
@@ -442,7 +469,9 @@ class SequenceProfile(object):
|
|
|
442
469
|
"""
|
|
443
470
|
if background_frequencies is None:
|
|
444
471
|
background_frequencies = 1 / len(self.alphabet)
|
|
445
|
-
pwm = self.log_odds_matrix(
|
|
472
|
+
pwm = self.log_odds_matrix(
|
|
473
|
+
background_frequencies=background_frequencies, pseudocount=pseudocount
|
|
474
|
+
)
|
|
446
475
|
if len(sequence) != len(pwm):
|
|
447
476
|
raise ValueError(
|
|
448
477
|
f"The given sequence has a different length ({len(sequence)}) than "
|
biotite/sequence/search.py
CHANGED
|
@@ -4,8 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.sequence"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["find_subsequence", "find_symbol", "find_symbol_first",
|
|
8
|
-
"find_symbol_last"]
|
|
7
|
+
__all__ = ["find_subsequence", "find_symbol", "find_symbol_first", "find_symbol_last"]
|
|
9
8
|
|
|
10
9
|
import numpy as np
|
|
11
10
|
|
|
@@ -13,7 +12,7 @@ import numpy as np
|
|
|
13
12
|
def find_subsequence(sequence, query):
|
|
14
13
|
"""
|
|
15
14
|
Find a subsequence in a sequence.
|
|
16
|
-
|
|
15
|
+
|
|
17
16
|
Parameters
|
|
18
17
|
----------
|
|
19
18
|
sequence : Sequence
|
|
@@ -21,26 +20,26 @@ def find_subsequence(sequence, query):
|
|
|
21
20
|
query : Sequence
|
|
22
21
|
The potential subsequence. Its alphabet must extend the
|
|
23
22
|
`sequence` alphabet.
|
|
24
|
-
|
|
23
|
+
|
|
25
24
|
Returns
|
|
26
25
|
-------
|
|
27
26
|
match_indices : ndarray
|
|
28
27
|
The starting indices in `sequence`, where `query` has been
|
|
29
28
|
found. The array is empty if no match has been found.
|
|
30
|
-
|
|
29
|
+
|
|
31
30
|
Raises
|
|
32
31
|
------
|
|
33
32
|
ValueError
|
|
34
33
|
If the `query` alphabet does not extend the `sequence` alphabet.
|
|
35
|
-
|
|
34
|
+
|
|
36
35
|
Examples
|
|
37
36
|
--------
|
|
38
|
-
|
|
37
|
+
|
|
39
38
|
>>> main_seq = NucleotideSequence("ACTGAATGA")
|
|
40
39
|
>>> sub_seq = NucleotideSequence("TGA")
|
|
41
40
|
>>> print(find_subsequence(main_seq, sub_seq))
|
|
42
41
|
[2 6]
|
|
43
|
-
|
|
42
|
+
|
|
44
43
|
"""
|
|
45
44
|
if not sequence.get_alphabet().extends(query.get_alphabet()):
|
|
46
45
|
raise ValueError("The sequences alphabets are not equal")
|
|
@@ -52,17 +51,18 @@ def find_subsequence(sequence, query):
|
|
|
52
51
|
match_indices.append(i)
|
|
53
52
|
return np.array(match_indices)
|
|
54
53
|
|
|
54
|
+
|
|
55
55
|
def find_symbol(sequence, symbol):
|
|
56
56
|
"""
|
|
57
57
|
Find a symbol in a sequence.
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
Parameters
|
|
60
60
|
----------
|
|
61
61
|
sequence : Sequence
|
|
62
62
|
The sequence to find the symbol in.
|
|
63
63
|
symbol : object
|
|
64
64
|
The symbol to be found in `sequence`.
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
Returns
|
|
67
67
|
-------
|
|
68
68
|
match_indices : ndarray
|
|
@@ -71,17 +71,18 @@ def find_symbol(sequence, symbol):
|
|
|
71
71
|
code = sequence.get_alphabet().encode(symbol)
|
|
72
72
|
return np.where(sequence.code == code)[0]
|
|
73
73
|
|
|
74
|
+
|
|
74
75
|
def find_symbol_first(sequence, symbol):
|
|
75
76
|
"""
|
|
76
77
|
Find first occurrence of a symbol in a sequence.
|
|
77
|
-
|
|
78
|
+
|
|
78
79
|
Parameters
|
|
79
80
|
----------
|
|
80
81
|
sequence : Sequence
|
|
81
82
|
The sequence to find the symbol in.
|
|
82
83
|
symbol : object
|
|
83
84
|
The symbol to be found in `sequence`.
|
|
84
|
-
|
|
85
|
+
|
|
85
86
|
Returns
|
|
86
87
|
-------
|
|
87
88
|
first_index : int
|
|
@@ -92,18 +93,19 @@ def find_symbol_first(sequence, symbol):
|
|
|
92
93
|
if len(match_i) == 0:
|
|
93
94
|
return -1
|
|
94
95
|
return np.min(match_i)
|
|
95
|
-
|
|
96
|
+
|
|
97
|
+
|
|
96
98
|
def find_symbol_last(sequence, symbol):
|
|
97
99
|
"""
|
|
98
100
|
Find last occurrence of a symbol in a sequence.
|
|
99
|
-
|
|
101
|
+
|
|
100
102
|
Parameters
|
|
101
103
|
----------
|
|
102
104
|
sequence : Sequence
|
|
103
105
|
The sequence to find the symbol in.
|
|
104
106
|
symbol : object
|
|
105
107
|
The symbol to be found in `sequence`.
|
|
106
|
-
|
|
108
|
+
|
|
107
109
|
Returns
|
|
108
110
|
-------
|
|
109
111
|
last_index : int
|