PyPI - biotite - Versions diffs - 1.0.1__cp310-cp310-win_amd64.whl → 1.2.0__cp310-cp310-win_amd64.whl - Mend

biotite 1.0.1__cp310-cp310-win_amd64.whl → 1.2.0__cp310-cp310-win_amd64.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (177)

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +36 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +5 -18
biotite/application/muscle/app5.py +5 -5
biotite/application/sra/app.py +0 -5
biotite/application/util.py +22 -2
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +9 -3
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +8 -9
biotite/database/uniprot/check.py +22 -17
biotite/database/uniprot/download.py +3 -6
biotite/database/uniprot/query.py +4 -5
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +49 -14
biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +26 -26
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +19 -2
biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +58 -48
biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +284 -57
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +5 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
biotite/sequence/profile.py +105 -29
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +136 -8
biotite/sequence/sequence.py +1 -2
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +6 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +109 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +170 -0
biotite/structure/alphabet/unkerasify.py +128 -0
biotite/structure/atoms.py +163 -66
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +79 -25
biotite/structure/box.py +19 -21
biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cp310-win_amd64.pyd +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +74 -127
biotite/structure/hbond.py +17 -19
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +24 -15
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -34
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +62 -19
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -22
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +4 -4
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +80 -53
biotite/structure/io/pdb/convert.py +4 -3
biotite/structure/io/pdb/file.py +85 -25
biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +36 -36
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +54 -15
biotite/structure/io/pdbx/cif.py +92 -66
biotite/structure/io/pdbx/component.py +15 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +410 -75
biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +141 -156
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +69 -11
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +76 -4
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/graphics/color_schemes/pb_flower.json CHANGED Viewed

@@ -16,7 +16,8 @@
         "m",
         "n",
         "o",
-        "p"
+        "p",
+        "z"
     ],
     "colors": {
         "a": "#31b5fc",

biotite/sequence/graphics/colorschemes.py CHANGED Viewed

@@ -94,27 +94,32 @@ def get_color_scheme(name, alphabet, default="#FFFFFF"):
     >>> print(color_scheme)
     ['#3737f5', '#37f537', '#f5f537', '#f53737']
     """
+    # Try exact alphabet match first
+    for scheme in _color_schemes:
+        if scheme["name"] == name and scheme["alphabet"] == alphabet:
+            return _fit_color_scheme(alphabet, scheme, default)
+    # If no exact match was found, try to find a scheme for an alphabet
+    # that extends the given alphabet
     for scheme in _color_schemes:
         if scheme["name"] == name and scheme["alphabet"].extends(alphabet):
-            colors = scheme["colors"]
-            # Replace None values with default color
-            colors = [color if color is not None else default for color in colors]
-            # Only return colors that are in scope of this alphabet
-            # and not the extended alphabet
-            return colors[: len(alphabet)]
+            return _fit_color_scheme(alphabet, scheme, default)
     raise ValueError(f"Unkown scheme '{name}' for given alphabet")
-def list_color_scheme_names(alphabet):
+def list_color_scheme_names(alphabet, strict=False):
     """
     Get a list of available color scheme names for a given alphabet.
     Parameters
     ----------
     alphabet : Alphabet
-        The alphbet to get the color scheme names for.
-        The alphabet of the scheme must equal or extend this parameter,
-        to be included in the list.
+        The alphabet to get the color scheme names for.
+    strict : bool, optional
+        If set to true, only schemes with an exact match to the given
+        alphabet are included in the list.
+        If set to false, schemes with an alphabet that extends the given
+        alphabet are also included.
     Returns
     -------
@@ -123,7 +128,9 @@ def list_color_scheme_names(alphabet):
     """
     scheme_list = []
     for scheme in _color_schemes:
-        if scheme["alphabet"].extends(alphabet):
+        if strict and scheme["alphabet"] == alphabet:
+            scheme_list.append(scheme["name"])
+        if not strict and scheme["alphabet"].extends(alphabet):
             scheme_list.append(scheme["name"])
     return scheme_list
@@ -135,3 +142,29 @@ _color_schemes = []
 for file_name in glob.glob(_scheme_dir + os.sep + "*.json"):
     scheme = load_color_scheme(file_name)
     _color_schemes.append(scheme)
+def _fit_color_scheme(alphabet, color_scheme, default_color):
+    """
+    Fit a color scheme to the given alphabet.
+    Parameters
+    ----------
+    alphabet : Alphabet
+        The alphabet to get the color scheme for.
+    color_scheme : dict
+        The color scheme.
+    default_color : str or tuple
+        The default color.
+    Returns
+    -------
+    scheme : list of str
+        The colors from the scheme.
+    """
+    colors = color_scheme["colors"]
+    # Replace None values with default color
+    colors = [color if color is not None else default_color for color in colors]
+    # Only return colors that are in scope of this alphabet
+    # and not the extended alphabet
+    return colors[: len(alphabet)]

biotite/sequence/graphics/dendrogram.py CHANGED Viewed

@@ -25,8 +25,10 @@ def plot_dendrogram(
     Parameters
     ----------
+    axes : Axes
+        A *Matplotlib* axes, that is used as plotting area.
     tree : Tree
-        The tree to be visualized
+        The tree to be visualized.
     orientation : {'left', 'right', 'bottom', 'top'}, optional
         The position of the root node in the plot
     use_distances : bool, optional
@@ -38,7 +40,7 @@ def plot_dendrogram(
         The label of a leaf node is the entry at the position of its
         `index` attribute.
     label_size : float, optional
-        The font size of the labels
+        The font size of the labels.
     color : tuple or str, optional
         A *Matplotlib* compatible color, that is used to draw the lines
         of the dendrogram.

biotite/sequence/graphics/features.py CHANGED Viewed

@@ -71,7 +71,7 @@ def plot_feature_map(
         If true, the sequence position the base/residue of a line is
         shown on the right side of the plot.
     number_size : float, optional
-        The font size of the position numbers
+        The font size of the position numbers.
     line_width : float, optional
         The size of the continuous line as fraction of the height of
         the drawn features.
@@ -416,7 +416,7 @@ class PromoterPlotter(FeaturePlotter):
     line_width : float, optional
         The width of the curved arrow tail.
     head_width : float, optional
-        The width of the arrow head
+        The width of the arrow head.
     head_length : float, optional
         The length of the arrow.
     head_height : float, optional

biotite/sequence/graphics/logo.py CHANGED Viewed

@@ -9,7 +9,7 @@ __all__ = ["plot_sequence_logo"]
 import numpy as np
 from biotite.sequence.alphabet import LetterAlphabet
 from biotite.sequence.graphics.colorschemes import get_color_scheme
-from biotite.visualize import set_font_size_in_coord
+from biotite.visualize import plot_scaled_text
 def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
@@ -29,7 +29,7 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
     ----------
     axes : Axes
         The axes to draw the logo one.
-    profile: SequenceProfile
+    profile : SequenceProfile
         The logo is created based on this profile.
     scheme : str or list of (tuple or str)
         Either a valid color scheme name
@@ -38,7 +38,8 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
         The list length must be at least as long as the
         length of the alphabet used by the `profile`.
     **kwargs
-        Additional `text parameters <https://matplotlib.org/api/text_api.html#matplotlib.text.Text>`_.
+        Additional parameters for the :class:`matplotlib.font_manager.FontProperties`
+        of the text or the created :class:`matplotlib.patches.PathPatch`.
     References
     ----------
@@ -69,23 +70,20 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
         index_order = np.argsort(symbols_heights)
         start_height = 0
         for j in index_order[i]:
-            # Stack the symbols at position on top of the preceeding one
+            # Stack the symbols at position on top of the preceding one
             height = symbols_heights[i, j]
             if height > 0:
                 symbol = alphabet.decode(j)
-                text = axes.text(
+                plot_scaled_text(
+                    axes,
+                    symbol,
                     i + 0.5,
                     start_height,
-                    symbol,
-                    ha="left",
-                    va="bottom",
+                    width=1,
+                    height=height,
                     color=colors[j],
-                    # Best results are obtained with this font size
-                    size=1,
                     **kwargs,
                 )
-                text.set_clip_on(True)
-                set_font_size_in_coord(text, width=1, height=height)
                 start_height += height
     axes.set_xlim(0.5, len(profile.symbols) + 0.5)

biotite/sequence/io/fasta/convert.py CHANGED Viewed

@@ -275,8 +275,7 @@ def _process_nucleotide_sequence(x):
 def _convert_to_string(sequence, as_rna):
     if not isinstance(sequence.get_alphabet(), LetterAlphabet):
         raise ValueError(
-            "Only sequences using single letter alphabets "
-            "can be stored in a FASTA file"
+            "Only sequences using single letter alphabets can be stored in a FASTA file"
         )
     if isinstance(sequence, NucleotideSequence) and as_rna:
         return str(sequence).replace("T", "U")

biotite/sequence/io/fasta/file.py CHANGED Viewed

@@ -102,7 +102,7 @@ class FastaFile(TextFile, MutableMapping):
         if not isinstance(header, str):
             raise IndexError("'FastaFile' only supports header strings as keys")
         if not isinstance(seq_str, str):
-            raise TypeError("'FastaFile' only supports sequence strings " "as values")
+            raise TypeError("'FastaFile' only supports sequence strings as values")
         # Create lines for new header and sequence (with line breaks)
         new_lines = [">" + header.replace("\n", "").strip()] + wrap_string(
             seq_str, width=self._chars_per_line

biotite/sequence/io/fastq/file.py CHANGED Viewed

@@ -302,10 +302,10 @@ class FastqFile(TextFile, MutableMapping):
                 else:  # score_len > seq_len
                     raise InvalidFileError(
                         f"The amount of scores is not equal to the sequence "
-                        f"length for the sequence in line {seq_start_i+1} "
+                        f"length for the sequence in line {seq_start_i + 1} "
                     )
             else:
-                raise InvalidFileError(f"Line {i+1} in FASTQ file is invalid")
+                raise InvalidFileError(f"Line {i + 1} in FASTQ file is invalid")
         # At the end of the file, the last sequence or score block
         # must have properly ended
         if in_sequence or in_scores:
@@ -392,7 +392,7 @@ class FastqFile(TextFile, MutableMapping):
                     yield identifier, ("".join(seq_str_list), scores)
                 else:  # score_len > seq_len
                     raise InvalidFileError(
-                        "The amount of scores is not equal to the sequence " "length"
+                        "The amount of scores is not equal to the sequence length"
                     )
             else:

biotite/sequence/io/genbank/file.py CHANGED Viewed

@@ -80,7 +80,7 @@ class GenBankFile(TextFile):
     >>> print(content)
     ['One line', 'A second line']
     >>> print(subfields)
-    OrderedDict([('SUBFIELD1', ['Single Line']), ('SUBFIELD2', ['Two', 'lines'])])
+    OrderedDict({'SUBFIELD1': ['Single Line'], 'SUBFIELD2': ['Two', 'lines']})
     Adding an additional field:
@@ -391,7 +391,7 @@ class GenBankFile(TextFile):
             The field name.
         content : list of str
             The content lines.
-        subfield_dict : dict of str -> str, optional
+        subfields : dict of str -> str, optional
             The subfields of the field.
             The dictionary maps subfield names to the content lines of
             the respective subfield.
@@ -432,7 +432,7 @@ class GenBankFile(TextFile):
             The field name.
         content : list of str
             The content lines.
-        subfield_dict : dict of str -> str, optional
+        subfields : dict of str -> str, optional
             The subfields of the field.
             The dictionary maps subfield names to the content lines of
             the respective subfield.

biotite/sequence/io/genbank/sequence.py CHANGED Viewed

@@ -82,6 +82,8 @@ def get_annotated_sequence(gb_file, format="gb", include_only=None):
     ----------
     gb_file : GenBankFile
         The GenBank file to read the fields from.
+    format : {'gb', 'gp'}
+        Whether the file is a *GenBank* or *GenPept* file.
     include_only : iterable object of str, optional
         List of names of feature keys, which should included
         in the annotation. By default all features are included.

biotite/sequence/io/gff/convert.py CHANGED Viewed

@@ -84,7 +84,7 @@ def set_annotation(gff_file, annotation, seqid=None, source=None, is_stranded=Tr
     for feature in sorted(annotation):
         if len(feature.locs) > 1 and "ID" not in feature.qual:
             raise ValueError(
-                "The 'Id' qualifier is required " "for features with multiple locations"
+                "The 'Id' qualifier is required for features with multiple locations"
             )
         ## seqid ##
         if seqid is not None and " " in seqid:

biotite/sequence/io/gff/file.py CHANGED Viewed

@@ -303,8 +303,7 @@ class GFFFile(TextFile):
     def __getitem__(self, index):
         if (index >= 0 and index >= len(self)) or (index < 0 and -index > len(self)):
             raise IndexError(
-                f"Index {index} is out of range for GFFFile with "
-                f"{len(self)} entries"
+                f"Index {index} is out of range for GFFFile with {len(self)} entries"
             )
         line_index = self._entries[index]

biotite/sequence/phylo/nj.cp310-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/tree.cp310-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/upgma.cp310-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/profile.py CHANGED Viewed

@@ -3,6 +3,7 @@
 # information.
 import warnings
+from numbers import Integral
 import numpy as np
 from biotite.sequence.align.alignment import get_codes
 from biotite.sequence.alphabet import LetterAlphabet
@@ -66,6 +67,9 @@ class SequenceProfile(object):
     It also saves the number of gaps at each position in the array
     'gaps'.
+    With :meth:`from_alignment()` a :class:`SequenceProfile` object can
+    be created from an indefinite number of aligned sequences.
     With :meth:`probability_matrix()` the position probability matrix
     can be created based on 'symbols' and a pseudocount.
@@ -73,9 +77,6 @@ class SequenceProfile(object):
     be created based on the before calculated position probability
     matrix and the background frequencies.
-    With :meth:`from_alignment()` a :class:`SequenceProfile` object can
-    be created from an indefinite number of aligned sequences.
     With :meth:`sequence_probability_from_matrix()` the probability of a
     sequence can be calculated based on the before calculated position
     probability matrix of this instance of object SequenceProfile.
@@ -94,7 +95,7 @@ class SequenceProfile(object):
     gaps : ndarray, dtype=int, shape=n
         Array which indicates the number of gaps at each position.
     alphabet : Alphabet, length=k
-        Alphabet of sequences of sequence profile
+        Alphabet of sequences of sequence profile.
     Attributes
     ----------
@@ -105,6 +106,63 @@ class SequenceProfile(object):
         Array which indicates the number of gaps at each position.
     alphabet : Alphabet, length=k
         Alphabet of sequences of sequence profile
+    Examples
+    --------
+    Create a profile from a multiple sequence alignment:
+    >>> sequences = [
+    ...     NucleotideSequence("CGCTCATTC"),
+    ...     NucleotideSequence("CGCTATTC"),
+    ...     NucleotideSequence("CCCTCAATC"),
+    ... ]
+    >>> msa, _, _, _ = align_multiple(
+    ...     sequences, SubstitutionMatrix.std_nucleotide_matrix(), gap_penalty=-5
+    ... )
+    >>> print(msa)
+    CGCTCATTC
+    CGCT-ATTC
+    CCCTCAATC
+    >>> profile = SequenceProfile.from_alignment(msa)
+    >>> print(profile)
+      A C G T
+    0 0 3 0 0
+    1 0 1 2 0
+    2 0 3 0 0
+    3 0 0 0 3
+    4 0 2 0 0
+    5 3 0 0 0
+    6 1 0 0 2
+    7 0 0 0 3
+    8 0 3 0 0
+    >>> print(profile.gaps)
+    [0 0 0 0 1 0 0 0 0]
+    Slice the profile (masks and index arrays are also supported):
+    >>> print(profile[2:])
+      A C G T
+    0 0 3 0 0
+    1 0 0 0 3
+    2 0 2 0 0
+    3 3 0 0 0
+    4 1 0 0 2
+    5 0 0 0 3
+    6 0 3 0 0
+    Use the profile to compute the position probability matrix:
+    >>> print(profile.probability_matrix())
+    [[0.000 1.000 0.000 0.000]
+     [0.000 0.333 0.667 0.000]
+     [0.000 1.000 0.000 0.000]
+     [0.000 0.000 0.000 1.000]
+     [0.000 1.000 0.000 0.000]
+     [1.000 0.000 0.000 0.000]
+     [0.333 0.000 0.000 0.667]
+     [0.000 0.000 0.000 1.000]
+     [0.000 1.000 0.000 0.000]]
     """
     def __init__(self, symbols, gaps, alphabet):
@@ -156,8 +214,23 @@ class SequenceProfile(object):
             )
         self._gaps = new_gaps
+    def __str__(self):
+        # Add an additional row and column for the position and symbol indicators
+        print_matrix = np.full(
+            (self.symbols.shape[0] + 1, self.symbols.shape[1] + 1), "", dtype=object
+        )
+        print_matrix[1:, 1:] = self.symbols.astype(str)
+        print_matrix[0, 1:] = [str(sym) for sym in self.alphabet]
+        print_matrix[1:, 0] = [str(i) for i in range(self.symbols.shape[0])]
+        max_len = len(max(print_matrix.flatten(), key=len))
+        return "\n".join(
+            [
+                " ".join([str(cell).rjust(max_len) for cell in row])
+                for row in print_matrix
+            ]
+        )
     def __repr__(self):
-        """Represent SequenceProfile as a string for debugging."""
         return (
             f"SequenceProfile(np.{np.array_repr(self.symbols)}, "
             f"np.{np.array_repr(self.gaps)}, Alphabet({self.alphabet}))"
@@ -191,15 +264,14 @@ class SequenceProfile(object):
         alphabet : bool
             This alphabet will be used when creating the SequenceProfile
             object. If no alphabet is selected, the alphabet for this
-            SequenceProfile
+            :class:`SequenceProfile`.
             object will be calculated from the sequences of object
             Alignment.
-            (Default: None).
         Returns
         -------
         profile: SequenceProfile
-            The created SequenceProfile object
+            The created :class:`SequenceProfile` object.
         """
         sequences = get_codes(alignment)
         if alphabet is None:
@@ -233,13 +305,12 @@ class SequenceProfile(object):
             If true, returns consensus sequence as GeneralSequence
             object.
             Otherwise, the consensus sequence object type is chosen
-            based on the alphabet of this SequenceProfile object
-            (Default: False).
+            based on the alphabet of this SequenceProfile object.
         Returns
         -------
         consensus: Sequence
-            The calculated consensus sequence
+            The calculated consensus sequence.
         """
         # https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes
         if as_general:
@@ -347,14 +418,13 @@ class SequenceProfile(object):
         Parameters
         ----------
-        pseudocount: int, optional
+        pseudocount : int, optional
             Amount added to the number of observed cases in order to
             change the expected probability of the PPM.
-            (Default: 0)
         Returns
         -------
-        ppm: ndarray, dtype=float, shape=(n,k)
+        ppm : ndarray, dtype=float, shape=(n,k)
             The calculated the position probability matrix.
         """
         if pseudocount < 0:
@@ -383,17 +453,16 @@ class SequenceProfile(object):
         Parameters
         ----------
-        pseudocount: int, optional
-            Amount added to the number of observed cases in order to change
-            the expected probability of the PPM.
-            (Default: 0)
-        background_frequencies: ndarray, shape=(k,), dtype=float, optional
+        background_frequencies : ndarray, shape=(k,), dtype=float, optional
             The background frequencies for each symbol in the alphabet.
             By default, a uniform distribution is assumed.
+        pseudocount : int, optional
+            Amount added to the number of observed cases in order to change
+            the expected probability of the PPM.
         Returns
         -------
-        pwm: ndarray, dtype=float, shape=(n,k)
+        pwm : ndarray, dtype=float, shape=(n,k)
             The calculated the position weight matrix.
         """
         if background_frequencies is None:
@@ -417,14 +486,13 @@ class SequenceProfile(object):
         ----------
         sequence : Sequence
            The input sequence.
-        pseudocount: int, optional
+        pseudocount : int, optional
             Amount added to the number of observed cases in order to change
             the expected probability of the PPM.
-            (Default: 0)
         Returns
         -------
-        probability: float
+        probability : float
            The calculated probability for the input sequence based on
            the PPM.
         """
@@ -453,17 +521,16 @@ class SequenceProfile(object):
         ----------
         sequence : Sequence
            The input sequence.
-        pseudocount: int, optional
-            Amount added to the number of observed cases in order to change
-            the expected probability of the PPM.
-            (Default: 0)
-        background_frequencies: ndarray, shape=(k,), dtype=float, optional
+        background_frequencies : ndarray, shape=(k,), dtype=float, optional
             The background frequencies for each symbol in the alphabet.
             By default a uniform distribution is assumed.
+        pseudocount : int, optional
+            Amount added to the number of observed cases in order to change
+            the expected probability of the PPM.
         Returns
         -------
-        score: float
+        score : float
            The calculated score for the input sequence based on
            the PWM.
         """
@@ -483,3 +550,12 @@ class SequenceProfile(object):
                 f"as 'symbols' {self.symbols.shape}"
             )
         return np.sum(pwm[np.arange(len(sequence)), sequence.code])
+    def __getitem__(self, index):
+        if isinstance(index, Integral):
+            # Do not allow to collapse dimensions
+            index = slice(index, index + 1)
+        return SequenceProfile(self.symbols[index], self.gaps[index], self.alphabet)
+    def __len__(self):
+        return len(self.symbols)

biotite/sequence/search.py CHANGED Viewed

@@ -39,7 +39,6 @@ def find_subsequence(sequence, query):
     >>> sub_seq = NucleotideSequence("TGA")
     >>> print(find_subsequence(main_seq, sub_seq))
     [2 6]
     """
     if not sequence.get_alphabet().extends(query.get_alphabet()):
         raise ValueError("The sequences alphabets are not equal")