PyPI - biotite - Versions diffs - 0.38.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl - Mend

biotite 0.38.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (124) hide show

biotite/__init__.py +3 -3
biotite/application/application.py +33 -28
biotite/application/dssp/app.py +18 -18
biotite/application/sra/__init__.py +5 -0
biotite/application/sra/app.py +337 -55
biotite/database/entrez/__init__.py +2 -1
biotite/database/entrez/check.py +14 -3
biotite/database/entrez/download.py +20 -13
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +38 -34
biotite/database/pubchem/query.py +44 -44
biotite/database/rcsb/download.py +19 -14
biotite/database/rcsb/query.py +46 -46
biotite/sequence/align/__init__.py +5 -1
biotite/sequence/align/banded.c +1408 -1025
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +389 -0
biotite/sequence/align/kmeralphabet.c +3220 -2850
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.c +713 -663
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cpp +68398 -0
biotite/sequence/align/localgapped.c +1507 -1074
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.c +1143 -833
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.c +1569 -1092
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.c +1612 -1212
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.c +33259 -0
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/{kmertable.c → selector.c} +9129 -16497
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.c +685 -646
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/codec.c +1159 -841
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/graphics/alignment.py +212 -2
biotite/sequence/io/genbank/annotation.py +11 -11
biotite/sequence/phylo/nj.c +684 -636
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.c +970 -673
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.c +672 -626
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/structure/__init__.py +1 -1
biotite/structure/atoms.py +1 -1
biotite/structure/basepairs.py +7 -12
biotite/structure/bonds.c +3861 -3749
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/celllist.c +727 -707
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/charges.c +1561 -1560
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/filter.py +30 -37
biotite/structure/info/__init__.py +5 -8
biotite/structure/info/atoms.py +25 -67
biotite/structure/info/bonds.py +46 -100
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1646 -0
biotite/structure/info/ccd/carbohydrates.txt +1133 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +797 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +21 -20
biotite/structure/info/misc.py +11 -22
biotite/structure/info/standardize.py +17 -12
biotite/structure/io/__init__.py +2 -4
biotite/structure/io/ctab.py +1 -1
biotite/structure/io/general.py +37 -43
biotite/structure/io/mmtf/__init__.py +3 -0
biotite/structure/io/mmtf/convertarray.c +528 -365
biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.c +725 -676
biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.c +1070 -754
biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.c +727 -677
biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/file.py +34 -26
biotite/structure/io/npz/__init__.py +3 -0
biotite/structure/io/npz/file.py +21 -18
biotite/structure/io/pdb/__init__.py +3 -3
biotite/structure/io/pdb/file.py +72 -70
biotite/structure/io/pdb/hybrid36.c +540 -478
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +82 -68
biotite/structure/io/pdbx/__init__.py +13 -6
biotite/structure/io/pdbx/bcif.py +649 -0
biotite/structure/io/pdbx/cif.py +1028 -0
biotite/structure/io/pdbx/component.py +243 -0
biotite/structure/io/pdbx/convert.py +707 -359
biotite/structure/io/pdbx/encoding.c +112813 -0
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/error.py +14 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/molecules.py +151 -151
biotite/structure/residues.py +40 -40
biotite/structure/sasa.c +713 -644
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/superimpose.py +158 -115
biotite/visualize.py +9 -11
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/RECORD +112 -102
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
biotite/structure/info/amino_acids.json +0 -1556
biotite/structure/info/amino_acids.py +0 -42
biotite/structure/info/carbohydrates.json +0 -1122
biotite/structure/info/carbohydrates.py +0 -39
biotite/structure/info/intra_bonds.msgpack +0 -0
biotite/structure/info/link_types.msgpack +0 -1
biotite/structure/info/nucleotides.json +0 -772
biotite/structure/info/nucleotides.py +0 -39
biotite/structure/info/residue_masses.msgpack +0 -0
biotite/structure/info/residue_names.msgpack +0 -3
biotite/structure/info/residues.msgpack +0 -0
biotite/structure/io/pdbx/file.py +0 -652
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0

biotite/sequence/codec.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/graphics/alignment.py CHANGED Viewed

@@ -5,9 +5,9 @@
 __name__ = "biotite.sequence.graphics"
 __author__ = "Patrick Kunzmann"
 __all__ = ["SymbolPlotter", "LetterPlotter", "LetterSimilarityPlotter",
-           "LetterTypePlotter",
+           "LetterTypePlotter","ArrayPlotter",
            "plot_alignment", "plot_alignment_similarity_based",
-           "plot_alignment_type_based"]
+           "plot_alignment_type_based","plot_alignment_array"]
 import abc
 import numpy as np
@@ -345,6 +345,116 @@ class LetterTypePlotter(LetterPlotter):
         return self._colors[code]
+class ArrayPlotter(LetterPlotter):
+    '''
+    This :class:`SymbolPlotter` quantitatively decorates sequence alignments with molecular
+    recognition data obtained from e.g. microarrays. Symbols are visualized as characters
+    on a colored background box. The color of a given box represents the recognition
+    signal. The intensity of the color is proportional to the strength of the
+    recognition.
+    Parameters
+    ----------
+    axes : Axes
+        A Matplotlib axes, that is used as plotting area.
+    fl_score : numpy.ndarray
+        The ndarray to store recognition values corresponding to the score residues.
+        By default, the normalized score is 1 for maximum recognition
+        and 0 for non-recognition (no color).
+    color_symbols : bool, optional
+        If true, the symbols themselves are colored.
+        If false, the symbols are black, and the boxes behind the
+        symbols are colored.
+    font_size : float, optional
+        Font size of the sequence symbols.
+    font_param : dict, optional
+        Additional parameters that are given to the
+        :class:`matplotlib.Text` instance of each symbol.
+    '''
+    def __init__(self, axes, fl_score, color_symbols=False,
+                 font_size=None, font_param=None):
+        super().__init__(axes, color_symbols, font_size, font_param)
+        self.fl_score = fl_score
+        self._cmap = self._generate_colormap(colors["dimorange"],
+                                             self._color_symbols)
+    def get_color(self, alignment, column_i, seq_i):
+        index1 = alignment.trace[column_i, seq_i]
+        if index1 == -1:
+            spot_signal = 0
+        else:
+            spot_signal = self._get_signal(self.fl_score, column_i, seq_i)
+        return self._cmap(spot_signal)
+    def _get_signal(self, fl_score, column_i, seq_i):
+        if fl_score is None:
+            signal = 0.0
+        else:
+            signal = fl_score[column_i, seq_i]
+        return signal
+    def get_cmap(self):
+        return self._cmap
+    def plot_symbol(self, bbox, alignment, column_i, seq_i):
+        from matplotlib.patches import Rectangle
+        trace = alignment.trace
+        if trace[column_i, seq_i] != -1:
+            key1 = alignment.sequences[1][trace[column_i, 1]]
+            key2 = alignment.sequences[0][trace[column_i, 0]]
+            if key1 == key2:
+                if seq_i == 1:
+                    symbol = "*"
+                else:
+                    symbol = alignment.sequences[seq_i][trace[column_i, seq_i]]
+            else:
+                symbol = alignment.sequences[seq_i][trace[column_i, seq_i]]
+        else:
+            symbol = "-"
+        color = self.get_color(alignment, column_i, seq_i)
+        box = Rectangle(bbox.p0, bbox.width, bbox.height)
+        self.axes.add_patch(box)
+        text = self.axes.text(
+            bbox.x0 + bbox.width/2, bbox.y0 + bbox.height/2,
+            symbol, color="black", ha="center", va="center",
+            size=self._font_size, **self._font_param)
+        text.set_clip_on(True)
+        if self._color_symbols:
+            box.set_color("None")
+            text.set_color(color)
+        else:
+            box.set_color(color)
+    @staticmethod
+    def _generate_colormap(color, to_black):
+        from matplotlib.colors import ListedColormap, to_rgb
+        color = to_rgb(color)
+        if to_black:
+            cmap_val = np.stack(
+                [
+                    np.interp(np.linspace(0, 1, 100), [0, 1], [color[i], 0])
+                    for i in range(len(color))
+                ]
+            ).transpose()
+        else:
+            cmap_val = np.stack(
+                [
+                    np.interp(np.linspace(0, 1, 100), [0, 1], [1, color[i]])
+                    for i in range(len(color))
+                ]
+            ).transpose()
+        return ListedColormap(cmap_val)
 def plot_alignment(axes, alignment, symbol_plotter, symbols_per_line=50,
                    show_numbers=False, number_size=None, number_functions=None,
                    labels=None, label_size=None,
@@ -800,6 +910,106 @@ def plot_alignment_type_based(axes, alignment, symbols_per_line=50,
         spacing=spacing, symbol_spacing=symbol_spacing
     )
+def plot_alignment_array(axes, alignment, fl_score, symbols_per_line=50,
+                         show_numbers=False, number_size=None,
+                         number_functions=None, labels=None, label_size=None,
+                         show_line_position=False, spacing=1, color=None,
+                         cmap=None, symbol_spacing=None,
+                         symbol_size=None, symbol_param=None):
+    '''
+    Plot a pairwise sequence alignment using an :class:`ArrayPlotter`
+    instance.
+    Highlights sequence recognition regions at the positions of the respective
+    score residue per alignment column.
+    Parameters
+    ----------
+    axes : Axes
+        A Matplotlib axes, that is used as plotting area.
+    alignment : Alignment
+        The pairwise sequence alignment to be plotted.
+    fl_score : ndarray
+        The array to map fluorescence values to score residues.
+        By default the normalized score is 1 for maximum recognition
+        and 0 for non-recognition (no color).
+    symbol_plotter : SymbolPlotter
+        Instance of ArrayPlotter. Defines how the symbols are drawn
+        in the alignment.
+    symbols_per_line : int, optional
+        The amount of alignment columns that are displayed per line.
+    show_numbers : bool, optional
+        If true, the sequence position of the symbols in the last
+        alignment column of a line is shown on the right side of the
+        plot.
+        If the last symbol is a gap, the position of the last actual
+        symbol before this gap is taken.
+        If the first symbol did not occur up to this point,
+        no number is shown for this line.
+        By default the first symbol of a sequence has the position 1,
+        but this behavior can be changed using the `number_functions`
+        parameter.
+    number_size : float, optional
+        The font size of the position numbers
+    number_functions : list of (None or Callable(int -> int)), optional
+        By default the position of the first symbol in a sequence is 1,
+        i.e. the sequence position is the sequence index incremented by
+        1.
+        The behavior can be changed with this parameter:
+        If supplied, the length of the list must match the number of
+        sequences in the alignment.
+        Every entry is a function that maps a sequence index (*int*) to
+        a sequence position (*int*) for the respective sequence.
+        A `None` entry means, that the default numbering is applied
+        for the sequence.
+    labels : list of str, optional
+        The sequence labels.
+        Must be the same size and order as the sequences in the
+        alignment.
+    label_size : float, optional
+        Font size of the labels
+    show_line_position : bool, optional
+        If true the position within a line is plotted below the
+        alignment.
+    spacing : float, optional
+        The spacing between the alignment lines. 1.0 means that the size
+        is equal to the size of a symbol box.
+    color : tuple or str, optional
+        A *Matplotlib* compatible color.
+    cmap : Colormap or str, optional
+        The boxes are
+        colored based on the normalized intensity value on the
+        given *Matplotlib* Colormap.
+    symbol_size : float, optional
+        Font size of the sequence symbols.
+    symbol_param : dict
+        Additional parameters that are given to the
+        :class:`matplotlib.Text` instance of each symbol.
+    symbol_spacing : int, optional
+        A space is placed between each group of `symbol_spacing`
+        symbols.
+    Notes
+    -----
+    A '*' represents a sequence match on the alignment
+    A '-' represents a sequence gap on the alignment
+    '''
+    symbol_plotter = ArrayPlotter(
+        axes, fl_score = fl_score, font_size = symbol_size, font_param = symbol_param,
+    )
+    plot_alignment(
+        axes=axes, alignment=alignment, symbol_plotter=symbol_plotter,
+        symbols_per_line=symbols_per_line,
+        show_numbers=show_numbers, number_size=number_size,
+        number_functions=number_functions,
+        labels=labels, label_size=label_size,
+        show_line_position=show_line_position,
+        spacing=spacing, symbol_spacing=symbol_spacing
+    )
 def _get_last_valid_index(alignment, column_i, seq_i):
     """

biotite/sequence/io/genbank/annotation.py CHANGED Viewed

@@ -25,7 +25,7 @@ def get_annotation(gb_file, include_only=None):
     """
     Get the sequence annotation from the *FEATURES* field of a
     GenBank file.
     Parameters
     ----------
     gb_file : GenBankFile
@@ -33,7 +33,7 @@ def get_annotation(gb_file, include_only=None):
     include_only : iterable object of str, optional
         List of names of feature keys, which should be included
         in the annotation. By default all features are included.
     Returns
     -------
     annotation : Annotation
@@ -45,11 +45,11 @@ def get_annotation(gb_file, include_only=None):
     if len(fields) > 1:
         raise InvalidFileError("File has multiple 'FEATURES' fields")
     lines, _ = fields[0]
     ### Parse all lines to create an index of features,
     # i.e. pairs of the feature key
-    # and the text belonging to the respective feature
+    # and the text belonging to the respective feature
     feature_list = []
     feature_key = None
     feature_value = ""
@@ -65,7 +65,7 @@ def get_annotation(gb_file, include_only=None):
         feature_value += line[_QUAL_START:] + " "
     # Store last feature key and value (loop already exited)
     feature_list.append((feature_key, feature_value))
     ### Process only relevant features and put them into an Annotation
     annotation = Annotation()
@@ -74,7 +74,7 @@ def get_annotation(gb_file, include_only=None):
     for key, val in feature_list:
         if include_only is None or key in include_only:
             qual_dict = {}
             # Split feature definition into parts
             # e.g.
             #
@@ -138,9 +138,9 @@ def get_annotation(gb_file, include_only=None):
                     _set_qual(qual_dict, qual_key, qual_val)
                     qual_key = None
                     qual_val = None
             annotation.add_feature(Feature(key, locs, qual_dict))
     return annotation
@@ -149,11 +149,11 @@ def _parse_locs(loc_str):
     if loc_str.startswith(("join", "order")):
         str_list = loc_str[loc_str.index("(")+1:loc_str.rindex(")")].split(",")
         for s in str_list:
-            locs.extend(_parse_locs(s))
+            locs.extend(_parse_locs(s.strip()))
     elif loc_str.startswith("complement"):
         compl_str = loc_str[loc_str.index("(")+1:loc_str.rindex(")")]
         compl_locs = [
-            Location(loc.first, loc.last, Location.Strand.REVERSE, loc.defect)
+            Location(loc.first, loc.last, Location.Strand.REVERSE, loc.defect)
             for loc in _parse_locs(compl_str)
         ]
         locs.extend(compl_locs)
@@ -219,7 +219,7 @@ def _set_qual(qual_dict, key, val):
 def set_annotation(gb_file, annotation):
     """
     Set the *FEATURES* field of a GenBank file with an annotation.
     Parameters
     ----------
     gb_file : GenBankFile