PyPI - biotite - Versions diffs - 0.41.1__cp311-cp311-win_amd64.whl → 0.41.2__cp311-cp311-win_amd64.whl - Mend

biotite 0.41.1__cp311-cp311-win_amd64.whl → 0.41.2__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (42) hide show

biotite/application/__init__.py +35 -9
biotite/application/application.py +2 -1
biotite/sequence/__init__.py +13 -2
biotite/sequence/align/__init__.py +158 -4
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +34 -34
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +63 -63
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/sequence/sequence.py +52 -50
biotite/structure/atoms.py +8 -8
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +59 -68
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/info/ccd.py +17 -2
biotite/structure/info/groups.py +9 -12
biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/bcif.py +0 -8
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/version.py +2 -2
{biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/METADATA +2 -2
{biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/RECORD +42 -42
{biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/WHEEL +1 -1
{biotite-0.41.1.dist-info → biotite-0.41.2.dist-info}/licenses/LICENSE.rst +0 -0

biotite/application/__init__.py CHANGED Viewed

@@ -19,17 +19,43 @@ These programs are not shipped with the *Biotite* package.
 Each application is represented by its respective :class:`Application`
 class.
-:class:`Application` objects are created, started and after the run has
-finished, the results are collected.
-The current state of the the execution is indicated by an
-:class:`AppState` object, which restricts which method calls are
-allowed:
-For example, the parameters can only be set, when the
-:class:`Application` has not been started yet and the results can only
-be collected after :class:`Application` has finished.
+Each :class:`Application` instance has a life cycle, starting with its
+creation and ending with the result extraction.
+Each state in this life cycle is described by the value of the
+*enum* :class:`AppState`, that each :class:`Application` contains:
+Directly after its instantiation the app is in the ``CREATED`` state.
+In this state further parameters can be set for the application run.
+After the user calls the :func:`Application.start()` method, the app
+state is set to ``RUNNING`` and the app performs the calculations.
+When the application finishes the AppState
+changes to ``FINISHED``.
+The user can now call the :func:`Application.join()` method, concluding
+the application in the ``JOINED`` state and making the results of the
+application accessible.
+Furthermore, this may trigger cleanup actions in some applications.
+:func:`Application.join()` can even be called in the ``RUNNING`` state:
+This will constantly check if the application has finished and will
+directly go into the ``JOINED`` state as soon as the application reaches
+the ``FINISHED`` state.
+Calling the :func:`Application.cancel()` method while the application is
+``RUNNING`` or ``FINISHED`` leaves the application in the ``CANCELLED``
+state.
+This triggers cleanup, too, but there are no accessible results.
+If a method is called in an unsuitable app state, an
+:class:`AppStateError` is raised.
+At each state in the life cycle, :class:`Application` type specific
+methods are called, as shown in the following diagram.
+.. figure:: /static/assets/figures/app_lifecycle.png
+    :alt: Application life cycle
+    :scale: 50%
+    Taken from
+    `Kunzmann & Hamacher 2018 <https://doi.org/10.1186/s12859-018-2367-z>`_
+    licensed under `CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_.
 The execution of an :class:`Application` can run in parallel:
-In the time between starting the run and collecting the results can be
+The time between starting the run and collecting the results can be
 used to run other code, similar to the *Python* :class:`Thread` or
 :class:`Process` classes.
 """

biotite/application/application.py CHANGED Viewed

@@ -74,7 +74,8 @@ class Application(metaclass=abc.ABCMeta):
     Every :class:`Application` runs through different app states
     (instances of enum :class:`AppState`) from its creation until its
-    termination:
+    termination.
     Directly after its instantiation the app is in the *CREATED* state.
     In this state further parameters can be set for the application run.
     After the user calls the :func:`start()` method, the app state is

biotite/sequence/__init__.py CHANGED Viewed

@@ -24,7 +24,15 @@ For example, ``'A'``, ``'C'``, ``'G'`` and ``'T'`` would be encoded into
 These integer values are called *symbol code*, the encoding of an entire
 sequence of symbols is called *sequence code*.
-The size of the symbol code type in the array is determined by the
+.. figure:: /static/assets/figures/symbol_encoding.png
+    :alt: Symbol encoding in Biotite
+    :scale: 50%
+    Taken from
+    `Kunzmann & Hamacher 2018 <https://doi.org/10.1186/s12859-018-2367-z>`_
+    licensed under `CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_.
+The size of the symbol code type in the array is determined by the
 size of the :class:`Alphabet`:
 If the :class:`Alphabet` contains 256 symbols or less, one byte is used
 per array element, between 257 and 65536 symbols, two bytes are used,
@@ -41,6 +49,7 @@ This approach has multiple advantages:
       indifferent to the actual type of sequence.
     - Symbol codes are directly indices for substitution matrices in
       alignments
+    - *k-mers* can be computed fast
 The abstract :class:`Sequence` superclass cannot be instantiated
 directly, as it does not define an :class:`Alphabet` by itself.
@@ -55,10 +64,12 @@ The class :class:`GeneralSequence` allows the usage of a custom
 Additionally, this subpackage provides support for sequence features,
 as used in e.g. GenBank or GFF files.
 A :class:`Feature` stores its key name, its qualifiers and locations.
-An :class:`Annotation` is a group of multiple :class:`Feataure` objects
+An :class:`Annotation` is a group of multiple :class:`Feature` objects
 and offers convenient location based indexing.
 An :class:`AnnotatedSequence` combines an :class:`Annotation` and a
 :class:`Sequence`.
+Sequence profiles can be created with the :class:`SequenceProfile` class.
 """
 __name__ = "biotite.sequence"

biotite/sequence/align/__init__.py CHANGED Viewed

@@ -22,11 +22,165 @@ These objects contain the original sequences and a trace, that describe
 which positions (indices) in the sequences are aligned.
 Optionally they also contain the similarity score.
-The aligning functions are usually C-accelerated, reducing the
-computation time substantially.
+The aligning functions :func:`align_optimal()` and
+:func:`align_multiple()` cover most use cases for pairwise and multiple
+sequence alignments respectively.
+However, *Biotite* also provides a modular system to build performant
+heuristic alignment search methods, e.g. for finding homologies in a sequence
+database or mapping reads to a genome.
+The table below summarizes those provided functionalities.
+The typical stages in alignment search, where those functionalities are used,
+are arranged from top to bottom.
+.. grid::
+    :gutter: 0
+    :class-container: sd-text-center
+    .. grid-item::
+        :padding: 2
+        :outline:
+        :columns: 3
+        **Entire k-mer set**
+    .. grid-item::
+        :padding: 2
+        :outline:
+        :columns: 9
+        .. grid::
+            :margin: 0
+            .. grid-item::
+                :padding: 2
+                :columns: 12
+                **k-mer subset selection**
+            .. grid-item::
+                :padding: 2
+                :columns: 4
+                Minimizers
+                :class:`MinimizerSelector`
+            .. grid-item::
+                :padding: 2
+                :columns: 4
+                Syncmers
+                :class:`SyncmerSelector`
+                :class:`CachedSyncmerSelector`
+            .. grid-item::
+                :padding: 2
+                :columns: 4
+                Mincode
+                :class:`MincodeSelector`
+    .. grid-item::
+        :padding: 2
+        :outline:
+        :columns: 12
+        .. grid::
+            :margin: 0
+            .. grid-item::
+                :padding: 2
+                :columns: 12
+                **k-mer indexing and matching**
+            .. grid-item::
+                :padding: 2
+                :columns: 6
+                Perfect hashing
+                :class:`KmerTable`
+            .. grid-item::
+                :padding: 2
+                :columns: 6
+                Space-efficient hashing
+                :class:`BucketKmerTable`
+                :func:`bucket_number()`
+    .. grid-item::
+        :padding: 2
+        :outline:
+        :columns: 12
+        .. grid::
+            :margin: 0
+            .. grid-item::
+                :padding: 2
+                :columns: 12
+                **Ungapped seed extension**
+                :class:`align_local_ungapped()`
+    .. grid-item::
+        :padding: 2
+        :outline:
+        :columns: 12
+        .. grid::
+            :margin: 0
+            .. grid-item::
+                :padding: 2
+                :columns: 12
+                **Gapped alignment**
+            .. grid-item::
+                :padding: 2
+                :columns: 6
+                Banded local/semiglobal alignment
+                :class:`align_banded()`
+            .. grid-item::
+                :padding: 2
+                :columns: 6
+                Local alignment (*X-drop*)
+                :class:`align_local_gapped()`
+    .. grid-item::
+        :padding: 2
+        :outline:
+        :columns: 12
+        .. grid::
+            :margin: 0
+            .. grid-item::
+                :padding: 2
+                :columns: 12
+                **Significance evaluation**
+                :class:`EValueEstimator`
-This subpackage also contains functionality for finding *k-mer* matches
-between two sequences, allowing fast heuristic pairwise alignments.
 """
 __name__ = "biotite.sequence.align"

biotite/sequence/align/banded.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/localgapped.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/localungapped.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/multiple.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/multiple.pyx CHANGED Viewed

@@ -39,9 +39,9 @@ cdef float32 MAX_FLOAT = np.finfo(np.float32).max
 class GapSymbol:
     _instance = None
     def __init__(self):
         if GapSymbol._instance is not None:
             raise ValueError(
@@ -49,16 +49,16 @@ class GapSymbol:
             )
         else:
             GapSymbol._instance = self
     @staticmethod
     def instance():
         if GapSymbol._instance is None:
             GapSymbol._instance = GapSymbol()
         return GapSymbol._instance
     def __str__(self):
         return "-"
     def __hash__(self):
         return 0
@@ -69,13 +69,13 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
     align_multiple(sequences, matrix, gap_penalty=-10,
                    terminal_penalty=True, distances=None,
                    guide_tree=None)
     Perform a multiple sequence alignment using a progressive
     alignment algorithm. :footcite:`Feng1987`
     Based on pairwise sequence distances a guide tree is constructed.
     The sequences are progressively aligned according to the tree,
-    following the rule 'Once a gap, always a gap'.
+    following the rule 'Once a gap, always a gap'.
     Parameters
     ----------
@@ -124,7 +124,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
     distance_matrix : ndarray, shape=(n,n), dtype=float32
         The pairwise distance matrix used to construct the guide tree.
         Equal to `distances` if provided.
     Notes
     -----
     The similarity to distance conversion is performed according to the
@@ -137,14 +137,14 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
               \right)
     .. math:: S_{a,b}^{max} = \frac{ S_{a,a} + S_{b,b} }{ 2 }
     .. math:: S_{a,b}^{rand} = \frac{1}{L_{a,b}}
               \left(
                  \sum_{x \in \Omega} \sum_{y \in \Omega}
                  s_{x,y} \cdot N_a(x) \cdot N_b(y)
               \right)
               + N_{a,b}^{open} \cdot p^{open} + N_{a,b}^{ext} \cdot p^{ext}
     :math:`D_{a,b}` - The distance between the sequences *a* and *b*.
     :math:`S_{a,b}` - The similarity score between the sequences *a* and *b*.
@@ -164,17 +164,17 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
     In rare cases of extremely unrelated sequences, :math:`S_{a,b}`
     can be lower than :math:`S_{a,b}^{rand}`.
-    In this case the logaritmus cannot be calculated and a
+    In this case the logarithm cannot be calculated and a
     :class:`ValueError` is raised.
     References
     ----------
     .. footbibliography::
     Examples
     --------
     >>> seq1 = ProteinSequence("BIQTITE")
     >>> seq2 = ProteinSequence("TITANITE")
     >>> seq3 = ProteinSequence("BISMITE")
@@ -232,7 +232,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
     else:
         # Assure that every node in the guide tree is binary
         guide_tree = as_binary(guide_tree)
     # Create new matrix with neutral gap symbol
     gap_symbol = GapSymbol.instance()
     new_alphabet = Alphabet(
@@ -275,7 +275,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
     ]
     for i in range(len(aligned_seqs)):
         aligned_seqs[i].code = aligned_seq_codes[i]
     # Reorder alignments into original alignment
     new_order = np.argsort(order)
     aligned_seqs = [aligned_seqs[pos] for pos in new_order]
@@ -290,7 +290,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
     Create all pairwise alignments for the given sequences and use the
     method proposed by Feng & Doolittle to calculate the pairwise
     distance matrix
     Parameters
     ----------
     _T : ndarray, dtype=VARIABLE
@@ -306,7 +306,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
     terminal_penalty : bool
         Whether or not to count terminal gap penalties for the
         alignments.
     Returns
     -------
     distances : ndarray, shape=(n,n), dtype=float32
@@ -332,7 +332,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
             )[0]
             scores[i,j] = alignment.score
             alignments[i,j] = alignment
     ### Distance calculation from similarity scores ###
     # Calculate the occurrences of each symbol code in each sequence
     # This is used later for the random score
@@ -364,7 +364,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
     cdef CodeType[:] seq_code1, seq_code2
     cdef CodeType code1, code2
     cdef float32 score_rand, score_max
     # Calculate distance
     # i and j are indicating the alignment between the sequences i and j
     for i in range(scores_v.shape[0]):
@@ -405,14 +405,14 @@ def _count_gaps(int64[:,:] trace_v, bint terminal_penalty):
     """
     Count the number of gap openings and gap extensions in an alignment
     trace.
     Parameters
     ----------
     trace_v : ndarray, shape=(n,2), dtype=int
         The alignment trace.
     terminal_penalty : bool
         Whether or not to count terminal gap penalties.
     Returns
     -------
     gap_open_count, gap_ext_count: int
@@ -440,7 +440,7 @@ def _count_gaps(int64[:,:] trace_v, bint terminal_penalty):
         if start_index == -1 or stop_index == -1:
             return 0, 0
         trace_v = trace_v[start_index : stop_index]
     if trace_v[0,0] == -1:
         gap_open_count += 1
     if trace_v[0,1] == -1:
@@ -471,7 +471,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
     The gaps inserted in this pairwise alignment are also inserted
     into all other sequences in the respective sub-MSA at the same
     position.
     Parameters
     ----------
     _T : ndarray, dtype=VARIABLE
@@ -490,13 +490,13 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
     matrix : SubstitutionMatrix
         The substitution matrix used for the alignments.
     gap_symbol_code : int
-        The symbol code for the gap symbol.
+        The symbol code for the gap symbol.
     gap_penalty : int or tuple(int, int)
         A linear or affine gap penalty for the alignments.
     terminal_penalty : bool
         Whether or not to count terminal gap penalties for the
         alignments.
     Returns
     -------
     order : ndarray, shape=(m,), dtype=int
@@ -515,7 +515,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
     cdef int32[:] indices1_v, indices2_v
     cdef np.ndarray incides1, incides2
     cdef list aligned_seqs1, aligned_seqs2
     if tree_node.is_leaf():
         # Child node -> Cannot do an alignment
         # -> Just return the sequence corresponding to the leaf node
@@ -523,7 +523,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
         # when neutral gap character is inserted
         return np.array([tree_node.index], dtype=np.int32), \
                [sequences[tree_node.index].copy()]
     else:
         # Multiple alignment of sequences corresponding to both child nodes
         child1, child2 = tree_node.children
@@ -537,7 +537,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
             gap_symbol_code, gap_penalty, terminal_penalty
         )
         indices2_v = incides2
         # Find sequence pair with lowest distance
         dist_min = MAX_FLOAT
         for i in range(indices1_v.shape[0]):
@@ -554,7 +554,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
             gap_penalty, terminal_penalty, max_number=1
         )[0]
         # Place neutral gap symbol for position of new gaps
-        # in both sequence groups
+        # in both sequence groups
         for i in range(len(aligned_seqs1)):
             seq = aligned_seqs1[i]
             seq.code = _replace_gaps(
@@ -580,7 +580,7 @@ def _replace_gaps(CodeType[:] _T,
     The replacement is required by the progressive alignment algorithm
     to be able to align gapped sequences with each other.
     Parameters
     ----------
     _T : ndarray, dtype=VARIABLE
@@ -592,8 +592,8 @@ def _replace_gaps(CodeType[:] _T,
     seq_code : ndarray, shape=(n,)
         The sequence code representing the given sequence.
     gap_symbol_code : int
-        The symbol code for the gap symbol.
+        The symbol code for the gap symbol.
     Returns
     -------
     new_seq_code : ndarray, shape=(m,)
@@ -609,12 +609,12 @@ def _replace_gaps(CodeType[:] _T,
         partial_trace_v.shape[0], dtype=seq_code.dtype
     )
     cdef CodeType[:] new_seq_code_v = new_seq_code
     for i in range(partial_trace_v.shape[0]):
         index = partial_trace_v[i]
         if index == -1:
             new_seq_code_v[i] = gap_symbol_code
         else:
             new_seq_code_v[i] = seq_code[index]
     return new_seq_code

biotite/sequence/align/pairwise.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/permutation.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/selector.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/tracetable.cp311-win_amd64.pyd CHANGED Viewed

Binary file