PyPI - biotite - Versions diffs - 0.39.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 0.39.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (121) hide show

biotite/__init__.py +3 -3
biotite/application/dssp/app.py +18 -18
biotite/database/pubchem/download.py +23 -23
biotite/database/pubchem/query.py +7 -7
biotite/database/rcsb/download.py +19 -14
biotite/file.py +17 -9
biotite/sequence/align/banded.c +258 -237
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/cigar.py +60 -15
biotite/sequence/align/kmeralphabet.c +243 -222
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.c +215 -196
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpp +233 -205
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/localgapped.c +258 -237
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.c +235 -214
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/multiple.c +255 -234
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.c +274 -253
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/permutation.c +215 -196
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.c +217 -197
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/tracetable.c +215 -195
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/annotation.py +2 -2
biotite/sequence/codec.c +235 -214
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/io/fasta/convert.py +27 -24
biotite/sequence/phylo/nj.c +215 -196
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.c +227 -202
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.c +215 -196
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/structure/__init__.py +2 -0
biotite/structure/basepairs.py +7 -12
biotite/structure/bonds.c +1437 -1279
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/celllist.c +217 -197
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/charges.c +1052 -1101
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/dotbracket.py +2 -0
biotite/structure/filter.py +30 -37
biotite/structure/info/__init__.py +5 -8
biotite/structure/info/atoms.py +31 -68
biotite/structure/info/bonds.py +47 -101
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +21 -20
biotite/structure/info/misc.py +78 -25
biotite/structure/info/standardize.py +17 -12
biotite/structure/integrity.py +19 -70
biotite/structure/io/__init__.py +2 -4
biotite/structure/io/ctab.py +12 -106
biotite/structure/io/general.py +167 -181
biotite/structure/io/gro/file.py +16 -16
biotite/structure/io/mmtf/__init__.py +3 -0
biotite/structure/io/mmtf/convertarray.c +219 -198
biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.c +217 -197
biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/decode.c +225 -204
biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/encode.c +215 -196
biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/file.py +34 -26
biotite/structure/io/mol/__init__.py +4 -2
biotite/structure/io/mol/convert.py +71 -7
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/{file.py → mol.py} +69 -82
biotite/structure/io/mol/sdf.py +909 -0
biotite/structure/io/npz/__init__.py +3 -0
biotite/structure/io/npz/file.py +21 -18
biotite/structure/io/pdb/__init__.py +3 -3
biotite/structure/io/pdb/file.py +89 -34
biotite/structure/io/pdb/hybrid36.c +63 -43
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +12 -6
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +858 -386
biotite/structure/io/pdbx/encoding.c +112813 -0
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/molecules.py +151 -151
biotite/structure/repair.py +253 -0
biotite/structure/sasa.c +215 -196
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/sequence.py +112 -0
biotite/structure/superimpose.py +618 -116
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
biotite/structure/info/amino_acids.json +0 -1556
biotite/structure/info/amino_acids.py +0 -42
biotite/structure/info/carbohydrates.json +0 -1122
biotite/structure/info/carbohydrates.py +0 -39
biotite/structure/info/intra_bonds.msgpack +0 -0
biotite/structure/info/link_types.msgpack +0 -1
biotite/structure/info/nucleotides.json +0 -772
biotite/structure/info/nucleotides.py +0 -39
biotite/structure/info/residue_masses.msgpack +0 -0
biotite/structure/info/residue_names.msgpack +0 -3
biotite/structure/info/residues.msgpack +0 -0
biotite/structure/io/pdbx/file.py +0 -652
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0

biotite/sequence/align/banded.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/cigar.py CHANGED Viewed

@@ -106,6 +106,14 @@ def read_alignment_from_cigar(cigar, position,
     AAAAGGTTTCCGACCGTAGGTAG
     CCCCGGTTT--GACCGTATGTAG
+    Explicit terminal deletions are also possible.
+    Note that in this case the deleted positions count as aligned bases
+    with respect to the `position` parameter.
+    >>> print(read_alignment_from_cigar("3D9M2D12M4D", 0, ref, seg))
+    TATAAAAGGTTTCCGACCGTAGGTAGCTGA
+    ---CCCCGGTTT--GACCGTATGTAG----
     If bases in the segment sequence are soft-clipped, they do not
     appear in the alignment.
     Furthermore, the start of the reference sequence must be adapted.
@@ -122,7 +130,7 @@ def read_alignment_from_cigar(cigar, position,
     GGTTTCCGACCGTAGGTAG
     GGTTT--GACCGTATGTAG
-    Reading from BAM codes is also possible:
+    Reading from BAM codes is also possible.
     >>> seg = NucleotideSequence("CCCCGGTTTGACCGTATGTAG")
     >>> op_tuples = [
@@ -190,7 +198,8 @@ def read_alignment_from_cigar(cigar, position,
 def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
                              introns=(), distinguish_matches=False,
-                             hard_clip=False, as_string=True):
+                             hard_clip=False, include_terminal_gaps=False,
+                             as_string=True):
     """
     Convert an :class:`Alignment` into a CIGAR string.
@@ -220,6 +229,13 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     hard_clip : bool, optional
         If true, clipped bases are hard-clipped.
         Otherwise, clipped bases are soft-clipped.
+    include_terminal_gaps : bool, optional
+        If true, terminal gaps in the segment sequence are included in
+        the CIGAR string.
+        These are represented by ``D`` operations at the start and/or
+        end of the string.
+        By default, those terminal gaps are omitted in the CIGAR, which
+        is the way SAM/BAM expects a CIGAR to be.
     as_string : bool, optional
         If true, the CIGAR string is returned.
         Otherwise, a list of tuples is returned, where the first element
@@ -238,6 +254,12 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     --------
     read_alignment_from_cigar
+    Notes
+    -----
+    If `include_terminal_gaps` is set to true, you usually want to set
+    ``position=0`` in :func:`read_alignment_from_cigar` to get the
+    correct alignment.
     Examples
     --------
@@ -256,6 +278,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     9M2N12M
     >>> print(write_alignment_to_cigar(semiglobal_alignment, distinguish_matches=True))
     4X5=2D7=1X4=
+    >>> print(write_alignment_to_cigar(semiglobal_alignment, include_terminal_gaps=True))
+    3D9M2D12M4D
     >>> local_alignment = align_optimal(ref, seg, matrix, local=True)[0]
     >>> print(local_alignment)
     GGTTTCCGACCGTAGGTAG
@@ -274,9 +298,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     CigarOp.DELETION 2
     CigarOp.MATCH 12
     """
-    # Ignore terminal gaps in segment sequence
-    no_gap_pos = np.where(alignment.trace[:, segment_index] != -1)[0]
-    alignment = alignment[no_gap_pos[0] : no_gap_pos[-1] + 1]
+    if not include_terminal_gaps:
+        alignment = _remove_terminal_segment_gaps(alignment, segment_index)
     ref_trace = alignment.trace[:, reference_index]
     seg_trace = alignment.trace[:, segment_index]
@@ -321,19 +344,13 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     op_tuples = _aggregate_consecutive(operations)
     clip_op = CigarOp.HARD_CLIP if hard_clip else CigarOp.SOFT_CLIP
-    # Missing bases at the beginning and end of the segment are
-    # interpreted as clipped
-    # As first element in the segment trace is the first aligned base,
-    # all previous bases are clipped...
-    start_clip_length = seg_trace[0]
+    start_clip_length, end_clip_length = _find_clipped_bases(
+        alignment, segment_index
+    )
     if start_clip_length != 0:
-        start_clip = [(clip_op, seg_trace[0])]
+        start_clip = [(clip_op, start_clip_length)]
     else:
         start_clip = np.zeros((0, 2), dtype=int)
-    # ...and the same applies for the last base
-    end_clip_length = (
-        len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
-    )
     if end_clip_length != 0:
         end_clip = [(clip_op, end_clip_length)]
     else:
@@ -347,6 +364,34 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
         return op_tuples
+def _remove_terminal_segment_gaps(alignment, segment_index):
+    """
+    Remove terminal gaps in the segment sequence.
+    """
+    no_gap_pos = np.where(alignment.trace[:, segment_index] != -1)[0]
+    return alignment[no_gap_pos[0] : no_gap_pos[-1] + 1]
+def _find_clipped_bases(alignment, segment_index):
+    """
+    Find the number of clipped bases at the start and end of the segment.
+    """
+    # Finding the clipped part is easier, when the terminal segment gaps
+    # are removed (if not already done)
+    alignment = _remove_terminal_segment_gaps(alignment, segment_index)
+    seg_trace = alignment.trace[:, segment_index]
+    # Missing bases at the beginning and end of the segment are
+    # interpreted as clipped
+    # As first element in the segment trace is the first aligned base,
+    # all previous bases are clipped...
+    start_clip_length = seg_trace[0]
+    # ...and the same applies for the last base
+    end_clip_length = (
+        len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
+    )
+    return start_clip_length, end_clip_length
 def _aggregate_consecutive(operations):
     """
     Aggregate consecutive operations of the same type.