PyPI - biotite - Versions diffs - 0.40.0__cp310-cp310-win_amd64.whl → 0.41.0__cp310-cp310-win_amd64.whl - Mend

biotite 0.40.0__cp310-cp310-win_amd64.whl → 0.41.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show

biotite/__init__.py +1 -1
biotite/database/pubchem/download.py +23 -23
biotite/database/pubchem/query.py +7 -7
biotite/file.py +17 -9
biotite/sequence/align/banded.c +119 -119
biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/cigar.py +60 -15
biotite/sequence/align/kmeralphabet.c +119 -119
biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.c +119 -119
biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cpp +119 -119
biotite/sequence/align/localgapped.c +119 -119
biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.c +119 -119
biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.c +119 -119
biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.c +119 -119
biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.c +119 -119
biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/selector.c +119 -119
biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.c +119 -119
biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
biotite/sequence/annotation.py +2 -2
biotite/sequence/codec.c +119 -119
biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
biotite/sequence/io/fasta/convert.py +27 -24
biotite/sequence/phylo/nj.c +119 -119
biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.c +119 -119
biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.c +119 -119
biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
biotite/structure/__init__.py +2 -0
biotite/structure/bonds.c +1124 -915
biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
biotite/structure/celllist.c +119 -119
biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
biotite/structure/charges.c +119 -119
biotite/structure/charges.cp310-win_amd64.pyd +0 -0
biotite/structure/dotbracket.py +2 -0
biotite/structure/info/atoms.py +6 -1
biotite/structure/info/bonds.py +1 -1
biotite/structure/info/ccd/amino_acids.txt +17 -0
biotite/structure/info/ccd/carbohydrates.txt +2 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +1 -0
biotite/structure/info/misc.py +69 -5
biotite/structure/integrity.py +19 -70
biotite/structure/io/ctab.py +12 -106
biotite/structure/io/general.py +157 -165
biotite/structure/io/gro/file.py +16 -16
biotite/structure/io/mmtf/convertarray.c +119 -119
biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.c +119 -119
biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.c +119 -119
biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.c +119 -119
biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mol/__init__.py +4 -2
biotite/structure/io/mol/convert.py +71 -7
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/{file.py → mol.py} +69 -82
biotite/structure/io/mol/sdf.py +909 -0
biotite/structure/io/pdb/file.py +84 -31
biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/__init__.py +0 -1
biotite/structure/io/pdbx/bcif.py +2 -3
biotite/structure/io/pdbx/cif.py +9 -5
biotite/structure/io/pdbx/component.py +4 -1
biotite/structure/io/pdbx/convert.py +203 -79
biotite/structure/io/pdbx/encoding.c +119 -119
biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
biotite/structure/repair.py +253 -0
biotite/structure/sasa.c +119 -119
biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
biotite/structure/sequence.py +112 -0
biotite/structure/superimpose.py +472 -13
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
biotite/structure/io/pdbx/error.py +0 -14
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0

biotite/structure/io/pdb/file.py CHANGED Viewed

@@ -12,7 +12,7 @@ from ...atoms import AtomArray, AtomArrayStack, repeat
 from ...bonds import BondList, connect_via_residue_names
 from ...box import vectors_from_unitcell, unitcell_from_vectors
 from ....file import TextFile, InvalidFileError
-from ..general import _guess_element as guess_element
+from ...repair import infer_elements
 from ...error import BadStructureError
 from ...filter import (
     filter_first_altloc,
@@ -23,6 +23,9 @@ from ...util import matrix_rotate
 from .hybrid36 import encode_hybrid36, decode_hybrid36, max_hybrid36_number
+_PDB_MAX_ATOMS = 99999
+_PDB_MAX_RESIDUES = 9999
 # slice objects for readability
 # ATOM/HETATM
 _record = slice(0, 6)
@@ -460,15 +463,14 @@ class PDBFile(TextFile):
         # Replace empty strings for elements with guessed types
         # This is used e.g. for PDB files created by Gromacs
-        if "" in array.element:
-            rep_num = 0
-            for idx in range(len(array.element)):
-                if not array.element[idx]:
-                    atom_name = array.atom_name[idx]
-                    array.element[idx] = guess_element(atom_name)
-                    rep_num += 1
+        empty_element_mask = array.element == ""
+        if empty_element_mask.any():
             warnings.warn(
-                "{} elements were guessed from atom_name.".format(rep_num)
+                f"{np.count_nonzero(empty_element_mask)} elements "
+                "were guessed from atom name"
+            )
+            array.element[empty_element_mask] = infer_elements(
+                array.atom_name[empty_element_mask]
             )
         # Fill in coordinates
@@ -574,6 +576,8 @@ class PDBFile(TextFile):
         records are also written for all non-water hetero residues
         and all inter-residue connections.
         """
+        _check_pdb_compatibility(array, hybrid36)
         natoms = array.array_length()
         annot_categories = array.get_annotation_categories()
         record = np.char.array(np.where(array.hetero, "HETATM", "ATOM"))
@@ -599,25 +603,6 @@ class PDBFile(TextFile):
         else:
             charge = np.char.array(np.full(natoms, "  ", dtype="U2"))
-        # Do checks on atom array (stack)
-        if hybrid36:
-            max_atoms = max_hybrid36_number(5)
-            max_residues = max_hybrid36_number(4)
-        else:
-            max_atoms, max_residues = 99999, 9999
-        if array.array_length() > max_atoms:
-            warnings.warn(f"More then {max_atoms:,} atoms per model")
-        if (array.res_id > max_residues).any():
-            warnings.warn(f"Residue IDs exceed {max_residues:,}")
-        if np.isnan(array.coord).any():
-            raise BadStructureError("Coordinates contain 'NaN' values")
-        if any([len(name) > 1 for name in array.chain_id]):
-            raise BadStructureError("Some chain IDs exceed 1 character")
-        if any([len(name) > 3 for name in array.res_name]):
-            raise BadStructureError("Some residue names exceed 3 characters")
-        if any([len(name) > 4 for name in array.atom_name]):
-            raise BadStructureError("Some atom names exceed 4 characters")
         if hybrid36:
             pdb_atom_id = np.char.array(
                 [encode_hybrid36(i, 5) for i in atom_id]
@@ -630,14 +615,14 @@ class PDBFile(TextFile):
             # but negative IDs are also possible
             pdb_atom_id = np.char.array(np.where(
                 atom_id > 0,
-                ((atom_id - 1) % 99999) + 1,
+                ((atom_id - 1) % _PDB_MAX_ATOMS) + 1,
                 atom_id
             ).astype(str))
             # Residue IDs are supported up to 9999,
             # but negative IDs are also possible
             pdb_res_id = np.char.array(np.where(
                 array.res_id > 0,
-                ((array.res_id - 1) % 9999) + 1,
+                ((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
                 array.res_id
             ).astype(str))
@@ -1184,4 +1169,72 @@ def _apply_transformations(structure, rotations, translations):
         coord += translation
         assembly_coord[i] = coord
-    return repeat(structure, assembly_coord)
+    return repeat(structure, assembly_coord)
+def _check_pdb_compatibility(array, hybrid36):
+    """
+    Check whether the given structure fits into the fixed-width
+    columns of the PDB format.
+
+    Parameters
+    ----------
+    array : AtomArray or AtomArrayStack
+        The structure to be checked.
+    hybrid36 : bool
+        If true, the atom and residue ID limits are taken from
+        ``max_hybrid36_number()`` instead of the plain decimal limits.
+
+    Warns
+    -----
+    UserWarning
+        If atom IDs or residue IDs exceed the respective limit.
+
+    Raises
+    ------
+    BadStructureError
+        If coordinates contain *NaN*, if chain IDs, residue names or
+        atom names are too long, or if coordinates, B-factors,
+        occupancies or charges need more columns than available.
+    """
+    if array.array_length() == 0:
+        # Without atoms no limit can be exceeded and the min/max
+        # reductions below would raise an opaque 'ValueError'
+        return
+
+    annot_categories = array.get_annotation_categories()
+
+    if hybrid36:
+        max_atoms = max_hybrid36_number(5)
+        max_residues = max_hybrid36_number(4)
+    else:
+        max_atoms, max_residues = _PDB_MAX_ATOMS, _PDB_MAX_RESIDUES
+    if "atom_id" in annot_categories:
+        max_atom_id = np.max(array.atom_id)
+    else:
+        # Atom IDs are not annotated: use the number of atoms instead
+        max_atom_id = array.array_length()
+
+    # Exceeded ID limits are not fatal -> only warn
+    if max_atom_id > max_atoms:
+        warnings.warn(f"Atom IDs exceed {max_atoms:,}, will be wrapped")
+    if (array.res_id > max_residues).any():
+        warnings.warn(f"Residue IDs exceed {max_residues:,}, will be wrapped")
+
+    if np.isnan(array.coord).any():
+        raise BadStructureError("Coordinates contain 'NaN' values")
+    if any(len(name) > 1 for name in array.chain_id):
+        raise BadStructureError("Some chain IDs exceed 1 character")
+    if any(len(name) > 3 for name in array.res_name):
+        raise BadStructureError("Some residue names exceed 3 characters")
+    if any(len(name) > 4 for name in array.atom_name):
+        raise BadStructureError("Some atom names exceed 4 characters")
+
+    # The last dimension of the coordinates holds x, y and z
+    for i, coord_name in enumerate(["x", "y", "z"]):
+        n_coord_digits = _number_of_integer_digits(array.coord[..., i])
+        if n_coord_digits > 4:
+            raise BadStructureError(
+                f"4 pre-decimal columns for {coord_name}-coordinates are "
+                f"available, but array would require {n_coord_digits}"
+            )
+    if "b_factor" in annot_categories:
+        n_b_factor_digits = _number_of_integer_digits(array.b_factor)
+        if n_b_factor_digits > 3:
+            raise BadStructureError(
+                "3 pre-decimal columns for B-factor are available, "
+                f"but array would require {n_b_factor_digits}"
+            )
+    if "occupancy" in annot_categories:
+        n_occupancy_digits = _number_of_integer_digits(array.occupancy)
+        if n_occupancy_digits > 3:
+            raise BadStructureError(
+                "3 pre-decimal columns for occupancy are available, "
+                f"but array would require {n_occupancy_digits}"
+            )
+    if "charge" in annot_categories:
+        # The sign can be omitted as it is put into the adjacent column
+        n_charge_digits = _number_of_integer_digits(np.abs(array.charge))
+        if n_charge_digits > 1:
+            raise BadStructureError(
+                "1 column for charge is available, "
+                f"but array would require {n_charge_digits}"
+            )
+def _number_of_integer_digits(values):
+    """
+    Get the maximum number of characters needed to represent the
+    pre-decimal positions of the given numeric values.
+
+    Parameters
+    ----------
+    values : ndarray
+        The numeric values to be checked.
+
+    Returns
+    -------
+    n_digits : int
+        The length of the longest string representation among the
+        values after conversion to integers.
+        A minus sign counts as one character.
+        ``0``, if `values` is empty.
+    """
+    if values.size == 0:
+        # 'np.min()' and 'np.max()' raise an exception on empty arrays
+        return 0
+    values = values.astype(int, copy=False)
+    # Only the extreme values can have the longest representation:
+    # the minimum covers negative values including their sign
+    return max(len(str(np.min(values))), len(str(np.max(values))))

biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/io/pdbx/__init__.py CHANGED Viewed

@@ -20,5 +20,4 @@ from .bcif import *
 from .cif import *
 from .component import *
 from .encoding import *
-from .error import *
 from .legacy import *

biotite/structure/io/pdbx/bcif.py CHANGED Viewed

@@ -12,9 +12,8 @@ import numpy as np
 import msgpack
 from .component import _Component, _HierarchicalContainer, MaskValue
 from .encoding import decode_stepwise, encode_stepwise, deserialize_encoding, \
-                      create_uncompressed_encoding, ByteArrayEncoding
-from .error import SerializationError
-from ....file import File, is_binary, is_open_compatible
+                      create_uncompressed_encoding
+from ....file import File, is_binary, is_open_compatible, SerializationError
 class BinaryCIFData(_Component):

biotite/structure/io/pdbx/cif.py CHANGED Viewed

@@ -11,8 +11,8 @@ import shlex
 from collections.abc import MutableMapping, Sequence
 import numpy as np
 from .component import _Component, MaskValue
-from .error import DeserializationError, SerializationError
-from ....file import File, is_open_compatible, is_text
+from ....file import File, is_open_compatible, is_text, DeserializationError, \
+                     SerializationError
 UNICODE_CHAR_SIZE = 4
@@ -402,7 +402,9 @@ class CIFCategory(_Component, MutableMapping):
                     f"while the first column has row_count {self._row_count}"
                 )
-        if self._row_count == 1:
+        if self._row_count == 0:
+            raise ValueError("At least one row is required")
+        elif self._row_count == 1:
             lines = self._serialize_single()
         else:
             lines = self._serialize_looped()
@@ -766,7 +768,7 @@ class CIFFile(_Component, File, MutableMapping):
     @property
     def lines(self):
-        return "\n".join(self.serialize())
+        return self.serialize().splitlines()
     @property
     def block(self):
@@ -978,7 +980,7 @@ def _to_single(lines, is_looped):
                 processed_lines[out_i - 1] += " " + shlex.quote(multi_line_str)
             in_i = j + 1
-        elif not is_looped and lines[in_i][0] in ["'", '"']:
+        elif not is_looped and lines[in_i][0] != "_":
             # Singleline value in the line after the corresponding key
             processed_lines[out_i - 1] += " " + lines[in_i]
             in_i += 1
@@ -1025,4 +1027,6 @@ def _multiline(value):
 def _arrayfy(data):
     if not isinstance(data, (Sequence, np.ndarray)) or isinstance(data, str):
         data = [data]
+    elif len(data) == 0:
+        raise ValueError("Array must contain at least one element")
     return np.asarray(data)

biotite/structure/io/pdbx/component.py CHANGED Viewed

@@ -14,7 +14,7 @@ __all__ = ["MaskValue"]
 from enum import IntEnum
 from abc import ABCMeta, abstractmethod
 from collections.abc import MutableMapping
-from .error import SerializationError, DeserializationError
+from ....file import SerializationError, DeserializationError
 class MaskValue(IntEnum):
@@ -105,6 +105,9 @@ class _Component(metaclass=ABCMeta):
         """
         raise NotImplementedError()
+    def __str__(self):
+        return str(self.serialize())
 class _HierarchicalContainer(_Component, MutableMapping,
                              metaclass=ABCMeta):