PyPI - biotite - Versions diffs - 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +156 -43
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/filter.py +1 -1
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +148 -107
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +227 -68
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +16 -16
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/encoding.pyx CHANGED Viewed

@@ -287,7 +287,8 @@ class FixedPointEncoding(Encoding):
         The data type of the array to be encoded.
         Either a NumPy dtype or a *BinaryCIF* type code is accepted.
         The dtype must be a float type.
-        If omitted, 32-bit floats are assumed.
+        If omitted, the data type is taken from the data the
+        first time :meth:`encode()` is called.
     Attributes
     ----------
@@ -304,7 +305,7 @@ class FixedPointEncoding(Encoding):
     [987 654]
     """
     factor: ...
-    src_type: ... = TypeCode.FLOAT32
+    src_type: ... = None
     def __post_init__(self):
         if self.src_type is not None:
@@ -315,6 +316,14 @@ class FixedPointEncoding(Encoding):
                 )
     def encode(self, data):
+        # If not given in constructor, it is determined from the data
+        if self.src_type is None:
+            self.src_type = TypeCode.from_dtype(data.dtype)
+            if self.src_type not in (TypeCode.FLOAT32, TypeCode.FLOAT64):
+                raise ValueError(
+                    "Only floating point types are supported"
+                )
         # Round to avoid wrong values due to floating point inaccuracies
         return np.round(data * self.factor).astype(np.int32)
@@ -340,7 +349,8 @@ class IntervalQuantizationEncoding(Encoding):
         The data type of the array to be encoded.
         Either a NumPy dtype or a *BinaryCIF* type code is accepted.
         The dtype must be a float type.
-        If omitted, 32-bit floats are assumed.
+        If omitted, the data type is taken from the data the
+        first time :meth:`encode()` is called.
     Attributes
     ----------
@@ -367,13 +377,17 @@ class IntervalQuantizationEncoding(Encoding):
     min: ...
     max: ...
     num_steps: ...
-    src_type: ... = TypeCode.FLOAT32
+    src_type: ... = None
     def __post_init__(self):
         if self.src_type is not None:
             self.src_type = TypeCode.from_dtype(self.src_type)
     def encode(self, data):
+        # If not given in constructor, it is determined from the data
+        if self.src_type is None:
+            self.src_type = TypeCode.from_dtype(data.dtype)
         steps = np.linspace(
             self.min, self.max, self.num_steps, dtype=data.dtype
         )
@@ -524,7 +538,8 @@ class DeltaEncoding(Encoding):
         first time :meth:`encode()` is called.
     origin : int, optional
         The starting value from which the differences are calculated.
-        If omitted, the origin is set to 0.
+        If omitted, the value is taken from the first array element the
+        first time :meth:`encode()` is called.
     Attributes
     ----------
@@ -535,11 +550,14 @@ class DeltaEncoding(Encoding):
     --------
     >>> data = np.array([1, 1, 2, 3, 5, 8])
-    >>> print(DeltaEncoding().encode(data))
-    [1 0 1 1 2 3]
+    >>> encoding = DeltaEncoding()
+    >>> print(encoding.encode(data))
+    [0 0 1 1 2 3]
+    >>> print(encoding.origin)
+    1
     """
     src_type: ... = None
-    origin: ... = 0
+    origin: ... = None
     def __post_init__(self):
         if self.src_type is not None:
@@ -549,6 +567,8 @@ class DeltaEncoding(Encoding):
         # If not given in constructor, it is determined from the data
         if self.src_type is None:
             self.src_type = TypeCode.from_dtype(data.dtype)
+        if self.origin is None:
+            self.origin = data[0]
         data = data - self.origin
         return np.diff(data, prepend=0).astype(np.int32, copy=False)
@@ -582,7 +602,8 @@ class IntegerPackingEncoding(Encoding):
     is_unsigned : bool, optional
         Whether the values should be packed into signed or unsigned
         integers.
-        If omitted, the values are packed into signed integers.
+        If omitted, it is determined from the data the first time
+        :meth:`encode()` is called: unsigned integers are used if all
+        values are non-negative.
     Attributes
     ----------
@@ -601,7 +622,7 @@ class IntegerPackingEncoding(Encoding):
     """
     byte_count: ...
     src_size: ... = None
-    is_unsigned: ... = False
+    is_unsigned: ... = None
     def encode(self, data):
         if self.src_size is None:
@@ -610,6 +631,9 @@ class IntegerPackingEncoding(Encoding):
             raise IndexError(
                 "Given source size does not match actual data size"
             )
+        if self.is_unsigned is None:
+            # Only non-negative values -> use unsigned integers
+            self.is_unsigned = data.min().item() >= 0
         data = data.astype(np.int32, copy=False)
         return self._encode(
@@ -672,7 +696,7 @@ class IntegerPackingEncoding(Encoding):
         # Get length of output array
         # by summing up required length of each element
         cdef int number
-        cdef int length = 0
+        cdef long length = 0
         for i in range(data.shape[0]):
             number = data[i]
             if number < 0:
@@ -750,7 +774,7 @@ class StringArrayEncoding(Encoding):
         If omitted, the unique strings are determined from the data the
         first time :meth:`encode()` is called.
     data_encoding : list of Encoding, optional
-        The encodings that are applied to the indiy array.
+        The encodings that are applied to the index array.
         If omitted, the array is directly encoded into bytes without
         further compression.
     offset_encoding : list of Encoding, optional
@@ -837,8 +861,11 @@ class StringArrayEncoding(Encoding):
             raise TypeError("Data must be of string type")
         if self.strings is None:
-            # 'unique()' already sorts the strings
-            self.strings = np.unique(data)
+            # 'unique()' already sorts the strings, but this is not necessarily
+            # desired, as this makes efficient encoding of the indices more difficult
+            # -> Bring into the original order
+            _, unique_indices = np.unique(data, return_index=True)
+            self.strings = data[np.sort(unique_indices)]
             check_present = False
         else:
             check_present = True
@@ -888,6 +915,19 @@ _encoding_classes_kinds = {
 def deserialize_encoding(content):
+    """
+    Create a :class:`Encoding` by deserializing the given *BinaryCIF* content.
+    Parameters
+    ----------
+    content : dict
+        The encoding represented as a *BinaryCIF* dictionary.
+    Returns
+    -------
+    encoding : Encoding
+        The deserialized encoding.
+    """
     try:
         encoding_class = _encoding_classes[content["kind"]]
     except KeyError:
@@ -898,28 +938,69 @@ def deserialize_encoding(content):
 def create_uncompressed_encoding(array):
-    dtype = array.dtype
+    """
+    Create a simple encoding for the given array that does not compress the data.
-    if np.issubdtype(dtype, np.str_):
+    Parameters
+    ----------
+    array : ndarray
+        The array to create the encoding for.
+    Returns
+    -------
+    encoding : list of Encoding
+        The encoding for the data.
+    """
+    if np.issubdtype(array.dtype, np.str_):
         return [StringArrayEncoding()]
     else:
         return [ByteArrayEncoding()]
 def encode_stepwise(data, encoding):
+    """
+    Apply a list of encodings stepwise to the given data.
+    Parameters
+    ----------
+    data : ndarray
+        The data to be encoded.
+    encoding : list of Encoding
+        The encodings to be applied.
+    Returns
+    -------
+    encoded_data : ndarray or bytes
+        The encoded data.
+    """
     for encoding in encoding:
         data = encoding.encode(data)
     return data
 def decode_stepwise(data, encoding):
+    """
+    Decode the given data by applying a list of encodings stepwise in reverse order.
+    Parameters
+    ----------
+    data : ndarray or bytes
+        The data to be decoded.
+    encoding : list of Encoding
+        The encodings to be applied.
+    Returns
+    -------
+    decoded_data : ndarray
+        The decoded data.
+    """
     for enc in reversed(encoding):
         data = enc.decode(data)
     return data
 def _camel_to_snake_case(attribute_name):
-    return re.sub(CAMEL_CASE_PATTERN, "_", attribute_name).lower()
+    return CAMEL_CASE_PATTERN.sub("_", attribute_name).lower()
 def _snake_to_camel_case(attribute_name):

biotite/structure/io/trajfile.py CHANGED Viewed

@@ -54,9 +54,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        file_name : str
+        file_name : str or Path
             The path of the file to be read.
-            A file-like-object cannot be used.
+            Any other file-like object cannot be used.
         start : int, optional
             The frame index, where file parsing is started. If no value
             is given, parsing starts at the first frame.
@@ -101,7 +101,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
                 chunk_size = ((chunk_size // step) + 1) * step
         traj_type = cls.traj_type()
-        with traj_type(file_name, "r") as f:
+        with traj_type(str(file_name), "r") as f:
             if start is None:
                 start = 0
             # Discard atoms before start
@@ -153,9 +153,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        file_name : str
+        file_name : str or Path
             The path of the file to be read.
-            A file-like-object cannot be used.
+            Any other file-like object cannot be used.
         start : int, optional
             The frame index, where file parsing is started. If no value
             is given, parsing starts at the first frame.
@@ -196,7 +196,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         The `step` parameter does currently not work for *DCD* files.
         """
         traj_type = cls.traj_type()
-        with traj_type(file_name, "r") as f:
+        with traj_type(str(file_name), "r") as f:
             if start is None:
                 start = 0
             # Discard atoms before start
@@ -280,9 +280,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        file_name : str
+        file_name : str or Path
             The path of the file to be read.
-            A file-like-object cannot be used.
+            Any other file-like object cannot be used.
         template : AtomArray or AtomArrayStack
             The template array or stack, where the atom annotation data
             is taken from.
@@ -354,13 +354,13 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        file_name : str
-            The path of the file to be written to.
-            A file-like-object cannot be used.
+        file_name : str or Path
+            The path of the file to be written to.
+            Any other file-like object cannot be used.
         """
         traj_type = self.traj_type()
         param = self.prepare_write_values(self._coord, self._box, self._time)
-        with traj_type(file_name, "w") as f:
+        with traj_type(str(file_name), "w") as f:
             f.write(**param)
     @classmethod
@@ -378,9 +378,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        file_name : str
-            The path of the file to be written to.
-            A file-like-object cannot be used.
+        file_name : str or Path
+            The path of the file to be written to.
+            Any other file-like object cannot be used.
         coord : generator or array-like of ndarray, shape=(n,3), dtype=float
             The atom coordinates for each frame.
         box : generator or array-like of ndarray, shape=(3,3), dtype=float, optional
@@ -398,7 +398,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
             time = itertools.repeat(None)
         traj_type = cls.traj_type()
-        with traj_type(file_name, "w") as f:
+        with traj_type(str(file_name), "w") as f:
             for c, b, t in zip(coord, box, time):
                 if c.ndim != 2:
                     raise IndexError(