PyPI - biotite - Versions diffs - 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +156 -43
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/filter.py +1 -1
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +148 -107
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +227 -68
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +16 -16
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/cif.py CHANGED Viewed

@@ -7,7 +7,6 @@ __author__ = "Patrick Kunzmann"
 __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
 import itertools
-import re
 from collections.abc import MutableMapping, Sequence
 import numpy as np
 from biotite.file import (
@@ -357,7 +356,7 @@ class CIFCategory(_Component, MutableMapping):
         return CIFBlock
     @staticmethod
-    def deserialize(text, expect_whitespace=True):
+    def deserialize(text):
         lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
         if _is_loop_start(lines[0]):
@@ -370,9 +369,9 @@ class CIFCategory(_Component, MutableMapping):
         if category_name is None:
             raise DeserializationError("Failed to parse category name")
-        lines = _to_single(lines, is_looped)
+        lines = _to_single(lines)
         if is_looped:
-            category_dict = CIFCategory._deserialize_looped(lines, expect_whitespace)
+            category_dict = CIFCategory._deserialize_looped(lines)
         else:
             category_dict = CIFCategory._deserialize_single(lines)
         return CIFCategory(category_dict, category_name)
@@ -416,6 +415,9 @@ class CIFCategory(_Component, MutableMapping):
             raise ValueError("At least one column must remain")
         del self._columns[key]
+    def __contains__(self, key):
+        return key in self._columns
     def __iter__(self):
         return iter(self._columns)
@@ -439,15 +441,32 @@ class CIFCategory(_Component, MutableMapping):
         Process a category where each field has a single value.
         """
         category_dict = {}
-        for line in lines:
-            parts = _split_one_line(line)
-            column_name = parts[0].split(".")[1]
-            column = parts[1]
-            category_dict[column_name] = CIFColumn(column)
+        line_i = 0
+        while line_i < len(lines):
+            line = lines[line_i]
+            parts = list(_split_one_line(line))
+            if len(parts) == 2:
+                # Standard case -> name and value in one line
+                name_part, value_part = parts
+                line_i += 1
+            elif len(parts) == 1:
+                # Value is a multiline value on the next line
+                name_part = parts[0]
+                parts = list(_split_one_line(lines[line_i + 1]))
+                if len(parts) == 1:
+                    value_part = parts[0]
+                else:
+                    raise DeserializationError(f"Failed to parse line '{line}'")
+                line_i += 2
+            elif len(parts) == 0:
+                raise DeserializationError("Empty line within category")
+            else:
+                raise DeserializationError(f"Failed to parse line '{line}'")
+            category_dict[name_part.split(".")[1]] = CIFColumn(value_part)
         return category_dict
     @staticmethod
-    def _deserialize_looped(lines, expect_whitespace):
+    def _deserialize_looped(lines):
         """
         Process a category where each field has multiple values
         (category is a table).
@@ -468,26 +487,22 @@ class CIFCategory(_Component, MutableMapping):
         data_lines = lines[i:]
         # Rows may be split over multiple lines -> do not rely on
         # row-line-alignment at all and simply cycle through columns
-        column_names = itertools.cycle(column_names)
+        column_indices = itertools.cycle(range(len(column_names)))
         for data_line in data_lines:
-            # If whitespace is expected in quote protected values,
-            # use regex-based _split_one_line() to split
-            # Otherwise use much more faster whitespace split
-            # and quote removal if applicable.
-            if expect_whitespace:
-                values = _split_one_line(data_line)
-            else:
-                values = data_line.split()
-                for k in range(len(values)):
-                    # Remove quotes
-                    if (values[k][0] == '"' and values[k][-1] == '"') or (
-                        values[k][0] == "'" and values[k][-1] == "'"
-                    ):
-                        values[k] = values[k][1:-1]
+            values = _split_one_line(data_line)
             for val in values:
-                column_name = next(column_names)
+                column_index = next(column_indices)
+                column_name = column_names[column_index]
                 category_dict[column_name].append(val)
+        # Check if all columns have the same length
+        # Otherwise, this would indicate a parsing error or an invalid CIF file
+        column_index = next(column_indices)
+        if column_index != 0:
+            raise DeserializationError(
+                "Category contains columns with different lengths"
+            )
         return category_dict
     def _serialize_single(self):
@@ -496,7 +511,8 @@ class CIFCategory(_Component, MutableMapping):
         # "+3" Because of three whitespace chars after longest key
         req_len = max_len + 3
         return [
-            key.ljust(req_len) + _multiline(_quote(column.as_item()))
+            # Remove potential terminal newlines from multiline values
+            (key.ljust(req_len) + _escape(column.as_item())).strip()
             for key, column in zip(keys, self.values())
         ]
@@ -508,7 +524,7 @@ class CIFCategory(_Component, MutableMapping):
             array = column.as_array(str)
             # Quote before measuring the number of chars,
             # as the quote characters modify the length
-            array = np.array([_multiline(_quote(element)) for element in array])
+            array = np.array([_escape(element) for element in array])
             column_arrays.append(array)
         # Number of characters the longest string in the column needs
@@ -522,7 +538,8 @@ class CIFCategory(_Component, MutableMapping):
             for j, array in enumerate(column_arrays):
                 value_lines[i] += array[i].ljust(column_n_chars[j])
             # Remove trailing justification of last column
-            value_lines[i].rstrip()
+            # and potential terminal newlines from multiline values
+            value_lines[i] = value_lines[i].strip()
         return ["loop_"] + key_lines + value_lines
@@ -541,6 +558,17 @@ class CIFBlock(_Component, MutableMapping):
         The keys are the category names and the values are the
         :class:`CIFCategory` objects.
         By default, an empty block is created.
+    name : str, optional
+        The name of the block.
+        This is only used for serialization and is automatically set,
+        when the :class:`CIFBlock` is added to a :class:`CIFFile`.
+        It only needs to be set manually, when the block is directly
+        serialized.
+    Attributes
+    ----------
+    name : str
+        The name of the block.
     Notes
     -----
@@ -552,13 +580,15 @@ class CIFBlock(_Component, MutableMapping):
     --------
     >>> # Add category on creation
-    >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})})
+    >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz")
     >>> # Add category later on
     >>> block["bar"] = CIFCategory({"another_column": [2, 3]})
     >>> # Access a column
     >>> print(block["bar"]["another_column"].as_array())
     ['2' '3']
     >>> print(block.serialize())
+    data_baz
+    #
     _foo.some_column   1
     #
     loop_
@@ -568,11 +598,20 @@ class CIFBlock(_Component, MutableMapping):
     #
     """
-    def __init__(self, categories=None):
+    def __init__(self, categories=None, name=None):
+        self._name = name
         if categories is None:
             categories = {}
         self._categories = categories
+    @property
+    def name(self):
+        return self._name
+    @name.setter
+    def name(self, name):
+        self._name = name
     @staticmethod
     def subcomponent_class():
         return CIFCategory
@@ -606,7 +645,10 @@ class CIFBlock(_Component, MutableMapping):
         return CIFBlock(_create_element_dict(lines, category_names, category_starts))
     def serialize(self):
-        text_blocks = []
+        if self._name is None:
+            raise SerializationError("Block name is required")
+        # The block starts with the block name line followed by a comment line
+        text_blocks = ["data_" + self._name + "\n#\n"]
         for category_name, category in self._categories.items():
             if isinstance(category, str):
                 # Category is already stored as lines
@@ -629,15 +671,7 @@ class CIFBlock(_Component, MutableMapping):
             # Element is stored in serialized form
             # -> must be deserialized first
             try:
-                # Special optimization for "atom_site":
-                # Even if the values are quote protected,
-                # no whitespace is expected in escaped values
-                # Therefore slow regex-based _split_one_line() call is not necessary
-                if key == "atom_site":
-                    expect_whitespace = False
-                else:
-                    expect_whitespace = True
-                category = CIFCategory.deserialize(category, expect_whitespace)
+                category = CIFCategory.deserialize(category)
             except Exception:
                 raise DeserializationError(f"Failed to deserialize category '{key}'")
             # Update with deserialized object
@@ -655,6 +689,9 @@ class CIFBlock(_Component, MutableMapping):
     def __delitem__(self, key):
         del self._categories[key]
+    def __contains__(self, key):
+        return key in self._categories
     def __iter__(self):
         return iter(self._categories)
@@ -778,14 +815,12 @@ class CIFFile(_Component, File, MutableMapping):
     def serialize(self):
         text_blocks = []
         for block_name, block in self._blocks.items():
-            text_blocks.append("data_" + block_name + "\n")
-            # A comment line is set after the block indicator
-            text_blocks.append("#\n")
             if isinstance(block, str):
                 # Block is already stored as text
                 text_blocks.append(block)
             else:
                 try:
+                    block.name = block_name
                     text_blocks.append(block.serialize())
                 except Exception:
                     raise SerializationError(
@@ -856,11 +891,15 @@ class CIFFile(_Component, File, MutableMapping):
     def __setitem__(self, key, block):
         if not isinstance(block, CIFBlock):
             raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
+        block.name = key
         self._blocks[key] = block
     def __delitem__(self, key):
         del self._blocks[key]
+    def __contains__(self, key):
+        return key in self._blocks
     def __iter__(self):
         return iter(self._blocks)
@@ -893,7 +932,7 @@ def _create_element_dict(lines, element_names, element_starts):
     # Lazy deserialization
     # -> keep as text for now and deserialize later if needed
     return {
-        element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
+        element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + "\n"
         for i, element_name in enumerate(element_names)
     }
@@ -927,52 +966,50 @@ def _is_loop_start(line):
     return line.startswith("loop_")
-def _to_single(lines, is_looped):
-    """
+def _to_single(lines):
+    r"""
     Convert multiline values into singleline values
     (in terms of 'lines' list elements).
-    Linebreaks are preserved.
+    Linebreaks are preserved as ``'\n'`` characters within a list element.
+    The initial ``';'`` character is also preserved, while the final ``';'`` character
+    is removed.
     """
-    processed_lines = [None] * len(lines)
-    in_i = 0
-    out_i = 0
-    while in_i < len(lines):
-        if lines[in_i][0] == ";":
-            # Multiline value
-            multi_line_str = lines[in_i][1:]
-            j = in_i + 1
-            while lines[j] != ";":
-                # Preserve linebreaks
-                multi_line_str += "\n" + lines[j]
-                j += 1
-            if is_looped:
-                # Create a line for the multiline string only
-                processed_lines[out_i] = f"'{multi_line_str}'"
-                out_i += 1
+    processed_lines = []
+    in_multi_line = False
+    mutli_line_value = []
+    for line in lines:
+        # Multiline values are enclosed by ';' at the start of the first and last line
+        if line[0] == ";":
+            if not in_multi_line:
+                # Start of multiline value
+                in_multi_line = True
+                mutli_line_value.append(line)
             else:
-                # Append multiline string to previous line
-                processed_lines[out_i - 1] += " " + f"'{multi_line_str}'"
-            in_i = j + 1
-        elif not is_looped and lines[in_i][0] != "_":
-            # Singleline value in the line after the corresponding key
-            processed_lines[out_i - 1] += " " + lines[in_i]
-            in_i += 1
+                # End of multiline value
+                in_multi_line = False
+                # The current line contains only the end character ';'
+                # Hence this line is not added to the processed lines
+                processed_lines.append("\n".join(mutli_line_value))
+                mutli_line_value = []
         else:
-            # Normal singleline value in the same row as the key
-            processed_lines[out_i] = lines[in_i]
-            in_i += 1
-            out_i += 1
-    return [line for line in processed_lines if line is not None]
+            if in_multi_line:
+                mutli_line_value.append(line)
+            else:
+                processed_lines.append(line)
+    return processed_lines
-def _quote(value):
+def _escape(value):
     """
-    A less secure but much quicker version of ``shlex.quote()``.
+    Escape special characters in a value to make it compatible with CIF.
     """
-    if len(value) == 0:
+    if "\n" in value:
+        # A value with linebreaks must be represented as multiline value
+        return _multiline(value)
+    elif "'" in value and '"' in value:
+        # If both quote types are present, you cannot use them for escaping
+        return _multiline(value)
+    elif len(value) == 0:
         return "''"
     elif value[0] == "_":
         return "'" + value + "'"
@@ -990,12 +1027,10 @@ def _quote(value):
 def _multiline(value):
     """
-    Convert a string containing linebreaks into CIF-compatible
+    Convert a string that may contain linebreaks into CIF-compatible
     multiline string.
     """
-    if "\n" in value:
-        return "\n;" + value + "\n;\n"
-    return value
+    return "\n;" + value + "\n;\n"
 def _split_one_line(line):
@@ -1003,27 +1038,33 @@ def _split_one_line(line):
     Split a line into its fields.
     Supporting embedded quotes (' or "), like `'a dog's life'` to  `a dog's life`
     """
-    # Define the patterns for different types of fields
-    single_quote_pattern = r"('(?:'(?! )|[^'])*')(?:\s|$)"
-    double_quote_pattern = r'("(?:"(?! )|[^"])*")(?:\s|$)'
-    unquoted_pattern = r"([^\s]+)"
-    # Combine the patterns using alternation
-    combined_pattern = (
-        f"{single_quote_pattern}|{double_quote_pattern}|{unquoted_pattern}"
-    )
-    # Find all matches
-    matches = re.findall(combined_pattern, line)
-    # Extract non-empty groups from the matches
-    fields = []
-    for match in matches:
-        field = next(group for group in match if group)
-        if field[0] == field[-1] == "'" or field[0] == field[-1] == '"':
-            field = field[1:-1]
-        fields.append(field)
-    return fields
+    # Special case of multiline value, where the line starts with ';'
+    if line[0] == ";":
+        yield line[1:]
+    elif "'" in line or '"' in line:
+        # Quoted values in the line
+        while line:
+            # Strip leading whitespace(s)
+            stripped_line = line.lstrip()
+            # Split the line on whitespace
+            word, _, line = stripped_line.partition(" ")
+            # Handle the case where the word starts with a quote
+            if word.startswith(("'", '"')):
+                # Set the separator to the quote found
+                separator = word[0]
+                # Handle the case of a quoted word without space
+                if word.endswith(separator) and len(word) > 1:
+                    # Yield the word without the opening and closing quotes
+                    yield word[1:-1]
+                    continue
+                # split the word on the separator
+                word, _, line = stripped_line[1:].partition(separator)
+            yield word
+    else:
+        # No quoted values in the line -> simple whitespace split
+        for line in line.split():
+            yield line
 def _arrayfy(data):

biotite/structure/io/pdbx/component.py CHANGED Viewed

@@ -171,10 +171,10 @@ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
         Parameters
         ----------
         store_key_in: str, optional
-        If given, the key of each element is stored as value in the
-        serialized element.
-        This is basically the reverse operation of `take_key_from` in
-        :meth:`_deserialize_elements()`.
+            If given, the key of each element is stored as value in the
+            serialized element.
+            This is basically the reverse operation of `take_key_from` in
+            :meth:`_deserialize_elements()`.
         """
         serialized_elements = []
         for key, element in self._elements.items():
@@ -223,6 +223,11 @@ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
     def __delitem__(self, key):
         del self._elements[key]
+    # Implement `__contains__()` explicitly,
+    # because the mixin method unnecessarily deserializes the value, if available
+    def __contains__(self, key):
+        return key in self._elements
     def __iter__(self):
         return iter(self._elements)