PyPI - biotite - Versions diffs - 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl - Mend

biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (205)

biotite/__init__.py +2 -3
biotite/application/__init__.py +1 -1
biotite/application/application.py +20 -10
biotite/application/autodock/__init__.py +1 -1
biotite/application/autodock/app.py +74 -79
biotite/application/blast/__init__.py +1 -1
biotite/application/blast/alignment.py +19 -10
biotite/application/blast/webapp.py +92 -85
biotite/application/clustalo/__init__.py +1 -1
biotite/application/clustalo/app.py +46 -61
biotite/application/dssp/__init__.py +1 -1
biotite/application/dssp/app.py +8 -11
biotite/application/localapp.py +62 -60
biotite/application/mafft/__init__.py +1 -1
biotite/application/mafft/app.py +16 -22
biotite/application/msaapp.py +78 -89
biotite/application/muscle/__init__.py +1 -1
biotite/application/muscle/app3.py +50 -64
biotite/application/muscle/app5.py +23 -31
biotite/application/sra/__init__.py +1 -1
biotite/application/sra/app.py +64 -68
biotite/application/tantan/__init__.py +1 -1
biotite/application/tantan/app.py +22 -45
biotite/application/util.py +7 -9
biotite/application/viennarna/rnaalifold.py +34 -28
biotite/application/viennarna/rnafold.py +24 -39
biotite/application/viennarna/rnaplot.py +36 -21
biotite/application/viennarna/util.py +17 -12
biotite/application/webapp.py +13 -14
biotite/copyable.py +13 -13
biotite/database/__init__.py +1 -1
biotite/database/entrez/__init__.py +1 -1
biotite/database/entrez/check.py +2 -3
biotite/database/entrez/dbnames.py +7 -5
biotite/database/entrez/download.py +55 -49
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +62 -23
biotite/database/error.py +2 -1
biotite/database/pubchem/__init__.py +1 -1
biotite/database/pubchem/download.py +43 -45
biotite/database/pubchem/error.py +2 -2
biotite/database/pubchem/query.py +34 -31
biotite/database/pubchem/throttle.py +3 -4
biotite/database/rcsb/__init__.py +1 -1
biotite/database/rcsb/download.py +44 -52
biotite/database/rcsb/query.py +85 -80
biotite/database/uniprot/check.py +6 -3
biotite/database/uniprot/download.py +6 -11
biotite/database/uniprot/query.py +115 -31
biotite/file.py +12 -31
biotite/sequence/__init__.py +3 -3
biotite/sequence/align/__init__.py +2 -2
biotite/sequence/align/alignment.py +99 -90
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +12 -10
biotite/sequence/align/cigar.py +43 -52
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +55 -51
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +3 -2
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/matrix.py +81 -82
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -1
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.pyx +12 -4
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +52 -54
biotite/sequence/align/statistics.py +32 -33
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +51 -65
biotite/sequence/annotation.py +78 -77
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/codon.py +90 -79
biotite/sequence/graphics/__init__.py +1 -1
biotite/sequence/graphics/alignment.py +184 -103
biotite/sequence/graphics/colorschemes.py +10 -12
biotite/sequence/graphics/dendrogram.py +79 -34
biotite/sequence/graphics/features.py +133 -99
biotite/sequence/graphics/logo.py +22 -28
biotite/sequence/graphics/plasmid.py +229 -178
biotite/sequence/io/fasta/__init__.py +1 -1
biotite/sequence/io/fasta/convert.py +44 -33
biotite/sequence/io/fasta/file.py +42 -55
biotite/sequence/io/fastq/__init__.py +1 -1
biotite/sequence/io/fastq/convert.py +11 -14
biotite/sequence/io/fastq/file.py +68 -112
biotite/sequence/io/genbank/__init__.py +2 -2
biotite/sequence/io/genbank/annotation.py +12 -20
biotite/sequence/io/genbank/file.py +74 -76
biotite/sequence/io/genbank/metadata.py +74 -62
biotite/sequence/io/genbank/sequence.py +13 -14
biotite/sequence/io/general.py +39 -30
biotite/sequence/io/gff/__init__.py +2 -2
biotite/sequence/io/gff/convert.py +10 -15
biotite/sequence/io/gff/file.py +81 -65
biotite/sequence/phylo/__init__.py +1 -1
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/sequence/profile.py +57 -28
biotite/sequence/search.py +17 -15
biotite/sequence/seqtypes.py +200 -164
biotite/sequence/sequence.py +15 -17
biotite/structure/__init__.py +3 -3
biotite/structure/atoms.py +246 -236
biotite/structure/basepairs.py +260 -271
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +29 -32
biotite/structure/box.py +67 -71
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/chains.py +55 -39
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/compare.py +32 -32
biotite/structure/density.py +13 -18
biotite/structure/dotbracket.py +20 -22
biotite/structure/error.py +10 -2
biotite/structure/filter.py +83 -78
biotite/structure/geometry.py +130 -119
biotite/structure/graphics/atoms.py +60 -43
biotite/structure/graphics/rna.py +81 -68
biotite/structure/hbond.py +112 -93
biotite/structure/info/__init__.py +0 -2
biotite/structure/info/atoms.py +10 -11
biotite/structure/info/bonds.py +41 -43
biotite/structure/info/ccd.py +4 -5
biotite/structure/info/groups.py +1 -3
biotite/structure/info/masses.py +5 -10
biotite/structure/info/misc.py +1 -1
biotite/structure/info/radii.py +20 -20
biotite/structure/info/standardize.py +15 -26
biotite/structure/integrity.py +18 -71
biotite/structure/io/__init__.py +3 -4
biotite/structure/io/dcd/__init__.py +1 -1
biotite/structure/io/dcd/file.py +22 -20
biotite/structure/io/general.py +47 -61
biotite/structure/io/gro/__init__.py +1 -1
biotite/structure/io/gro/file.py +73 -72
biotite/structure/io/mol/__init__.py +1 -1
biotite/structure/io/mol/convert.py +8 -11
biotite/structure/io/mol/ctab.py +37 -36
biotite/structure/io/mol/header.py +14 -10
biotite/structure/io/mol/mol.py +9 -53
biotite/structure/io/mol/sdf.py +47 -50
biotite/structure/io/netcdf/__init__.py +1 -1
biotite/structure/io/netcdf/file.py +24 -23
biotite/structure/io/pdb/__init__.py +1 -1
biotite/structure/io/pdb/convert.py +32 -20
biotite/structure/io/pdb/file.py +151 -172
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/__init__.py +1 -1
biotite/structure/io/pdbqt/convert.py +17 -11
biotite/structure/io/pdbqt/file.py +128 -80
biotite/structure/io/pdbx/__init__.py +1 -2
biotite/structure/io/pdbx/bcif.py +36 -44
biotite/structure/io/pdbx/cif.py +140 -110
biotite/structure/io/pdbx/component.py +10 -16
biotite/structure/io/pdbx/convert.py +260 -258
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +90 -107
biotite/structure/io/trr/__init__.py +1 -1
biotite/structure/io/trr/file.py +12 -15
biotite/structure/io/xtc/__init__.py +1 -1
biotite/structure/io/xtc/file.py +11 -14
biotite/structure/mechanics.py +9 -11
biotite/structure/molecules.py +3 -4
biotite/structure/pseudoknots.py +53 -67
biotite/structure/rdf.py +23 -21
biotite/structure/repair.py +137 -86
biotite/structure/residues.py +26 -16
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/{resutil.py → segments.py} +24 -23
biotite/structure/sequence.py +10 -11
biotite/structure/sse.py +100 -119
biotite/structure/superimpose.py +39 -77
biotite/structure/transform.py +97 -71
biotite/structure/util.py +11 -13
biotite/version.py +2 -2
biotite/visualize.py +69 -55
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
biotite-1.0.1.dist-info/RECORD +322 -0
biotite/structure/io/ctab.py +0 -72
biotite/structure/io/mmtf/__init__.py +0 -21
biotite/structure/io/mmtf/assembly.py +0 -214
biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertarray.pyx +0 -341
biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.pyx +0 -501
biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.pyx +0 -152
biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.pyx +0 -183
biotite/structure/io/mmtf/file.py +0 -233
biotite/structure/io/npz/__init__.py +0 -20
biotite/structure/io/npz/file.py +0 -152
biotite/structure/io/pdbx/legacy.py +0 -267
biotite/structure/io/tng/__init__.py +0 -13
biotite/structure/io/tng/file.py +0 -46
biotite/temp.py +0 -86
biotite-0.41.2.dist-info/RECORD +0 -340
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/cif.py CHANGED Viewed

@@ -7,13 +7,17 @@ __author__ = "Patrick Kunzmann"
 __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
 import itertools
-import shlex
+import re
 from collections.abc import MutableMapping, Sequence
 import numpy as np
-from .component import _Component, MaskValue
-from ....file import File, is_open_compatible, is_text, DeserializationError, \
-                     SerializationError
+from biotite.file import (
+    DeserializationError,
+    File,
+    SerializationError,
+    is_open_compatible,
+    is_text,
+)
+from biotite.structure.io.pdbx.component import MaskValue, _Component
 UNICODE_CHAR_SIZE = 4
@@ -133,9 +137,7 @@ class CIFColumn:
         if not isinstance(data, CIFData):
             data = CIFData(data, str)
         if mask is None:
-            mask = np.full(
-                len(data), MaskValue.PRESENT, dtype=np.uint8
-            )
+            mask = np.full(len(data), MaskValue.PRESENT, dtype=np.uint8)
             mask[data.array == "."] = MaskValue.INAPPLICABLE
             mask[data.array == "?"] = MaskValue.MISSING
             if np.all(mask == MaskValue.PRESENT):
@@ -148,8 +150,7 @@ class CIFColumn:
                 mask = CIFData(mask, np.uint8)
             if len(mask) != len(data):
                 raise IndexError(
-                    f"Data has length {len(data)}, "
-                    f"but mask has length {len(mask)}"
+                    f"Data has length {len(data)}, " f"but mask has length {len(mask)}"
                 )
         self._data = data
         self._mask = mask
@@ -222,9 +223,7 @@ class CIFColumn:
         elif np.issubdtype(dtype, np.str_):
             # Limit float precision to 3 decimals
             if np.issubdtype(self._data.array.dtype, np.floating):
-                array = np.array(
-                    [f"{e:.3f}" for e in self._data.array], type=dtype
-                )
+                array = np.array([f"{e:.3f}" for e in self._data.array], type=dtype)
             else:
                 # Copy, as otherwise original data would be overwritten
                 # with mask values
@@ -247,9 +246,7 @@ class CIFColumn:
                 array = np.full(len(self._data), masked_value, dtype=dtype)
             present_mask = self._mask.array == MaskValue.PRESENT
-            array[present_mask] = (
-                self._data.array[present_mask].astype(dtype)
-            )
+            array[present_mask] = self._data.array[present_mask].astype(dtype)
             return array
     def __len__(self):
@@ -361,9 +358,7 @@ class CIFCategory(_Component, MutableMapping):
     @staticmethod
     def deserialize(text, expect_whitespace=True):
-        lines = [
-            line.strip() for line in text.splitlines() if not _is_empty(line)
-        ]
+        lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
         if _is_loop_start(lines[0]):
             is_looped = True
@@ -373,15 +368,11 @@ class CIFCategory(_Component, MutableMapping):
         category_name = _parse_category_name(lines[0])
         if category_name is None:
-            raise DeserializationError(
-                "Failed to parse category name"
-            )
+            raise DeserializationError("Failed to parse category name")
-        lines = _to_single(lines, is_looped)
+        lines = _to_single(lines)
         if is_looped:
-            category_dict = CIFCategory._deserialize_looped(
-                lines, expect_whitespace
-            )
+            category_dict = CIFCategory._deserialize_looped(lines, expect_whitespace)
         else:
             category_dict = CIFCategory._deserialize_single(lines)
         return CIFCategory(category_dict, category_name)
@@ -448,11 +439,28 @@ class CIFCategory(_Component, MutableMapping):
         Process a category where each field has a single value.
         """
         category_dict = {}
-        for line in lines:
-            parts = shlex.split(line)
-            column_name = parts[0].split(".")[1]
-            column = parts[1]
-            category_dict[column_name] = CIFColumn(column)
+        line_i = 0
+        while line_i < len(lines):
+            line = lines[line_i]
+            parts = _split_one_line(line)
+            if len(parts) == 2:
+                # Standard case -> name and value in one line
+                name_part, value_part = parts
+                line_i += 1
+            elif len(parts) == 1:
+                # Value is a multiline value on the next line
+                name_part = parts[0]
+                parts = _split_one_line(lines[line_i + 1])
+                if len(parts) == 1:
+                    value_part = parts[0]
+                else:
+                    raise DeserializationError(f"Failed to parse line '{line}'")
+                line_i += 2
+            elif len(parts) == 0:
+                raise DeserializationError("Empty line within category")
+            else:
+                raise DeserializationError(f"Failed to parse line '{line}'")
+            category_dict[name_part.split(".")[1]] = CIFColumn(value_part)
         return category_dict
     @staticmethod
@@ -477,15 +485,14 @@ class CIFCategory(_Component, MutableMapping):
         data_lines = lines[i:]
         # Rows may be split over multiple lines -> do not rely on
         # row-line-alignment at all and simply cycle through columns
-        column_names = itertools.cycle(column_names)
+        column_indices = itertools.cycle(range(len(column_names)))
         for data_line in data_lines:
             # If whitespace is expected in quote protected values,
-            # use standard shlex split
+            # use regex-based _split_one_line() to split
             # Otherwise use much more faster whitespace split
-            # and quote removal if applicable,
-            # bypassing the slow shlex module
+            # and quote removal if applicable.
             if expect_whitespace:
-                values = shlex.split(data_line)
+                values = _split_one_line(data_line)
             else:
                 values = data_line.split()
                 for k in range(len(values)):
@@ -495,9 +502,18 @@ class CIFCategory(_Component, MutableMapping):
                     ):
                         values[k] = values[k][1:-1]
             for val in values:
-                column_name = next(column_names)
+                column_index = next(column_indices)
+                column_name = column_names[column_index]
                 category_dict[column_name].append(val)
+        # Check if all columns have the same length
+        # Otherwise, this would indicate a parsing error or an invalid CIF file
+        column_index = next(column_indices)
+        if column_index != 0:
+            raise DeserializationError(
+                "Category contains columns with different lengths"
+            )
         return category_dict
     def _serialize_single(self):
@@ -506,39 +522,35 @@ class CIFCategory(_Component, MutableMapping):
         # "+3" Because of three whitespace chars after longest key
         req_len = max_len + 3
         return [
-            key.ljust(req_len) + _multiline(_quote(column.as_item()))
+            # Remove potential terminal newlines from multiline values
+            (key.ljust(req_len) + _escape(column.as_item())).strip()
             for key, column in zip(keys, self.values())
         ]
     def _serialize_looped(self):
-        key_lines = [
-            "_" + self._name + "." + key + " "
-            for key in self.keys()
-        ]
+        key_lines = ["_" + self._name + "." + key + " " for key in self.keys()]
         column_arrays = []
         for column in self.values():
             array = column.as_array(str)
             # Quote before measuring the number of chars,
             # as the quote characters modify the length
-            array = np.array(
-                [_multiline(_quote(element)) for element in array]
-            )
+            array = np.array([_escape(element) for element in array])
             column_arrays.append(array)
         # Number of characters the longest string in the column needs
         # This can be deduced from the dtype
         # The "+1" is for the small whitespace column
         column_n_chars = [
-            array.dtype.itemsize // UNICODE_CHAR_SIZE + 1
-            for array in column_arrays
+            array.dtype.itemsize // UNICODE_CHAR_SIZE + 1 for array in column_arrays
         ]
         value_lines = [""] * self._row_count
         for i in range(self._row_count):
             for j, array in enumerate(column_arrays):
                 value_lines[i] += array[i].ljust(column_n_chars[j])
             # Remove trailing justification of last column
-            value_lines[i].rstrip()
+            # and potential terminal newlines from multiline values
+            value_lines[i] = value_lines[i].strip()
         return ["loop_"] + key_lines + value_lines
@@ -615,15 +627,11 @@ class CIFBlock(_Component, MutableMapping):
                     if is_loop_in_line:
                         # In case of lines with "loop_" the category is
                         # in the next line
-                        category_name_in_line = _parse_category_name(
-                            lines[i + 1]
-                        )
+                        category_name_in_line = _parse_category_name(lines[i + 1])
                     current_category_name = category_name_in_line
                     category_starts.append(i)
                     category_names.append(current_category_name)
-        return CIFBlock(_create_element_dict(
-            lines, category_names, category_starts
-        ))
+        return CIFBlock(_create_element_dict(lines, category_names, category_starts))
     def serialize(self):
         text_blocks = []
@@ -635,7 +643,7 @@ class CIFBlock(_Component, MutableMapping):
                 try:
                     category.name = category_name
                     text_blocks.append(category.serialize())
-                except:
+                except Exception:
                     raise SerializationError(
                         f"Failed to serialize category '{category_name}'"
                     )
@@ -652,16 +660,14 @@ class CIFBlock(_Component, MutableMapping):
                 # Special optimization for "atom_site":
                 # Even if the values are quote protected,
                 # no whitespace is expected in escaped values
-                # Therefore slow shlex.split() call is not necessary
+                # Therefore slow regex-based _split_one_line() call is not necessary
                 if key == "atom_site":
                     expect_whitespace = False
                 else:
                     expect_whitespace = True
                 category = CIFCategory.deserialize(category, expect_whitespace)
-            except:
-                raise DeserializationError(
-                    f"Failed to deserialize category '{key}'"
-                )
+            except Exception:
+                raise DeserializationError(f"Failed to deserialize category '{key}'")
             # Update with deserialized object
             self._categories[key] = category
         return category
@@ -809,7 +815,7 @@ class CIFFile(_Component, File, MutableMapping):
             else:
                 try:
                     text_blocks.append(block.serialize())
-                except:
+                except Exception:
                     raise SerializationError(
                         f"Failed to serialize block '{block_name}'"
                     )
@@ -869,19 +875,15 @@ class CIFFile(_Component, File, MutableMapping):
             # -> must be deserialized first
             try:
                 block = CIFBlock.deserialize(block)
-            except:
-                raise DeserializationError(
-                    f"Failed to deserialize block '{key}'"
-                )
+            except Exception:
+                raise DeserializationError(f"Failed to deserialize block '{key}'")
             # Update with deserialized object
             self._blocks[key] = block
         return block
     def __setitem__(self, key, block):
         if not isinstance(block, CIFBlock):
-            raise TypeError(
-                f"Expected 'CIFBlock', but got '{type(block).__name__}'"
-            )
+            raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
         self._blocks[key] = block
     def __delitem__(self, key):
@@ -919,7 +921,7 @@ def _create_element_dict(lines, element_names, element_starts):
     # Lazy deserialization
     # -> keep as text for now and deserialize later if needed
     return {
-        element_name: "\n".join(lines[element_starts[i] : element_starts[i+1]])
+        element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
         for i, element_name in enumerate(element_names)
     }
@@ -953,52 +955,50 @@ def _is_loop_start(line):
     return line.startswith("loop_")
-def _to_single(lines, is_looped):
-    """
+def _to_single(lines):
+    r"""
     Convert multiline values into singleline values
     (in terms of 'lines' list elements).
-    Linebreaks are preserved.
+    Linebreaks are preserved as ``'\n'`` characters within a list element.
+    The initial ``';'`` character is also preserved, while the final ``';'`` character
+    is removed.
     """
-    processed_lines = [None] * len(lines)
-    in_i = 0
-    out_i = 0
-    while in_i < len(lines):
-        if lines[in_i][0] == ";":
-            # Multiline value
-            multi_line_str = lines[in_i][1:]
-            j = in_i + 1
-            while lines[j] != ";":
-                # Preserve linebreaks
-                multi_line_str += "\n" + lines[j]
-                j += 1
-            if is_looped:
-                # Create a line for the multiline string only
-                processed_lines[out_i] = shlex.quote(multi_line_str)
-                out_i += 1
+    processed_lines = []
+    in_multi_line = False
+    mutli_line_value = []
+    for line in lines:
+        # Multiline value are enclosed by ';' at the start of the beginning and end line
+        if line[0] == ";":
+            if not in_multi_line:
+                # Start of multiline value
+                in_multi_line = True
+                mutli_line_value.append(line)
             else:
-                # Append multiline string to previous line
-                processed_lines[out_i - 1] += " " + shlex.quote(multi_line_str)
-            in_i = j + 1
-        elif not is_looped and lines[in_i][0] != "_":
-            # Singleline value in the line after the corresponding key
-            processed_lines[out_i - 1] += " " + lines[in_i]
-            in_i += 1
+                # End of multiline value
+                in_multi_line = False
+                # The current line contains only the end character ';'
+                # Hence this line is not added to the processed lines
+                processed_lines.append("\n".join(mutli_line_value))
+                mutli_line_value = []
         else:
-            # Normal singleline value in the same row as the key
-            processed_lines[out_i] = lines[in_i]
-            in_i += 1
-            out_i += 1
-    return [line for line in processed_lines if line is not None]
+            if in_multi_line:
+                mutli_line_value.append(line)
+            else:
+                processed_lines.append(line)
+    return processed_lines
-def _quote(value):
+def _escape(value):
     """
-    A less secure but much quicker version of ``shlex.quote()``.
+    Escape special characters in a value to make it compatible with CIF.
     """
-    if len(value) == 0:
+    if "\n" in value:
+        # A value with linebreaks must be represented as multiline value
+        return _multiline(value)
+    elif "'" in value and '"' in value:
+        # If both quote types are present, you cannot use them for escaping
+        return _multiline(value)
+    elif len(value) == 0:
         return "''"
     elif value[0] == "_":
         return "'" + value + "'"
@@ -1016,12 +1016,42 @@ def _quote(value):
 def _multiline(value):
     """
-    Convert a string containing linebreaks into CIF-compatible
+    Convert a string that may contain linebreaks into CIF-compatible
     multiline string.
     """
-    if "\n" in value:
-        return "\n;" + value + "\n;\n"
-    return value
+    return "\n;" + value + "\n;\n"
+def _split_one_line(line):
+    """
+    Split a line into its fields.
+    Supporting embedded quotes (' or "), like `'a dog's life'` to  `a dog's life`
+    """
+    # Special case of multiline value, where the line starts with ';'
+    if line[0] == ";":
+        return [line[1:]]
+    # Define the patterns for different types of fields
+    single_quote_pattern = r"('(?:'(?! )|[^'])*')(?:\s|$)"
+    double_quote_pattern = r'("(?:"(?! )|[^"])*")(?:\s|$)'
+    unquoted_pattern = r"([^\s]+)"
+    # Combine the patterns using alternation
+    combined_pattern = (
+        f"{single_quote_pattern}|{double_quote_pattern}|{unquoted_pattern}"
+    )
+    # Find all matches
+    matches = re.findall(combined_pattern, line)
+    # Extract non-empty groups from the matches
+    fields = []
+    for match in matches:
+        field = next(group for group in match if group)
+        if field[0] == field[-1] == "'" or field[0] == field[-1] == '"':
+            field = field[1:-1]
+        fields.append(field)
+    return fields
 def _arrayfy(data):

biotite/structure/io/pdbx/component.py CHANGED Viewed

@@ -11,10 +11,10 @@ __name__ = "biotite.structure.io.pdbx"
 __author__ = "Patrick Kunzmann"
 __all__ = ["MaskValue"]
-from enum import IntEnum
 from abc import ABCMeta, abstractmethod
 from collections.abc import MutableMapping
-from ....file import SerializationError, DeserializationError
+from enum import IntEnum
+from biotite.file import DeserializationError, SerializationError
 class MaskValue(IntEnum):
@@ -29,6 +29,7 @@ class MaskValue(IntEnum):
     - `MISSING` : For this row the value is missing or unknown
       (``?`` in *CIF*).
     """
     PRESENT = 0
     INAPPLICABLE = 1
     MISSING = 2
@@ -109,8 +110,7 @@ class _Component(metaclass=ABCMeta):
         return str(self.serialize())
-class _HierarchicalContainer(_Component, MutableMapping,
-                             metaclass=ABCMeta):
+class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
     """
     A container for hierarchical data in BinaryCIF files.
     For example, the file contains multiple blocks, each block contains
@@ -181,10 +181,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
             if isinstance(element, self.subcomponent_class()):
                 try:
                     serialized_element = element.serialize()
-                except:
-                    raise SerializationError(
-                        f"Failed to serialize element '{key}'"
-                    )
+                except Exception:
+                    raise SerializationError(f"Failed to serialize element '{key}'")
             else:
                 # Element is already stored in serialized form
                 serialized_element = element
@@ -200,10 +198,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
             # -> must be deserialized first
             try:
                 element = self.subcomponent_class().deserialize(element)
-            except:
-                raise DeserializationError(
-                    f"Failed to deserialize element '{key}'"
-                )
+            except Exception:
+                raise DeserializationError(f"Failed to deserialize element '{key}'")
             # Update container with deserialized object
             self._elements[key] = element
         return element
@@ -220,10 +216,8 @@ class _HierarchicalContainer(_Component, MutableMapping,
         else:
             try:
                 element = self.subcomponent_class().deserialize(element)
-            except:
-                raise DeserializationError(
-                    f"Failed to deserialize given value"
-                )
+            except Exception:
+                raise DeserializationError("Failed to deserialize given value")
         self._elements[key] = element
     def __delitem__(self, key):