PyPI - biotite - Versions diffs - 0.38.0__cp311-cp311-macosx_11_0_arm64.whl → 0.40.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 0.38.0__cp311-cp311-macosx_11_0_arm64.whl → 0.40.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (124)

biotite/__init__.py +3 -3
biotite/application/application.py +33 -28
biotite/application/dssp/app.py +18 -18
biotite/application/sra/__init__.py +5 -0
biotite/application/sra/app.py +337 -55
biotite/database/entrez/__init__.py +2 -1
biotite/database/entrez/check.py +14 -3
biotite/database/entrez/download.py +20 -13
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +38 -34
biotite/database/pubchem/query.py +44 -44
biotite/database/rcsb/download.py +19 -14
biotite/database/rcsb/query.py +46 -46
biotite/sequence/align/__init__.py +5 -1
biotite/sequence/align/banded.c +1408 -1025
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +389 -0
biotite/sequence/align/kmeralphabet.c +3220 -2850
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.c +713 -663
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpp +68398 -0
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/localgapped.c +1507 -1074
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.c +1143 -833
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/multiple.c +1569 -1092
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.c +1612 -1212
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/permutation.c +33259 -0
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/{kmertable.c → selector.c} +9129 -16497
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/tracetable.c +685 -646
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/codec.c +1159 -841
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/graphics/alignment.py +212 -2
biotite/sequence/io/genbank/annotation.py +11 -11
biotite/sequence/phylo/nj.c +684 -636
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.c +970 -673
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.c +672 -626
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/structure/__init__.py +1 -1
biotite/structure/atoms.py +1 -1
biotite/structure/basepairs.py +7 -12
biotite/structure/bonds.c +3861 -3749
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/celllist.c +727 -707
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/charges.c +1561 -1560
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/filter.py +30 -37
biotite/structure/info/__init__.py +5 -8
biotite/structure/info/atoms.py +25 -67
biotite/structure/info/bonds.py +46 -100
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1646 -0
biotite/structure/info/ccd/carbohydrates.txt +1133 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +797 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +21 -20
biotite/structure/info/misc.py +11 -22
biotite/structure/info/standardize.py +17 -12
biotite/structure/io/__init__.py +2 -4
biotite/structure/io/ctab.py +1 -1
biotite/structure/io/general.py +37 -43
biotite/structure/io/mmtf/__init__.py +3 -0
biotite/structure/io/mmtf/convertarray.c +528 -365
biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.c +725 -676
biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/decode.c +1070 -754
biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/encode.c +727 -677
biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
biotite/structure/io/mmtf/file.py +34 -26
biotite/structure/io/npz/__init__.py +3 -0
biotite/structure/io/npz/file.py +21 -18
biotite/structure/io/pdb/__init__.py +3 -3
biotite/structure/io/pdb/file.py +72 -70
biotite/structure/io/pdb/hybrid36.c +540 -478
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +82 -68
biotite/structure/io/pdbx/__init__.py +13 -6
biotite/structure/io/pdbx/bcif.py +649 -0
biotite/structure/io/pdbx/cif.py +1028 -0
biotite/structure/io/pdbx/component.py +243 -0
biotite/structure/io/pdbx/convert.py +707 -359
biotite/structure/io/pdbx/encoding.c +112813 -0
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/error.py +14 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/molecules.py +151 -151
biotite/structure/residues.py +40 -40
biotite/structure/sasa.c +713 -644
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/superimpose.py +158 -115
biotite/visualize.py +9 -11
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/RECORD +112 -102
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
biotite/structure/info/amino_acids.json +0 -1556
biotite/structure/info/amino_acids.py +0 -42
biotite/structure/info/carbohydrates.json +0 -1122
biotite/structure/info/carbohydrates.py +0 -39
biotite/structure/info/intra_bonds.msgpack +0 -0
biotite/structure/info/link_types.msgpack +0 -1
biotite/structure/info/nucleotides.json +0 -772
biotite/structure/info/nucleotides.py +0 -39
biotite/structure/info/residue_masses.msgpack +0 -0
biotite/structure/info/residue_names.msgpack +0 -3
biotite/structure/info/residues.msgpack +0 -0
biotite/structure/io/pdbx/file.py +0 -652
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0

biotite/structure/io/pdb/file.py CHANGED

@@ -52,23 +52,25 @@ _gamma = slice(47, 54)
 class PDBFile(TextFile):
     r"""
     This class represents a PDB file.
-    The usage of PDBxFile is encouraged in favor of this class.
+    The usage of :mod:`biotite.structure.io.pdbx` is encouraged in favor
+    of this class.
     This class only provides support for reading/writing the pure atom
     information (*ATOM*, *HETATM*, *MODEL* and *ENDMDL* records). *TER*
     records cannot be written.
     Additionally, *REMARK* records can be read
     See also
     --------
-    PDBxFile
+    CIFFile
+    BinaryCIFFile
     Examples
     --------
     Load a `\\*.pdb` file, modify the structure and save the new
     structure into a new file:
     >>> import os.path
     >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
     >>> array_stack = file.get_structure()
@@ -85,7 +87,7 @@ class PDBFile(TextFile):
         file.lines = [line.ljust(80) for line in file.lines]
         file._index_models_and_atoms()
         return file
     def get_remark(self, number):
         r"""
@@ -96,7 +98,7 @@ class PDBFile(TextFile):
         ----------
         number : int
             The *REMARK* number, i.e. the `XXX` in ``REMARK XXX``.
         Returns
         -------
         remark_lines : None or list of str
@@ -115,11 +117,11 @@ class PDBFile(TextFile):
         >>> file = PDBFile.read(os.path.join(path_to_structures, "1l2y.pdb"))
         >>> remarks = file.get_remark(900)
         >>> print("\n".join(remarks))
-        RELATED ENTRIES
-        RELATED ID: 5292   RELATED DB: BMRB
-        BMRB 5292 IS CHEMICAL SHIFTS FOR TC5B IN BUFFER AND BUFFER
-        CONTAINING 30 VOL-% TFE.
-        RELATED ID: 1JRJ   RELATED DB: PDB
+        RELATED ENTRIES
+        RELATED ID: 5292   RELATED DB: BMRB
+        BMRB 5292 IS CHEMICAL SHIFTS FOR TC5B IN BUFFER AND BUFFER
+        CONTAINING 30 VOL-% TFE.
+        RELATED ID: 1JRJ   RELATED DB: PDB
         1JRJ IS AN ANALAGOUS C-TERMINAL STRUCTURE.
         >>> nonexistent_remark = file.get_remark(999)
         >>> print(nonexistent_remark)
@@ -131,7 +133,7 @@ class PDBFile(TextFile):
         number = int(number)
         if number < 0 or number > 999:
             raise ValueError("The number must be in range 0-999")
         remark_string = f"REMARK {number:>3d}"
         # Find lines and omit ``REMARK XXX `` part
         remark_lines = [
@@ -155,12 +157,12 @@ class PDBFile(TextFile):
             The number of models.
         """
         return len(self._model_start_i)
     def get_coord(self, model=None):
         """
         Get only the coordinates from the PDB file.
         Parameters
         ----------
         model : int, optional
@@ -172,13 +174,13 @@ class PDBFile(TextFile):
             If this parameter is omitted, an 3D coordinate array
             containing all models will be returned, even if
             the structure contains only one model.
         Returns
         -------
         coord : ndarray, shape=(m,n,3) or shape=(n,3), dtype=float
             The coordinates read from the ATOM and HETATM records of the
             file.
         Notes
         -----
         Note that :func:`get_coord()` may output more coordinates than
@@ -186,18 +188,18 @@ class PDBFile(TextFile):
         :func:`get_structure()` call has.
         The reason for this is, that :func:`get_structure()` filters
         *altloc* IDs, while `get_coord()` does not.
         Examples
         --------
         Read an :class:`AtomArrayStack` from multiple PDB files, where
         each PDB file contains the same atoms but different positions.
         This is an efficient approach when a trajectory is spread into
         multiple PDB files, as done e.g. by the *Rosetta* modeling
-        software.
+        software.
         For the purpose of this example, the PDB files are created from
         an existing :class:`AtomArrayStack`.
         >>> import os.path
         >>> from tempfile import gettempdir
         >>> file_names = []
@@ -251,7 +253,7 @@ class PDBFile(TextFile):
                 coord[m,i,2] = float(line[_coord_z])
                 i += 1
             return coord
         else:
             coord_i = self._get_atom_record_indices_for_model(model)
             coord = np.zeros((len(coord_i), 3), dtype=np.float32)
@@ -261,12 +263,12 @@ class PDBFile(TextFile):
                 coord[i,1] = float(line[_coord_y])
                 coord[i,2] = float(line[_coord_z])
             return coord
     def get_b_factor(self, model=None):
         """
         Get only the B-factors from the PDB file.
         Parameters
         ----------
         model : int, optional
@@ -278,13 +280,13 @@ class PDBFile(TextFile):
             If this parameter is omitted, an 2D B-factor array
             containing all models will be returned, even if
             the structure contains only one model.
         Returns
         -------
         b_factor : ndarray, shape=(m,n) or shape=(n,), dtype=float
             The B-factors read from the ATOM and HETATM records of the
             file.
         Notes
         -----
         Note that :func:`get_b_factor()` may output more B-factors
@@ -311,7 +313,7 @@ class PDBFile(TextFile):
                 b_factor[m,i] = float(line[_temp_f])
                 i += 1
             return b_factor
         else:
             b_factor_i = self._get_atom_record_indices_for_model(model)
             b_factor = np.zeros(len(b_factor_i), dtype=np.float32)
@@ -325,10 +327,10 @@ class PDBFile(TextFile):
                       include_bonds=False):
         """
         Get an :class:`AtomArray` or :class:`AtomArrayStack` from the PDB file.
         This function parses standard base-10 PDB files as well as
         hybrid-36 PDB.
         Parameters
         ----------
         model : int, optional
@@ -365,7 +367,7 @@ class PDBFile(TextFile):
             (e.g. especially inter-residue bonds),
             have :attr:`BondType.ANY`, since the PDB format itself does
             not support bond orders.
         Returns
         -------
         array : AtomArray or AtomArrayStack
@@ -380,11 +382,11 @@ class PDBFile(TextFile):
             annot_i = self._get_atom_record_indices_for_model(1)
             # Record indices for coordinate determination
             coord_i = self._atom_line_i
         else:
             annot_i = coord_i = self._get_atom_record_indices_for_model(model)
             array = AtomArray(len(coord_i))
         # Create mandatory and optional annotation arrays
         chain_id  = np.zeros(array.array_length(), array.chain_id.dtype)
         res_id    = np.zeros(array.array_length(), array.res_id.dtype)
@@ -416,10 +418,10 @@ class PDBFile(TextFile):
             if line[_charge][0] in "+-":
                 charge_raw[i] = line[_charge]
             else:
-                charge_raw[i] = line[_charge][::-1]
+                charge_raw[i] = line[_charge][::-1]
             occupancy[i] = float(line[_occupancy].strip())
             b_factor[i] = float(line[_temp_f].strip())
         if include_bonds or \
             (extra_fields is not None and "atom_id" in extra_fields):
                 # The atom IDs are only required in these two cases
@@ -429,7 +431,7 @@ class PDBFile(TextFile):
                 )
         else:
             atom_id = None
         # Add annotation arrays to atom array (stack)
         array.chain_id = chain_id
         array.res_id = res_id
@@ -441,7 +443,7 @@ class PDBFile(TextFile):
         for field in (extra_fields if extra_fields is not None else []):
             if field == "atom_id":
-                # Copy is necessary to avoid double masking in
+                # Copy is necessary to avoid double masking in
                 # later altloc ID filtering
                 array.set_annotation("atom_id", atom_id.copy())
             elif field == "charge":
@@ -468,7 +470,7 @@ class PDBFile(TextFile):
             warnings.warn(
                 "{} elements were guessed from atom_name.".format(rep_num)
             )
         # Fill in coordinates
         if isinstance(array, AtomArray):
             for i, line_i in enumerate(coord_i):
@@ -476,7 +478,7 @@ class PDBFile(TextFile):
                 array.coord[i, 0] = float(line[_coord_x])
                 array.coord[i, 1] = float(line[_coord_y])
                 array.coord[i, 2] = float(line[_coord_z])
         elif isinstance(array, AtomArrayStack):
             m = 0
             i = 0
@@ -510,7 +512,7 @@ class PDBFile(TextFile):
                     warnings.warn(
                         "File contains invalid 'CRYST1' record, box is ignored"
                     )
-                    box = None
+                    break
                 if isinstance(array, AtomArray):
                     array.box = box
@@ -518,7 +520,7 @@ class PDBFile(TextFile):
                     array.box = np.repeat(
                         box[np.newaxis, ...], array.stack_depth(), axis=0
                     )
-                break
+                break
         # Filter altloc IDs
         if altloc == "occupancy":
@@ -535,13 +537,13 @@ class PDBFile(TextFile):
             array.set_annotation("altloc_id", altloc_id)
         else:
             raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
         # Read bonds
         if include_bonds:
             bond_list = self._get_bonds(atom_id)
             bond_list = bond_list.merge(connect_via_residue_names(array))
             array.bonds = bond_list
         return array
@@ -549,13 +551,13 @@ class PDBFile(TextFile):
         """
         Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
         file.
         This makes also use of the optional annotation arrays
         ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
         If the atom array (stack) contains the annotation ``'atom_id'``,
         these values will be used for atom numbering instead of
         continuous numbering.
         Parameters
         ----------
         array : AtomArray or AtomArrayStack
@@ -565,7 +567,7 @@ class PDBFile(TextFile):
         hybrid36: bool, optional
             Defines wether the file should be written in hybrid-36
             format.
         Notes
         -----
         If `array` has an associated :class:`BondList`, ``CONECT``
@@ -608,13 +610,13 @@ class PDBFile(TextFile):
         if (array.res_id > max_residues).any():
             warnings.warn(f"Residue IDs exceed {max_residues:,}")
         if np.isnan(array.coord).any():
-            raise ValueError("Coordinates contain 'NaN' values")
+            raise BadStructureError("Coordinates contain 'NaN' values")
         if any([len(name) > 1 for name in array.chain_id]):
-            raise ValueError("Some chain IDs exceed 1 character")
+            raise BadStructureError("Some chain IDs exceed 1 character")
         if any([len(name) > 3 for name in array.res_name]):
-            raise ValueError("Some residue names exceed 3 characters")
+            raise BadStructureError("Some residue names exceed 3 characters")
         if any([len(name) > 4 for name in array.atom_name]):
-            raise ValueError("Some atom names exceed 4 characters")
+            raise BadStructureError("Some atom names exceed 4 characters")
         if hybrid36:
             pdb_atom_id = np.char.array(
@@ -638,7 +640,7 @@ class PDBFile(TextFile):
                 ((array.res_id - 1) % 9999) + 1,
                 array.res_id
             ).astype(str))
         names = np.char.array(
             [f" {atm}" if len(elem) == 1 and len(atm) < 4 else atm
              for atm, elem in zip(array.atom_name, array.element)]
@@ -666,7 +668,7 @@ class PDBFile(TextFile):
         coords = array.coord
         if coords.ndim == 2:
             coords = coords[np.newaxis, ...]
         self.lines = []
         # Prepend a single CRYST1 record if we have box information
         if array.box is not None:
@@ -693,7 +695,7 @@ class PDBFile(TextFile):
             )
             if is_stack:
                 self.lines.append("ENDMDL")
         # Add CONECT records if bonds are present
         if array.bonds is not None:
             # Only non-water hetero records and connections between
@@ -709,9 +711,9 @@ class PDBFile(TextFile):
             self._set_bonds(
                 BondList(array.array_length(), bond_array), pdb_atom_id
             )
         self._index_models_and_atoms()
     def list_assemblies(self):
         """
@@ -726,7 +728,7 @@ class PDBFile(TextFile):
         -------
         assemblies : list of str
             A list that contains the available assembly IDs.
         Examples
         --------
         >>> import os.path
@@ -741,11 +743,11 @@ class PDBFile(TextFile):
                 "File does not contain assembly information (REMARK 300)"
             )
         return [
-            assembly_id.strip()
+            assembly_id.strip()
             for assembly_id in remark_lines[0][12:].split(",")
         ]
     def get_assembly(self, assembly_id=None, model=None, altloc="first",
                      extra_fields=[], include_bonds=False):
         """
@@ -801,7 +803,7 @@ class PDBFile(TextFile):
         assembly : AtomArray or AtomArrayStack
             The assembly.
             The return type depends on the `model` parameter.
         Examples
         --------
@@ -869,7 +871,7 @@ class PDBFile(TextFile):
                 if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or \
                    line.startswith("                   AND CHAINS:"):
                         affected_chain_ids += [
-                            chain_id.strip()
+                            chain_id.strip()
                             for chain_id in line[30:].split(",")
                         ]
                 else:
@@ -900,7 +902,7 @@ class PDBFile(TextFile):
                 assembly += sub_assembly
         return assembly
     def get_symmetry_mates(self, model=None, altloc="first",
                            extra_fields=[], include_bonds=False):
@@ -956,13 +958,13 @@ class PDBFile(TextFile):
         symmetry_mates : AtomArray or AtomArrayStack
             All atoms within a single unit cell.
             The return type depends on the `model` parameter.
         Notes
         -----
         To expand the structure beyond a single unit cell, use
         :func:`repeat_box()` with the return value as its
         input.
         Examples
         --------
@@ -993,7 +995,7 @@ class PDBFile(TextFile):
         return _apply_transformations(
             structure, rotations, translations
         )
@@ -1014,7 +1016,7 @@ class PDBFile(TextFile):
                     # Single model
                     self._model_start_i = np.array([0])
                     break
         # Line indices with ATOM or HETATM records
         self._atom_line_i = np.array(
             [
@@ -1075,7 +1077,7 @@ class PDBFile(TextFile):
     def _get_bonds(self, atom_ids):
         conect_lines = [line for line in self.lines
                         if line.startswith("CONECT")]
         # Mapping from atom ids to indices in an AtomArray
         atom_id_to_index = np.zeros(atom_ids[-1]+1, dtype=int)
         try:
@@ -1097,7 +1099,7 @@ class PDBFile(TextFile):
                     # String is empty -> no further IDs
                     break
                 bonds.append((center_id, id))
         # The length of the 'atom_ids' array
         # is equal to the length of the AtomArray
         return BondList(len(atom_ids), np.array(bonds, dtype=np.uint32))
@@ -1158,10 +1160,10 @@ def _parse_transformations(lines):
         component_i += 1
         if component_i == 3:
             # All (x,y,z) components were parsed
-            # -> head to the next transformation
+            # -> head to the next transformation
             transformation_i += 1
             component_i = 0
     return rotations, translations