PyPI - biotite - Versions diffs - 0.41.2__cp312-cp312-win_amd64.whl → 1.0.1__cp312-cp312-win_amd64.whl - Mend

biotite 0.41.2__cp312-cp312-win_amd64.whl → 1.0.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (205)

biotite/__init__.py +2 -3
biotite/application/__init__.py +1 -1
biotite/application/application.py +20 -10
biotite/application/autodock/__init__.py +1 -1
biotite/application/autodock/app.py +74 -79
biotite/application/blast/__init__.py +1 -1
biotite/application/blast/alignment.py +19 -10
biotite/application/blast/webapp.py +92 -85
biotite/application/clustalo/__init__.py +1 -1
biotite/application/clustalo/app.py +46 -61
biotite/application/dssp/__init__.py +1 -1
biotite/application/dssp/app.py +8 -11
biotite/application/localapp.py +62 -60
biotite/application/mafft/__init__.py +1 -1
biotite/application/mafft/app.py +16 -22
biotite/application/msaapp.py +78 -89
biotite/application/muscle/__init__.py +1 -1
biotite/application/muscle/app3.py +50 -64
biotite/application/muscle/app5.py +23 -31
biotite/application/sra/__init__.py +1 -1
biotite/application/sra/app.py +64 -68
biotite/application/tantan/__init__.py +1 -1
biotite/application/tantan/app.py +22 -45
biotite/application/util.py +7 -9
biotite/application/viennarna/rnaalifold.py +34 -28
biotite/application/viennarna/rnafold.py +24 -39
biotite/application/viennarna/rnaplot.py +36 -21
biotite/application/viennarna/util.py +17 -12
biotite/application/webapp.py +13 -14
biotite/copyable.py +13 -13
biotite/database/__init__.py +1 -1
biotite/database/entrez/__init__.py +1 -1
biotite/database/entrez/check.py +2 -3
biotite/database/entrez/dbnames.py +7 -5
biotite/database/entrez/download.py +55 -49
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +62 -23
biotite/database/error.py +2 -1
biotite/database/pubchem/__init__.py +1 -1
biotite/database/pubchem/download.py +43 -45
biotite/database/pubchem/error.py +2 -2
biotite/database/pubchem/query.py +34 -31
biotite/database/pubchem/throttle.py +3 -4
biotite/database/rcsb/__init__.py +1 -1
biotite/database/rcsb/download.py +44 -52
biotite/database/rcsb/query.py +85 -80
biotite/database/uniprot/check.py +6 -3
biotite/database/uniprot/download.py +6 -11
biotite/database/uniprot/query.py +115 -31
biotite/file.py +12 -31
biotite/sequence/__init__.py +3 -3
biotite/sequence/align/__init__.py +2 -2
biotite/sequence/align/alignment.py +99 -90
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +12 -10
biotite/sequence/align/cigar.py +43 -52
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +55 -51
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +3 -2
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/matrix.py +81 -82
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -1
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.pyx +12 -4
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +52 -54
biotite/sequence/align/statistics.py +32 -33
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +51 -65
biotite/sequence/annotation.py +78 -77
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/codon.py +90 -79
biotite/sequence/graphics/__init__.py +1 -1
biotite/sequence/graphics/alignment.py +184 -103
biotite/sequence/graphics/colorschemes.py +10 -12
biotite/sequence/graphics/dendrogram.py +79 -34
biotite/sequence/graphics/features.py +133 -99
biotite/sequence/graphics/logo.py +22 -28
biotite/sequence/graphics/plasmid.py +229 -178
biotite/sequence/io/fasta/__init__.py +1 -1
biotite/sequence/io/fasta/convert.py +44 -33
biotite/sequence/io/fasta/file.py +42 -55
biotite/sequence/io/fastq/__init__.py +1 -1
biotite/sequence/io/fastq/convert.py +11 -14
biotite/sequence/io/fastq/file.py +68 -112
biotite/sequence/io/genbank/__init__.py +2 -2
biotite/sequence/io/genbank/annotation.py +12 -20
biotite/sequence/io/genbank/file.py +74 -76
biotite/sequence/io/genbank/metadata.py +74 -62
biotite/sequence/io/genbank/sequence.py +13 -14
biotite/sequence/io/general.py +39 -30
biotite/sequence/io/gff/__init__.py +2 -2
biotite/sequence/io/gff/convert.py +10 -15
biotite/sequence/io/gff/file.py +81 -65
biotite/sequence/phylo/__init__.py +1 -1
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/sequence/profile.py +57 -28
biotite/sequence/search.py +17 -15
biotite/sequence/seqtypes.py +200 -164
biotite/sequence/sequence.py +15 -17
biotite/structure/__init__.py +3 -3
biotite/structure/atoms.py +246 -236
biotite/structure/basepairs.py +260 -271
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +29 -32
biotite/structure/box.py +67 -71
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/chains.py +55 -39
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/compare.py +32 -32
biotite/structure/density.py +13 -18
biotite/structure/dotbracket.py +20 -22
biotite/structure/error.py +10 -2
biotite/structure/filter.py +83 -78
biotite/structure/geometry.py +130 -119
biotite/structure/graphics/atoms.py +60 -43
biotite/structure/graphics/rna.py +81 -68
biotite/structure/hbond.py +112 -93
biotite/structure/info/__init__.py +0 -2
biotite/structure/info/atoms.py +10 -11
biotite/structure/info/bonds.py +41 -43
biotite/structure/info/ccd.py +4 -5
biotite/structure/info/groups.py +1 -3
biotite/structure/info/masses.py +5 -10
biotite/structure/info/misc.py +1 -1
biotite/structure/info/radii.py +20 -20
biotite/structure/info/standardize.py +15 -26
biotite/structure/integrity.py +18 -71
biotite/structure/io/__init__.py +3 -4
biotite/structure/io/dcd/__init__.py +1 -1
biotite/structure/io/dcd/file.py +22 -20
biotite/structure/io/general.py +47 -61
biotite/structure/io/gro/__init__.py +1 -1
biotite/structure/io/gro/file.py +73 -72
biotite/structure/io/mol/__init__.py +1 -1
biotite/structure/io/mol/convert.py +8 -11
biotite/structure/io/mol/ctab.py +37 -36
biotite/structure/io/mol/header.py +14 -10
biotite/structure/io/mol/mol.py +9 -53
biotite/structure/io/mol/sdf.py +47 -50
biotite/structure/io/netcdf/__init__.py +1 -1
biotite/structure/io/netcdf/file.py +24 -23
biotite/structure/io/pdb/__init__.py +1 -1
biotite/structure/io/pdb/convert.py +32 -20
biotite/structure/io/pdb/file.py +151 -172
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/__init__.py +1 -1
biotite/structure/io/pdbqt/convert.py +17 -11
biotite/structure/io/pdbqt/file.py +128 -80
biotite/structure/io/pdbx/__init__.py +1 -2
biotite/structure/io/pdbx/bcif.py +36 -44
biotite/structure/io/pdbx/cif.py +140 -110
biotite/structure/io/pdbx/component.py +10 -16
biotite/structure/io/pdbx/convert.py +260 -258
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +90 -107
biotite/structure/io/trr/__init__.py +1 -1
biotite/structure/io/trr/file.py +12 -15
biotite/structure/io/xtc/__init__.py +1 -1
biotite/structure/io/xtc/file.py +11 -14
biotite/structure/mechanics.py +9 -11
biotite/structure/molecules.py +3 -4
biotite/structure/pseudoknots.py +53 -67
biotite/structure/rdf.py +23 -21
biotite/structure/repair.py +137 -86
biotite/structure/residues.py +26 -16
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/{resutil.py → segments.py} +24 -23
biotite/structure/sequence.py +10 -11
biotite/structure/sse.py +100 -119
biotite/structure/superimpose.py +39 -77
biotite/structure/transform.py +97 -71
biotite/structure/util.py +11 -13
biotite/version.py +2 -2
biotite/visualize.py +69 -55
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
biotite-1.0.1.dist-info/RECORD +322 -0
biotite/structure/io/ctab.py +0 -72
biotite/structure/io/mmtf/__init__.py +0 -21
biotite/structure/io/mmtf/assembly.py +0 -214
biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertarray.pyx +0 -341
biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.pyx +0 -501
biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.pyx +0 -152
biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.pyx +0 -183
biotite/structure/io/mmtf/file.py +0 -233
biotite/structure/io/npz/__init__.py +0 -20
biotite/structure/io/npz/file.py +0 -152
biotite/structure/io/pdbx/legacy.py +0 -267
biotite/structure/io/tng/__init__.py +0 -13
biotite/structure/io/tng/file.py +0 -46
biotite/temp.py +0 -86
biotite-0.41.2.dist-info/RECORD +0 -340
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/convert.py CHANGED Viewed

@@ -18,30 +18,41 @@ __all__ = [
 import itertools
 import warnings
 import numpy as np
-from ....file import InvalidFileError
-from ....sequence.seqtypes import NucleotideSequence, ProteinSequence
-from ...atoms import AtomArray, AtomArrayStack, repeat
-from ...bonds import BondList, BondType, connect_via_residue_names
-from ...box import unitcell_from_vectors, vectors_from_unitcell
-from ...filter import filter_first_altloc, filter_highest_occupancy_altloc
-from ...residues import get_residue_count, get_residue_starts_for
-from ...error import BadStructureError
-from ...util import matrix_rotate
-from .legacy import PDBxFile
-from .component import MaskValue
-from .cif import CIFFile, CIFBlock
-from .bcif import BinaryCIFFile, BinaryCIFBlock, BinaryCIFColumn
-from .encoding import StringArrayEncoding
+from biotite.file import InvalidFileError
+from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
+from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
+from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
+from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
+from biotite.structure.error import BadStructureError
+from biotite.structure.filter import (
+    filter_first_altloc,
+    filter_highest_occupancy_altloc,
+)
+from biotite.structure.io.pdbx.bcif import (
+    BinaryCIFBlock,
+    BinaryCIFColumn,
+    BinaryCIFFile,
+)
+from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
+from biotite.structure.io.pdbx.component import MaskValue
+from biotite.structure.io.pdbx.encoding import StringArrayEncoding
+from biotite.structure.residues import get_residue_count, get_residue_starts_for
+from biotite.structure.util import matrix_rotate
 # Cond types in `struct_conn` category that refer to covalent bonds
 PDBX_COVALENT_TYPES = [
-    "covale", "covale_base", "covale_phosphate", "covale_sugar",
-    "disulf", "modres", "modres_link", "metalc"
+    "covale",
+    "covale_base",
+    "covale_phosphate",
+    "covale_sugar",
+    "disulf",
+    "modres",
+    "modres_link",
+    "metalc",
 ]
 # Map 'struct_conn' bond orders to 'BondType'...
 PDBX_BOND_ORDER_TO_TYPE = {
-    "":     BondType.ANY,
+    "": BondType.ANY,
     "sing": BondType.SINGLE,
     "doub": BondType.DOUBLE,
     "trip": BondType.TRIPLE,
@@ -61,13 +72,13 @@ PDBX_BOND_TYPE_TO_ORDER = {
 }
 # Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
 COMP_BOND_ORDER_TO_TYPE = {
-    ("SING", "N") : BondType.SINGLE,
-    ("DOUB", "N") : BondType.DOUBLE,
-    ("TRIP", "N") : BondType.TRIPLE,
-    ("QUAD", "N") : BondType.QUADRUPLE,
-    ("SING", "Y") : BondType.AROMATIC_SINGLE,
-    ("DOUB", "Y") : BondType.AROMATIC_DOUBLE,
-    ("TRIP", "Y") : BondType.AROMATIC_TRIPLE,
+    ("SING", "N"): BondType.SINGLE,
+    ("DOUB", "N"): BondType.DOUBLE,
+    ("TRIP", "N"): BondType.TRIPLE,
+    ("QUAD", "N"): BondType.QUADRUPLE,
+    ("SING", "Y"): BondType.AROMATIC_SINGLE,
+    ("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
+    ("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
 }
 # ...and vice versa
 COMP_BOND_TYPE_TO_ORDER = {
@@ -98,16 +109,15 @@ def _filter(category, index):
     Column = Category.subcomponent_class()
     Data = Column.subcomponent_class()
-    return Category({
-        key: Column(
-            Data(column.data.array[index]),
-            (
-                Data(column.mask.array[index])
-                if column.mask is not None else None
+    return Category(
+        {
+            key: Column(
+                Data(column.data.array[index]),
+                (Data(column.mask.array[index]) if column.mask is not None else None),
             )
-        )
-        for key, column in category.items()
-    })
+            for key, column in category.items()
+        }
+    )
 def get_sequence(pdbx_file, data_block=None):
@@ -134,26 +144,47 @@ def get_sequence(pdbx_file, data_block=None):
     Returns
     -------
-    sequences : list of Sequence
-        The protein and nucleotide sequences for each entity
-        (equivalent to chains in most cases).
+    sequence_dict : Dictionary of Sequences
+        Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
+        (often equivalent to chain_id and atom_site.auth_asym_id
+        in most cases). Dictionary values are sequences.
+    Notes
+    -----
+    The ``entity_poly.pdbx_seq_one_letter_code_can`` field contains the initial
+    complete sequence. If the structure represents a truncated or spliced
+    version of this initial sequence, it will include only a subset of the
+    initial sequence. Use biotite.structure.get_residues to retrieve only
+    the residues that are represented in the structure.
     """
     block = _get_block(pdbx_file, data_block)
+    poly_category = block["entity_poly"]
-    poly_category= block["entity_poly"]
     seq_string = poly_category["pdbx_seq_one_letter_code_can"].as_array(str)
     seq_type = poly_category["type"].as_array(str)
-    sequences = []
-    for string, stype in zip(seq_string, seq_type):
-        sequence = _convert_string_to_sequence(string, stype)
-        if sequence is not None:
-            sequences.append(sequence)
-    return sequences
+    sequences = [
+        _convert_string_to_sequence(string, stype)
+        for string, stype in zip(seq_string, seq_type)
+    ]
+    strand_ids = poly_category["pdbx_strand_id"].as_array(str)
+    strand_ids = [strand_id.split(",") for strand_id in strand_ids]
+    sequence_dict = {
+        strand_id: sequence
+        for sequence, strand_ids in zip(sequences, strand_ids)
+        for strand_id in strand_ids
+        if sequence is not None
+    }
+    return sequence_dict
 def get_model_count(pdbx_file, data_block=None):
     """
-    Get the number of models contained in a :class:`PDBxFile`.
+    Get the number of models contained in a file.
     Parameters
     ----------
@@ -172,17 +203,23 @@ def get_model_count(pdbx_file, data_block=None):
         The number of models.
     """
     block = _get_block(pdbx_file, data_block)
-    return len(_get_model_starts(
-        block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32)
-    ))
+    return len(
+        _get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
+    )
-def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
-                  extra_fields=None, use_author_fields=True,
-                  include_bonds=False):
+def get_structure(
+    pdbx_file,
+    model=None,
+    data_block=None,
+    altloc="first",
+    extra_fields=None,
+    use_author_fields=True,
+    include_bonds=False,
+):
     """
     Create an :class:`AtomArray` or :class:`AtomArrayStack` from the
-    ``atom_site`` category in a :class:`PDBxFile`.
+    ``atom_site`` category in a file.
     Parameters
     ----------
@@ -228,7 +265,7 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
         for example both, ``label_seq_id`` and ``auth_seq_id`` describe
         the ID of the residue.
         While, the ``label_xxx`` fields can be used as official pointers
-        to other categories in the :class:`PDBxFile`, the ``auth_xxx``
+        to other categories in the file, the ``auth_xxx``
         fields are set by the author(s) of the structure and are
         consistent with the corresponding values in PDB files.
         If `use_author_fields` is true, the annotation arrays will be
@@ -290,12 +327,21 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
                 "instead"
             )
-        atoms.coord[:, :, 0] = atom_site["Cartn_x"].as_array(np.float32) \
-                              .reshape((model_count, model_length))
-        atoms.coord[:, :, 1] = atom_site["Cartn_y"].as_array(np.float32) \
-                              .reshape((model_count, model_length))
-        atoms.coord[:, :, 2] = atom_site["Cartn_z"].as_array(np.float32) \
-                              .reshape((model_count, model_length))
+        atoms.coord[:, :, 0] = (
+            atom_site["Cartn_x"]
+            .as_array(np.float32)
+            .reshape((model_count, model_length))
+        )
+        atoms.coord[:, :, 1] = (
+            atom_site["Cartn_y"]
+            .as_array(np.float32)
+            .reshape((model_count, model_length))
+        )
+        atoms.coord[:, :, 2] = (
+            atom_site["Cartn_z"]
+            .as_array(np.float32)
+            .reshape((model_count, model_length))
+        )
         box = _get_box(block)
         if box is not None:
@@ -325,31 +371,25 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
         atoms.box = _get_box(block)
     # The below part is the same for both, AtomArray and AtomArrayStack
-    _fill_annotations(
-        atoms, model_atom_site, extra_fields, use_author_fields
-    )
+    _fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
     if include_bonds:
         if "chem_comp_bond" in block:
             try:
-                custom_bond_dict = _parse_intra_residue_bonds(
-                    block["chem_comp_bond"]
-                )
+                custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
             except KeyError:
                 warnings.warn(
                     "The 'chem_comp_bond' category has missing columns, "
                     "falling back to using Chemical Component Dictionary",
-                    UserWarning
+                    UserWarning,
                 )
                 custom_bond_dict = None
-            bonds = connect_via_residue_names(
-                atoms, custom_bond_dict=custom_bond_dict
-            )
+            bonds = connect_via_residue_names(atoms, custom_bond_dict=custom_bond_dict)
         else:
             bonds = connect_via_residue_names(atoms)
         if "struct_conn" in block:
-            bonds = bonds.merge(_parse_inter_residue_bonds(
-                model_atom_site, block["struct_conn"]
-            ))
+            bonds = bonds.merge(
+                _parse_inter_residue_bonds(model_atom_site, block["struct_conn"])
+            )
         atoms.bonds = bonds
     atoms = _filter_altloc(atoms, model_atom_site, altloc)
@@ -357,10 +397,6 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
 def _get_block(pdbx_component, block_name):
-    if isinstance(pdbx_component, PDBxFile):
-        # The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
-        pdbx_component = pdbx_component.cif_file
     if not isinstance(pdbx_component, (CIFBlock, BinaryCIFBlock)):
         # Determine block
         if block_name is None:
@@ -372,24 +408,24 @@ def _get_block(pdbx_component, block_name):
 def _get_or_fallback(category, key, fallback_key):
-        """
-        Return column related to key in category if it exists,
-        otherwise try to get the column related to fallback key.
-        """
-        if key not in category:
-            warnings.warn(
-                f"Attribute '{key}' not found within 'atom_site' category. "
-                f"The fallback attribute '{fallback_key}' will be used instead",
-                UserWarning
-            )
-            try:
-                return category[fallback_key]
-            except KeyError as key_exc:
-                raise InvalidFileError(
-                    f"Fallback attribute '{fallback_key}' not found within "
-                    "'atom_site' category"
-                ) from key_exc
-        return category[key]
+    """
+    Return column related to key in category if it exists,
+    otherwise try to get the column related to fallback key.
+    """
+    if key not in category:
+        warnings.warn(
+            f"Attribute '{key}' not found within 'atom_site' category. "
+            f"The fallback attribute '{fallback_key}' will be used instead",
+            UserWarning,
+        )
+        try:
+            return category[fallback_key]
+        except KeyError as key_exc:
+            raise InvalidFileError(
+                f"Fallback attribute '{fallback_key}' not found within "
+                "'atom_site' category"
+            ) from key_exc
+    return category[key]
 def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
@@ -408,78 +444,52 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
         instead of ``label_``.
     """
-    prefix, alt_prefix = (
-        ("auth", "label") if use_author_fields else ("label", "auth")
-    )
+    prefix, alt_prefix = ("auth", "label") if use_author_fields else ("label", "auth")
     array.set_annotation(
         "chain_id",
         _get_or_fallback(
             atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
-        ).as_array("U4")
+        ).as_array(str),
     )
     array.set_annotation(
         "res_id",
         _get_or_fallback(
             atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
-        ).as_array(int, -1)
-    )
-    array.set_annotation(
-        "ins_code",
-        atom_site["pdbx_PDB_ins_code"].as_array("U1", "")
+        ).as_array(int, -1),
     )
+    array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
     array.set_annotation(
         "res_name",
         _get_or_fallback(
             atom_site, f"{prefix}_comp_id", f"{alt_prefix}_comp_id"
-        ).as_array("U5")
-    )
-    array.set_annotation(
-        "hetero",
-        atom_site["group_PDB"].as_array(str) == "HETATM"
+        ).as_array(str),
     )
+    array.set_annotation("hetero", atom_site["group_PDB"].as_array(str) == "HETATM")
     array.set_annotation(
         "atom_name",
         _get_or_fallback(
             atom_site, f"{prefix}_atom_id", f"{alt_prefix}_atom_id"
-        ).as_array("U6")
-    )
-    array.set_annotation(
-        "element",
-        atom_site["type_symbol"].as_array("U2")
+        ).as_array(str),
     )
+    array.set_annotation("element", atom_site["type_symbol"].as_array(str))
     if "atom_id" in extra_fields:
-        array.set_annotation(
-            "atom_id",
-            atom_site["id"].as_array(int)
-        )
+        array.set_annotation("atom_id", atom_site["id"].as_array(int))
         extra_fields.remove("atom_id")
     if "b_factor" in extra_fields:
-        array.set_annotation(
-            "b_factor",
-            atom_site["B_iso_or_equiv"].as_array(float)
-        )
+        array.set_annotation("b_factor", atom_site["B_iso_or_equiv"].as_array(float))
         extra_fields.remove("b_factor")
     if "occupancy" in extra_fields:
-        array.set_annotation(
-            "occupancy",
-            atom_site["occupancy"].as_array(float)
-        )
+        array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
         extra_fields.remove("occupancy")
     if "charge" in extra_fields:
-        array.set_annotation(
-            "charge",
-            atom_site["pdbx_formal_charge"].as_array(int, 0)
-        )
+        array.set_annotation("charge", atom_site["pdbx_formal_charge"].as_array(int, 0))
         extra_fields.remove("charge")
     # Handle all remaining custom fields
     for field in extra_fields:
-        array.set_annotation(
-            field,
-            atom_site[field].as_array(str)
-        )
+        array.set_annotation(field, atom_site[field].as_array(str))
 def _parse_intra_residue_bonds(chem_comp_bond):
@@ -493,7 +503,7 @@ def _parse_intra_residue_bonds(chem_comp_bond):
         chem_comp_bond["atom_id_1"].as_array(str),
         chem_comp_bond["atom_id_2"].as_array(str),
         chem_comp_bond["value_order"].as_array(str),
-        chem_comp_bond["pdbx_aromatic_flag"].as_array(str)
+        chem_comp_bond["pdbx_aromatic_flag"].as_array(str),
     ):
         if res_name not in custom_bond_dict:
             custom_bond_dict[res_name] = {}
@@ -514,33 +524,32 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     IDENTITY = "1_555"
     # Columns in 'atom_site' that should be matched by 'struct_conn'
     COLUMNS = [
-        "label_asym_id", "label_comp_id", "label_seq_id", "label_atom_id",
-        "label_alt_id", "auth_asym_id", "auth_comp_id", "auth_seq_id",
-        "pdbx_PDB_ins_code"
+        "label_asym_id",
+        "label_comp_id",
+        "label_seq_id",
+        "label_atom_id",
+        "label_alt_id",
+        "auth_asym_id",
+        "auth_comp_id",
+        "auth_seq_id",
+        "pdbx_PDB_ins_code",
     ]
     covale_mask = np.isin(
         struct_conn["conn_type_id"].as_array(str), PDBX_COVALENT_TYPES
     )
     if "ptnr1_symmetry" in struct_conn:
-        covale_mask &= (
-            struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
-        )
+        covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
     if "ptnr2_symmetry" in struct_conn:
-        covale_mask &= (
-            struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
-        )
+        covale_mask &= struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
     atom_indices = [None] * 2
     for i in range(2):
         reference_arrays = []
         query_arrays = []
         for col_name in COLUMNS:
-            struct_conn_col_name = _get_struct_conn_col_name(col_name, i+1)
-            if (
-                col_name not in atom_site
-                or struct_conn_col_name not in struct_conn
-            ):
+            struct_conn_col_name = _get_struct_conn_col_name(col_name, i + 1)
+            if col_name not in atom_site or struct_conn_col_name not in struct_conn:
                 continue
             # Ensure both arrays have the same dtype to allow comparison
             reference = atom_site[col_name].as_array()
@@ -568,7 +577,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     atoms_indices_2 = atoms_indices_2[mapping_exists_mask]
     # Interpret missing values as ANY bonds
-    bond_order = struct_conn["pdbx_value_order"].as_array("U4", "")
+    bond_order = struct_conn["pdbx_value_order"].as_array(str, "")
     # Consecutively apply the same masks as applied to the atom indices
     # Logical combination does not work here,
     # as the second mask was created based on already filtered data
@@ -577,7 +586,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     return BondList(
         atom_site.row_count,
-        np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1)
+        np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
     )
@@ -587,10 +596,13 @@ def _find_matches(query_arrays, reference_arrays):
     `reference_arrays` where all query values the reference counterpart.
     If no match is found for a query, the corresponding index is -1.
     """
-    match_masks_for_all_columns = np.stack([
-        query[:, np.newaxis] == reference[np.newaxis, :]
-        for query, reference in zip(query_arrays, reference_arrays)
-    ], axis=-1)
+    match_masks_for_all_columns = np.stack(
+        [
+            query[:, np.newaxis] == reference[np.newaxis, :]
+            for query, reference in zip(query_arrays, reference_arrays)
+        ],
+        axis=-1,
+    )
     match_masks = np.all(match_masks_for_all_columns, axis=-1)
     query_matches, reference_matches = np.where(match_masks)
@@ -664,14 +676,8 @@ def _filter_model(atom_site, model_starts, model):
     Reduce the ``atom_site`` category to the values for the given
     model.
     """
-    Category = type(atom_site)
-    Column = Category.subcomponent_class()
-    Data = Column.subcomponent_class()
     # Append exclusive stop
-    model_starts = np.append(
-        model_starts, [atom_site.row_count]
-    )
+    model_starts = np.append(model_starts, [atom_site.row_count])
     # Indexing starts at 0, but model number starts at 1
     model_index = model - 1
     index = slice(model_starts[model_index], model_starts[model_index + 1])
@@ -757,9 +763,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
     # Fill PDBx columns from information
     # in structures' attribute arrays as good as possible
     atom_site = Category()
-    atom_site["group_PDB"] = np.where(
-        array.hetero, "HETATM", "ATOM"
-    )
+    atom_site["group_PDB"] = np.where(array.hetero, "HETATM", "ATOM")
     atom_site["type_symbol"] = np.copy(array.element)
     atom_site["label_atom_id"] = np.copy(array.atom_name)
     atom_site["label_alt_id"] = Column(
@@ -773,7 +777,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
     atom_site["label_seq_id"] = np.copy(array.res_id)
     atom_site["pdbx_PDB_ins_code"] = Column(
         np.copy(array.ins_code),
-        np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT)
+        np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT),
     )
     atom_site["auth_seq_id"] = atom_site["label_seq_id"]
     atom_site["auth_comp_id"] = atom_site["label_comp_id"]
@@ -790,11 +794,11 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
     if "charge" in annot_categories:
         atom_site["pdbx_formal_charge"] = Column(
             np.array([f"{c:+d}" if c != 0 else "?" for c in array.charge]),
-            np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT)
+            np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
         )
     if array.bonds is not None:
-        struct_conn =  _set_inter_residue_bonds(array, atom_site)
+        struct_conn = _set_inter_residue_bonds(array, atom_site)
         if struct_conn is not None:
             block["struct_conn"] = struct_conn
         if include_bonds:
@@ -804,24 +808,20 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
     # In case of a single model handle each coordinate
     # simply like a flattened array
-    if type(array) == AtomArray or (
-        type(array) == AtomArrayStack and array.stack_depth() == 1
+    if isinstance(array, AtomArray) or (
+        isinstance(array, AtomArrayStack) and array.stack_depth() == 1
     ):
         # 'ravel' flattens coord without copy
         # in case of stack with stack_depth = 1
         atom_site["Cartn_x"] = np.copy(np.ravel(array.coord[..., 0]))
         atom_site["Cartn_y"] = np.copy(np.ravel(array.coord[..., 1]))
         atom_site["Cartn_z"] = np.copy(np.ravel(array.coord[..., 2]))
-        atom_site["pdbx_PDB_model_num"] = np.ones(
-            array.array_length(), dtype=np.int32
-        )
+        atom_site["pdbx_PDB_model_num"] = np.ones(array.array_length(), dtype=np.int32)
     # In case of multiple models repeat annotations
     # and use model specific coordinates
     else:
         atom_site = _repeat(atom_site, array.stack_depth())
-        coord = np.reshape(
-            array.coord, (array.stack_depth() * array.array_length(), 3)
-        )
+        coord = np.reshape(array.coord, (array.stack_depth() * array.array_length(), 3))
         atom_site["Cartn_x"] = np.copy(coord[:, 0])
         atom_site["Cartn_y"] = np.copy(coord[:, 1])
         atom_site["Cartn_z"] = np.copy(coord[:, 2])
@@ -829,11 +829,9 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
             np.arange(1, array.stack_depth() + 1, dtype=np.int32),
             repeats=array.array_length(),
         )
-    if not "atom_id" in annot_categories:
+    if "atom_id" not in annot_categories:
         # Count from 1
-        atom_site["id"] = np.arange(
-            1, len(atom_site["group_PDB"]) + 1
-        )
+        atom_site["id"] = np.arange(1, len(atom_site["group_PDB"]) + 1)
     block["atom_site"] = atom_site
     # Write box into file
@@ -870,10 +868,6 @@ def _check_non_empty(array):
 def _get_or_create_block(pdbx_component, block_name):
-    if isinstance(pdbx_component, PDBxFile):
-        # The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
-        pdbx_component = pdbx_component.cif_file
     Block = pdbx_component.subcomponent_class()
     if isinstance(pdbx_component, (CIFFile, BinaryCIFFile)):
@@ -901,7 +895,7 @@ def _determine_entity_id(chain_id):
     for i in range(len(chain_id)):
         try:
             entity_id[i] = id_translation[chain_id[i]]
-        except:
+        except KeyError:
             # chain_id is not in dictionary -> new entry
             id_translation[chain_id[i]] = id
             entity_id[i] = id_translation[chain_id[i]]
@@ -926,8 +920,11 @@ def _repeat(category, repetitions):
             data = Data(np.tile(column.data.array, repetitions), data_encoding)
         else:
             data = Data(np.tile(column.data.array, repetitions))
-        mask = Data(np.tile(column.mask.array, repetitions)) \
-               if column.mask is not None else None
+        mask = (
+            Data(np.tile(column.mask.array, repetitions))
+            if column.mask is not None
+            else None
+        )
         category_dict[key] = Column(data, mask)
     return Category(category_dict)
@@ -967,28 +964,37 @@ def _set_intra_residue_bonds(array, atom_site):
         aromatic_flag[i] = aromatic
     any_mask = bond_array[:, 2] == BondType.ANY
-    chem_comp_bond = Category()
+    # Remove already existing residue and atom name combinations
+    # These appear when the structure contains a residue multiple times
+    atom_id_1 = array.atom_name[bond_array[:, 0]]
+    atom_id_2 = array.atom_name[bond_array[:, 1]]
     # Take the residue name from the first atom index, as the residue
     # name is the same for both atoms, since we have only intra bonds
-    chem_comp_bond["comp_id"] = array.res_name[bond_array[:, 0]]
-    chem_comp_bond["atom_id_1"] = array.atom_name[bond_array[:, 0]]
-    chem_comp_bond["atom_id_2"] = array.atom_name[bond_array[:, 1]]
+    comp_id = array.res_name[bond_array[:, 0]]
+    _, unique_indices = np.unique(
+        np.stack([comp_id, atom_id_1, atom_id_2], axis=-1), axis=0, return_index=True
+    )
+    unique_indices.sort()
+    chem_comp_bond = Category()
+    n_bonds = len(unique_indices)
+    chem_comp_bond["pdbx_ordinal"] = np.arange(1, n_bonds + 1, dtype=np.int32)
+    chem_comp_bond["comp_id"] = comp_id[unique_indices]
+    chem_comp_bond["atom_id_1"] = atom_id_1[unique_indices]
+    chem_comp_bond["atom_id_2"] = atom_id_2[unique_indices]
     chem_comp_bond["value_order"] = Column(
-        value_order,
-        np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
+        value_order[unique_indices],
+        np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
     )
     chem_comp_bond["pdbx_aromatic_flag"] = Column(
-        aromatic_flag,
-        np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
+        aromatic_flag[unique_indices],
+        np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
     )
     # BondList does not contain stereo information
     # -> all values are missing
     chem_comp_bond["pdbx_stereo_config"] = Column(
-        np.zeros(len(bond_array), dtype="U1"),
-        np.full(len(bond_array), MaskValue.MISSING)
-    )
-    chem_comp_bond["pdbx_ordinal"] = np.arange(
-        1, len(bond_array) + 1, dtype=np.int32
+        np.zeros(n_bonds, dtype="U1"),
+        np.full(n_bonds, MaskValue.MISSING),
     )
     return chem_comp_bond
@@ -1001,8 +1007,11 @@ def _set_inter_residue_bonds(array, atom_site):
     ``atom_site`` category.
     """
     COLUMNS = [
-        "label_asym_id", "label_comp_id", "label_seq_id", "label_atom_id",
-        "pdbx_PDB_ins_code"
+        "label_asym_id",
+        "label_comp_id",
+        "label_seq_id",
+        "label_atom_id",
+        "pdbx_PDB_ins_code",
     ]
     Category = type(atom_site)
@@ -1011,17 +1020,17 @@ def _set_inter_residue_bonds(array, atom_site):
     bond_array = _filter_bonds(array, "inter")
     if len(bond_array) == 0:
         return None
     struct_conn = Category()
     struct_conn["id"] = np.arange(1, len(bond_array) + 1)
     struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
     struct_conn["pdbx_value_order"] = Column(
-        np.array(
-            [PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]
-        ),
+        np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
         np.where(
             bond_array[:, 2] == BondType.ANY,
-            MaskValue.MISSING, MaskValue.PRESENT,
-        )
+            MaskValue.MISSING,
+            MaskValue.PRESENT,
+        ),
     )
     # Write the identifying annotation...
     for col_name in COLUMNS:
@@ -1029,8 +1038,9 @@ def _set_inter_residue_bonds(array, atom_site):
         # ...for each bond partner
         for i in range(2):
             atom_indices = bond_array[:, i]
-            struct_conn[_get_struct_conn_col_name(col_name, i+1)] \
-                = annot[atom_indices]
+            struct_conn[_get_struct_conn_col_name(col_name, i + 1)] = annot[
+                atom_indices
+            ]
     return struct_conn
@@ -1042,9 +1052,9 @@ def _filter_bonds(array, connection):
     bond_array = array.bonds.as_array()
     # To save computation time call 'get_residue_starts_for()' only once
     # with indices of the first and second atom of each bond
-    residue_starts_1, residue_starts_2 = get_residue_starts_for(
-        array, bond_array[:, :2].flatten()
-    ).reshape(-1, 2).T
+    residue_starts_1, residue_starts_2 = (
+        get_residue_starts_for(array, bond_array[:, :2].flatten()).reshape(-1, 2).T
+    )
     if connection == "intra":
         return bond_array[residue_starts_1 == residue_starts_2]
     elif connection == "inter":
@@ -1053,12 +1063,11 @@ def _filter_bonds(array, connection):
         raise ValueError("Invalid 'connection' option")
-def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
-                  res_name=None):
+def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
     """
     Create an :class:`AtomArray` for a chemical component from the
     ``chem_comp_atom`` and, if available, the ``chem_comp_bond``
-    category in a :class:`PDBxFile`.
+    category in a file.
     Parameters
     ----------
@@ -1140,12 +1149,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
     array = AtomArray(atom_category.row_count)
-    array.hetero[:] = True
-    array.res_name = atom_category["comp_id"].as_array("U5")
-    array.atom_name = atom_category["atom_id"].as_array("U6")
-    array.element = atom_category["type_symbol"].as_array("U2")
-    array.add_annotation("charge", int)
-    array.charge = atom_category["charge"].as_array(int, 0)
+    array.set_annotation("hetero", np.full(len(atom_category["comp_id"]), True))
+    array.set_annotation("res_name", atom_category["comp_id"].as_array(str))
+    array.set_annotation("atom_name", atom_category["atom_id"].as_array(str))
+    array.set_annotation("element", atom_category["type_symbol"].as_array(str))
+    array.set_annotation("charge", atom_category["charge"].as_array(int, 0))
     coord_fields = [f"pdbx_model_Cartn_{dim}_ideal" for dim in ("x", "y", "z")]
     alt_coord_fields = [f"model_Cartn_{dim}" for dim in ("x", "y", "z")]
@@ -1154,16 +1162,16 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
         coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
     try:
         for i, field in enumerate(coord_fields):
-            array.coord[:,i] = atom_category[field].as_array(np.float32)
+            array.coord[:, i] = atom_category[field].as_array(np.float32)
     except KeyError as err:
         key = err.args[0]
         warnings.warn(
             f"Attribute '{key}' not found within 'chem_comp_atom' category. "
             f"The fallback coordinates will be used instead",
-            UserWarning
+            UserWarning,
         )
         for i, field in enumerate(alt_coord_fields):
-            array.coord[:,i] = atom_category[field].as_array(np.float32)
+            array.coord[:, i] = atom_category[field].as_array(np.float32)
     try:
         bond_category = block["chem_comp_bond"]
@@ -1173,9 +1181,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
             )
     except KeyError:
         warnings.warn(
-            f"Category 'chem_comp_bond' not found. "
-            f"No bonds will be parsed",
-            UserWarning
+            "Category 'chem_comp_bond' not found. " "No bonds will be parsed",
+            UserWarning,
         )
     else:
         bonds = BondList(array.array_length())
@@ -1183,7 +1190,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
             bond_category["atom_id_1"].as_array(str),
             bond_category["atom_id_2"].as_array(str),
             bond_category["value_order"].as_array(str),
-            bond_category["pdbx_aromatic_flag"].as_array(str)
+            bond_category["pdbx_aromatic_flag"].as_array(str),
         ):
             atom_i = np.where(array.atom_name == atom1)[0][0]
             atom_j = np.where(array.atom_name == atom2)[0][0]
@@ -1225,9 +1232,7 @@ def set_component(pdbx_file, array, data_block=None):
     Category = block.subcomponent_class()
     if get_residue_count(array) > 1:
-        raise BadStructureError(
-            "The input atom array must comprise only one residue"
-        )
+        raise BadStructureError("The input atom array must comprise only one residue")
     res_name = array.res_name[0]
     annot_categories = array.get_annotation_categories()
@@ -1250,31 +1255,28 @@ def set_component(pdbx_file, array, data_block=None):
     atom_cat["pdbx_model_Cartn_z_ideal"] = atom_cat["model_Cartn_z"]
     atom_cat["pdbx_component_atom_id"] = atom_cat["atom_id"]
     atom_cat["pdbx_component_comp_id"] = atom_cat["comp_id"]
-    atom_cat["pdbx_ordinal"] = np.arange(
-        1, array.array_length() + 1
-    ).astype(str)
+    atom_cat["pdbx_ordinal"] = np.arange(1, array.array_length() + 1).astype(str)
     block["chem_comp_atom"] = atom_cat
     if array.bonds is not None and array.bonds.get_bond_count() > 0:
         bond_array = array.bonds.as_array()
         order_flags = []
         aromatic_flags = []
-        for bond_type in bond_array[:,2]:
+        for bond_type in bond_array[:, 2]:
             order_flag, aromatic_flag = COMP_BOND_TYPE_TO_ORDER[bond_type]
             order_flags.append(order_flag)
             aromatic_flags.append(aromatic_flag)
         bond_cat = Category()
         bond_cat["comp_id"] = np.full(len(bond_array), res_name)
-        bond_cat["atom_id_1"] = array.atom_name[bond_array[:,0]]
-        bond_cat["atom_id_2"] = array.atom_name[bond_array[:,1]]
+        bond_cat["atom_id_1"] = array.atom_name[bond_array[:, 0]]
+        bond_cat["atom_id_2"] = array.atom_name[bond_array[:, 1]]
         bond_cat["value_order"] = np.array(order_flags)
         bond_cat["pdbx_aromatic_flag"] = np.array(aromatic_flags)
-        bond_cat["pdbx_ordinal"] = np.arange(
-            1, len(bond_array) + 1
-        ).astype(str)
+        bond_cat["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1).astype(str)
         block["chem_comp_bond"] = bond_cat
 def list_assemblies(pdbx_file, data_block=None):
     """
     List the biological assemblies that are available for the structure
@@ -1325,14 +1327,21 @@ def list_assemblies(pdbx_file, data_block=None):
         id: details
         for id, details in zip(
             assembly_category["id"].as_array(str),
-            assembly_category["details"].as_array(str)
+            assembly_category["details"].as_array(str),
         )
     }
-def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
-                 altloc="first", extra_fields=None, use_author_fields=True,
-                 include_bonds=False):
+def get_assembly(
+    pdbx_file,
+    assembly_id=None,
+    model=None,
+    data_block=None,
+    altloc="first",
+    extra_fields=None,
+    use_author_fields=True,
+    include_bonds=False,
+):
     """
     Build the given biological assembly.
@@ -1389,7 +1398,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
         for example both, ``label_seq_id`` and ``auth_seq_id`` describe
         the ID of the residue.
         While, the ``label_xxx`` fields can be used as official pointers
-        to other categories in the :class:`PDBxFile`, the ``auth_xxx``
+        to other categories in the file, the ``auth_xxx``
         fields are set by the author(s) of the structure and are
         consistent with the corresponding values in PDB files.
         If `use_author_fields` is true, the annotation arrays will be
@@ -1422,9 +1431,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
     try:
         assembly_gen_category = block["pdbx_struct_assembly_gen"]
     except KeyError:
-        raise InvalidFileError(
-            "File has no 'pdbx_struct_assembly_gen' category"
-        )
+        raise InvalidFileError("File has no 'pdbx_struct_assembly_gen' category")
     try:
         struct_oper_category = block["pdbx_struct_oper_list"]
@@ -1457,7 +1464,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
         altloc,
         extra_fields_and_asym,
         use_author_fields,
-        include_bonds
+        include_bonds,
     )
     ### Get transformations and apply them to the affected asym IDs
@@ -1473,9 +1480,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
             operations = _parse_operation_expression(op_expr)
             asym_ids = asym_id_expr.split(",")
             # Filter affected asym IDs
-            sub_structure = structure[
-                ..., np.isin(structure.label_asym_id, asym_ids)
-            ]
+            sub_structure = structure[..., np.isin(structure.label_asym_id, asym_ids)]
             sub_assembly = _apply_transformations(
                 sub_structure, transformations, operations
             )
@@ -1534,10 +1539,9 @@ def _get_transformations(struct_oper):
                 for i in (1, 2, 3)
             ]
         )
-        translation_vector = np.array([
-            struct_oper[f"vector[{i}]"].as_array(float)[index]
-            for i in (1, 2, 3)
-        ])
+        translation_vector = np.array(
+            [struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
+        )
         transformation_dict[id] = (rotation_matrix, translation_vector)
     return transformation_dict
@@ -1592,6 +1596,4 @@ def _convert_string_to_sequence(string, stype):
     elif stype in _other_type_list:
         return None
     else:
-        raise InvalidFileError(
-            "mmCIF _entity_poly.type unsupported" " type: " + stype
-        )
+        raise InvalidFileError("mmCIF _entity_poly.type unsupported" " type: " + stype)