PyPI - biotite - Versions diffs - 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +156 -43
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/filter.py +1 -1
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +148 -107
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +227 -68
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +16 -16
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/convert.py CHANGED Viewed

@@ -24,6 +24,10 @@ from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
 from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
 from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
 from biotite.structure.error import BadStructureError
+from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
+from biotite.structure.filter import (
+    _canonical_nucleotide_list as canonical_nucleotide_list,
+)
 from biotite.structure.filter import (
     filter_first_altloc,
     filter_highest_occupancy_altloc,
@@ -36,32 +40,38 @@ from biotite.structure.io.pdbx.bcif import (
 from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
 from biotite.structure.io.pdbx.component import MaskValue
 from biotite.structure.io.pdbx.encoding import StringArrayEncoding
-from biotite.structure.residues import get_residue_count, get_residue_starts_for
+from biotite.structure.residues import (
+    get_residue_count,
+    get_residue_positions,
+    get_residue_starts_for,
+)
 from biotite.structure.util import matrix_rotate
-# Cond types in `struct_conn` category that refer to covalent bonds
-PDBX_COVALENT_TYPES = [
-    "covale",
-    "covale_base",
-    "covale_phosphate",
-    "covale_sugar",
-    "disulf",
-    "modres",
-    "modres_link",
-    "metalc",
-]
-# Map 'struct_conn' bond orders to 'BondType'...
-PDBX_BOND_ORDER_TO_TYPE = {
-    "": BondType.ANY,
-    "sing": BondType.SINGLE,
-    "doub": BondType.DOUBLE,
-    "trip": BondType.TRIPLE,
-    "quad": BondType.QUADRUPLE,
+# Bond types in `struct_conn` category that refer to covalent bonds
+PDBX_BOND_TYPE_ID_TO_TYPE = {
+    # Although a covalent bond could in theory have a higher bond order,
+    # in practice inter-residue bonds are always single
+    "covale": BondType.SINGLE,
+    "covale_base": BondType.SINGLE,
+    "covale_phosphate": BondType.SINGLE,
+    "covale_sugar": BondType.SINGLE,
+    "disulf": BondType.SINGLE,
+    "modres": BondType.SINGLE,
+    "modres_link": BondType.SINGLE,
+    "metalc": BondType.COORDINATION,
+}
+PDBX_BOND_TYPE_TO_TYPE_ID = {
+    BondType.ANY: "covale",
+    BondType.SINGLE: "covale",
+    BondType.DOUBLE: "covale",
+    BondType.TRIPLE: "covale",
+    BondType.QUADRUPLE: "covale",
+    BondType.AROMATIC_SINGLE: "covale",
+    BondType.AROMATIC_DOUBLE: "covale",
+    BondType.AROMATIC_TRIPLE: "covale",
+    BondType.COORDINATION: "metalc",
 }
-# ...and vice versa
 PDBX_BOND_TYPE_TO_ORDER = {
-    # 'ANY' is masked later, it is merely added here to avoid a KeyError
-    BondType.ANY: "",
     BondType.SINGLE: "sing",
     BondType.DOUBLE: "doub",
     BondType.TRIPLE: "trip",
@@ -69,6 +79,9 @@ PDBX_BOND_TYPE_TO_ORDER = {
     BondType.AROMATIC_SINGLE: "sing",
     BondType.AROMATIC_DOUBLE: "doub",
     BondType.AROMATIC_TRIPLE: "trip",
+    # These are masked later; they are merely added here to avoid a KeyError
+    BondType.ANY: "",
+    BondType.COORDINATION: "",
 }
 # Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
 COMP_BOND_ORDER_TO_TYPE = {
@@ -84,6 +97,7 @@ COMP_BOND_ORDER_TO_TYPE = {
 COMP_BOND_TYPE_TO_ORDER = {
     bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
 }
+CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list
 _proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
 _nucleotideseq_type_list = [
@@ -450,7 +464,7 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
         "chain_id",
         _get_or_fallback(
             atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
-        ).as_array("U4"),
+        ).as_array(str),
     )
     array.set_annotation(
         "res_id",
@@ -458,33 +472,70 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
             atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
         ).as_array(int, -1),
     )
-    array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array("U1", ""))
+    array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
     array.set_annotation(
         "res_name",
         _get_or_fallback(
             atom_site, f"{prefix}_comp_id", f"{alt_prefix}_comp_id"
-        ).as_array("U5"),
+        ).as_array(str),
     )
     array.set_annotation("hetero", atom_site["group_PDB"].as_array(str) == "HETATM")
     array.set_annotation(
         "atom_name",
         _get_or_fallback(
             atom_site, f"{prefix}_atom_id", f"{alt_prefix}_atom_id"
-        ).as_array("U6"),
+        ).as_array(str),
     )
-    array.set_annotation("element", atom_site["type_symbol"].as_array("U2"))
+    array.set_annotation("element", atom_site["type_symbol"].as_array(str))
     if "atom_id" in extra_fields:
-        array.set_annotation("atom_id", atom_site["id"].as_array(int))
+        if "id" in atom_site:
+            array.set_annotation("atom_id", atom_site["id"].as_array(int))
+        else:
+            warnings.warn(
+                "Missing 'id' in 'atom_site' category. 'atom_id' generated automatically.",
+                UserWarning,
+            )
+            array.set_annotation("atom_id", np.arange(array.array_length()))
         extra_fields.remove("atom_id")
     if "b_factor" in extra_fields:
-        array.set_annotation("b_factor", atom_site["B_iso_or_equiv"].as_array(float))
+        if "B_iso_or_equiv" in atom_site:
+            array.set_annotation(
+                "b_factor", atom_site["B_iso_or_equiv"].as_array(float)
+            )
+        else:
+            warnings.warn(
+                "Missing 'B_iso_or_equiv' in 'atom_site' category. 'b_factor' will be set to `nan`.",
+                UserWarning,
+            )
+            array.set_annotation("b_factor", np.full(array.array_length(), np.nan))
         extra_fields.remove("b_factor")
     if "occupancy" in extra_fields:
-        array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
+        if "occupancy" in atom_site:
+            array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
+        else:
+            warnings.warn(
+                "Missing 'occupancy' in 'atom_site' category. 'occupancy' will be assumed to be 1.0",
+                UserWarning,
+            )
+            array.set_annotation(
+                "occupancy", np.ones(array.array_length(), dtype=float)
+            )
         extra_fields.remove("occupancy")
     if "charge" in extra_fields:
-        array.set_annotation("charge", atom_site["pdbx_formal_charge"].as_array(int, 0))
+        if "pdbx_formal_charge" in atom_site:
+            array.set_annotation(
+                "charge",
+                atom_site["pdbx_formal_charge"].as_array(
+                    int, 0
+                ),  # masked values are set to 0
+            )
+        else:
+            warnings.warn(
+                "Missing 'pdbx_formal_charge' in 'atom_site' category. 'charge' will be set to 0",
+                UserWarning,
+            )
+            array.set_annotation("charge", np.zeros(array.array_length(), dtype=int))
         extra_fields.remove("charge")
     # Handle all remaining custom fields
@@ -536,7 +587,8 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     ]
     covale_mask = np.isin(
-        struct_conn["conn_type_id"].as_array(str), PDBX_COVALENT_TYPES
+        struct_conn["conn_type_id"].as_array(str),
+        list(PDBX_BOND_TYPE_ID_TO_TYPE.keys()),
     )
     if "ptnr1_symmetry" in struct_conn:
         covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
@@ -576,13 +628,14 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     atoms_indices_1 = atoms_indices_1[mapping_exists_mask]
     atoms_indices_2 = atoms_indices_2[mapping_exists_mask]
-    # Interpret missing values as ANY bonds
-    bond_order = struct_conn["pdbx_value_order"].as_array("U4", "")
+    bond_type_id = struct_conn["conn_type_id"].as_array()
     # Consecutively apply the same masks as applied to the atom indices
     # Logical combination does not work here,
     # as the second mask was created based on already filtered data
-    bond_order = bond_order[covale_mask][mapping_exists_mask]
-    bond_types = [PDBX_BOND_ORDER_TO_TYPE[order] for order in bond_order]
+    bond_type_id = bond_type_id[covale_mask][mapping_exists_mask]
+    # The type ID is always present in the dictionary,
+    # as it was used to filter the applicable bonds
+    bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]
     return BondList(
         atom_site.row_count,
@@ -593,7 +646,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
 def _find_matches(query_arrays, reference_arrays):
     """
     For each index in the `query_arrays` find the indices in the
-    `reference_arrays` where all query values the reference counterpart.
+    `reference_arrays` where all query values match the reference counterpart.
     If no match is found for a query, the corresponding index is -1.
     """
     match_masks_for_all_columns = np.stack(
@@ -703,7 +756,13 @@ def _get_box(block):
     return vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
-def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
+def set_structure(
+    pdbx_file,
+    array,
+    data_block=None,
+    include_bonds=False,
+    extra_fields=[],
+):
     """
     Set the ``atom_site`` category with atom information from an
     :class:`AtomArray` or :class:`AtomArrayStack`.
@@ -737,6 +796,10 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
         category.
         Inter-residue bonds will be written into the ``struct_conn``
         independent of this parameter.
+    extra_fields : list of str, optional
+        List of additional annotation names of `array`
+        that should be written as fields into the ``atom_site`` category.
+        Default is an empty list.
     Notes
     -----
@@ -797,6 +860,32 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
             np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
         )
+    # Handle all remaining custom fields
+    if len(extra_fields) > 0:
+        # ... check to avoid clashes with standard annotations
+        _standard_annotations = [
+            "hetero",
+            "element",
+            "atom_name",
+            "res_name",
+            "chain_id",
+            "res_id",
+            "ins_code",
+            "atom_id",
+            "b_factor",
+            "occupancy",
+            "charge",
+        ]
+        _reserved_annotation_names = list(atom_site.keys()) + _standard_annotations
+        for annot in extra_fields:
+            if annot in _reserved_annotation_names:
+                raise ValueError(
+                    f"Annotation name '{annot}' is reserved and cannot be written to as extra field. "
+                    "Please choose another name."
+                )
+            atom_site[annot] = np.copy(array.get_annotation(annot))
     if array.bonds is not None:
         struct_conn = _set_inter_residue_bonds(array, atom_site)
         if struct_conn is not None:
@@ -964,25 +1053,38 @@ def _set_intra_residue_bonds(array, atom_site):
         aromatic_flag[i] = aromatic
     any_mask = bond_array[:, 2] == BondType.ANY
-    chem_comp_bond = Category()
+    # Remove already existing residue and atom name combinations
+    # These appear when the structure contains a residue multiple times
+    atom_id_1 = array.atom_name[bond_array[:, 0]]
+    atom_id_2 = array.atom_name[bond_array[:, 1]]
     # Take the residue name from the first atom index, as the residue
     # name is the same for both atoms, since we have only intra bonds
-    chem_comp_bond["comp_id"] = array.res_name[bond_array[:, 0]]
-    chem_comp_bond["atom_id_1"] = array.atom_name[bond_array[:, 0]]
-    chem_comp_bond["atom_id_2"] = array.atom_name[bond_array[:, 1]]
+    comp_id = array.res_name[bond_array[:, 0]]
+    _, unique_indices = np.unique(
+        np.stack([comp_id, atom_id_1, atom_id_2], axis=-1), axis=0, return_index=True
+    )
+    unique_indices.sort()
+    chem_comp_bond = Category()
+    n_bonds = len(unique_indices)
+    chem_comp_bond["pdbx_ordinal"] = np.arange(1, n_bonds + 1, dtype=np.int32)
+    chem_comp_bond["comp_id"] = comp_id[unique_indices]
+    chem_comp_bond["atom_id_1"] = atom_id_1[unique_indices]
+    chem_comp_bond["atom_id_2"] = atom_id_2[unique_indices]
     chem_comp_bond["value_order"] = Column(
-        value_order, np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
+        value_order[unique_indices],
+        np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
     )
     chem_comp_bond["pdbx_aromatic_flag"] = Column(
-        aromatic_flag, np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
+        aromatic_flag[unique_indices],
+        np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
     )
     # BondList does not contain stereo information
     # -> all values are missing
     chem_comp_bond["pdbx_stereo_config"] = Column(
-        np.zeros(len(bond_array), dtype="U1"),
-        np.full(len(bond_array), MaskValue.MISSING),
+        np.zeros(n_bonds, dtype="U1"),
+        np.full(n_bonds, MaskValue.MISSING),
     )
-    chem_comp_bond["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1, dtype=np.int32)
     return chem_comp_bond
@@ -1007,13 +1109,22 @@ def _set_inter_residue_bonds(array, atom_site):
     bond_array = _filter_bonds(array, "inter")
     if len(bond_array) == 0:
         return None
+    # Filter out 'standard' links, i.e. backbone bonds between adjacent canonical
+    # nucleotide/amino acid residues
+    bond_array = bond_array[~_filter_canonical_links(array, bond_array)]
+    if len(bond_array) == 0:
+        return None
     struct_conn = Category()
     struct_conn["id"] = np.arange(1, len(bond_array) + 1)
-    struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
+    struct_conn["conn_type_id"] = [
+        PDBX_BOND_TYPE_TO_TYPE_ID[btype] for btype in bond_array[:, 2]
+    ]
     struct_conn["pdbx_value_order"] = Column(
         np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
         np.where(
-            bond_array[:, 2] == BondType.ANY,
+            np.isin(bond_array[:, 2], (BondType.ANY, BondType.COORDINATION)),
             MaskValue.MISSING,
             MaskValue.PRESENT,
         ),
@@ -1049,6 +1160,27 @@ def _filter_bonds(array, connection):
         raise ValueError("Invalid 'connection' option")
+def _filter_canonical_links(array, bond_array):
+    """
+    Create a mask that marks the 'standard' backbone links, i.e. bonds between
+    adjacent canonical amino acid or nucleotide residues.
+    """
+    # Get the residue index for each bonded atom
+    residue_indices = get_residue_positions(array, bond_array[:, :2].flatten()).reshape(
+        -1, 2
+    )
+    return (
+        # Must be canonical residues
+        np.isin(array.res_name[bond_array[:, 0]], CANONICAL_RESIDUE_LIST) &
+        np.isin(array.res_name[bond_array[:, 1]], CANONICAL_RESIDUE_LIST) &
+        # Must be backbone bond
+        np.isin(array.atom_name[bond_array[:, 0]], ("C", "O3'")) &
+        np.isin(array.atom_name[bond_array[:, 1]], ("N", "P")) &
+        # Must connect adjacent residues
+        residue_indices[:, 1] - residue_indices[:, 0] == 1
+    )  # fmt: skip
 def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
     """
     Create an :class:`AtomArray` for a chemical component from the
@@ -1135,12 +1267,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
     array = AtomArray(atom_category.row_count)
-    array.hetero[:] = True
-    array.res_name = atom_category["comp_id"].as_array("U5")
-    array.atom_name = atom_category["atom_id"].as_array("U6")
-    array.element = atom_category["type_symbol"].as_array("U2")
-    array.add_annotation("charge", int)
-    array.charge = atom_category["charge"].as_array(int, 0)
+    array.set_annotation("hetero", np.full(len(atom_category["comp_id"]), True))
+    array.set_annotation("res_name", atom_category["comp_id"].as_array(str))
+    array.set_annotation("atom_name", atom_category["atom_id"].as_array(str))
+    array.set_annotation("element", atom_category["type_symbol"].as_array(str))
+    array.set_annotation("charge", atom_category["charge"].as_array(int, 0))
     coord_fields = [f"pdbx_model_Cartn_{dim}_ideal" for dim in ("x", "y", "z")]
     alt_coord_fields = [f"model_Cartn_{dim}" for dim in ("x", "y", "z")]
@@ -1148,17 +1279,28 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
         # Swap with the fallback option
         coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
     try:
-        for i, field in enumerate(coord_fields):
-            array.coord[:, i] = atom_category[field].as_array(np.float32)
-    except KeyError as err:
-        key = err.args[0]
-        warnings.warn(
-            f"Attribute '{key}' not found within 'chem_comp_atom' category. "
-            f"The fallback coordinates will be used instead",
-            UserWarning,
+        array.coord = _parse_component_coordinates(
+            [atom_category[field] for field in coord_fields]
+        )
+    except Exception as err:
+        if isinstance(err, KeyError):
+            key = err.args[0]
+            warnings.warn(
+                f"Attribute '{key}' not found within 'chem_comp_atom' category. "
+                f"The fallback coordinates will be used instead",
+                UserWarning,
+            )
+        elif isinstance(err, ValueError):
+            warnings.warn(
+                "The coordinates are missing for some atoms. "
+                "The fallback coordinates will be used instead",
+                UserWarning,
+            )
+        else:
+            raise
+        array.coord = _parse_component_coordinates(
+            [atom_category[field] for field in alt_coord_fields]
         )
-        for i, field in enumerate(alt_coord_fields):
-            array.coord[:, i] = atom_category[field].as_array(np.float32)
     try:
         bond_category = block["chem_comp_bond"]
@@ -1188,6 +1330,17 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
     return array
+def _parse_component_coordinates(coord_columns):
+    coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
+    for i, column in enumerate(coord_columns):
+        if column.mask is not None and column.mask.array.any():
+            raise ValueError(
+                "Missing coordinates for some atoms",
+            )
+        coord[:, i] = column.as_array(np.float32)
+    return coord
 def set_component(pdbx_file, array, data_block=None):
     """
     Set the ``chem_comp_atom`` and, if bonds are available,
@@ -1404,7 +1557,10 @@ def get_assembly(
     Returns
     -------
     assembly : AtomArray or AtomArrayStack
-        The assembly. The return type depends on the `model` parameter.
+        The assembly.
+        The return type depends on the `model` parameter.
+        Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
+        unit in the assembly.
     Examples
     --------
@@ -1493,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations):
     """
     # Additional first dimension for 'structure.repeat()'
     assembly_coord = np.zeros((len(operations),) + structure.coord.shape)
     # Apply corresponding transformation for each copy in the assembly
     for i, operation in enumerate(operations):
         coord = structure.coord
@@ -1507,7 +1662,11 @@ def _apply_transformations(structure, transformation_dict, operations):
             coord += translation_vector
         assembly_coord[i] = coord
-    return repeat(structure, assembly_coord)
+    assembly = repeat(structure, assembly_coord)
+    assembly.set_annotation(
+        "sym_id", np.repeat(np.arange(len(operations)), structure.array_length())
+    )
+    return assembly
 def _get_transformations(struct_oper):

biotite/structure/io/pdbx/encoding.cpython-311-darwin.so CHANGED Viewed

Binary file