PyPI - biotite - Versions diffs - 1.0.1__cp311-cp311-macosx_11_0_arm64.whl → 1.2.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 1.0.1__cp311-cp311-macosx_11_0_arm64.whl → 1.2.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +36 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +5 -18
biotite/application/muscle/app5.py +5 -5
biotite/application/sra/app.py +0 -5
biotite/application/util.py +22 -2
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +9 -3
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +8 -9
biotite/database/uniprot/check.py +22 -17
biotite/database/uniprot/download.py +3 -6
biotite/database/uniprot/query.py +4 -5
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +49 -14
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/banded.pyx +26 -26
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +19 -2
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +58 -48
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +284 -57
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/alphabet.py +5 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/sequence/profile.py +105 -29
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +136 -8
biotite/sequence/sequence.py +1 -2
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +6 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +109 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +170 -0
biotite/structure/alphabet/unkerasify.py +128 -0
biotite/structure/atoms.py +163 -66
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/bonds.pyx +79 -25
biotite/structure/box.py +19 -21
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +74 -127
biotite/structure/hbond.py +17 -19
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +24 -15
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -34
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +62 -19
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -22
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +4 -4
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +80 -53
biotite/structure/io/pdb/convert.py +4 -3
biotite/structure/io/pdb/file.py +85 -25
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +36 -36
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +54 -15
biotite/structure/io/pdbx/cif.py +92 -66
biotite/structure/io/pdbx/component.py +15 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +410 -75
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +141 -156
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +69 -11
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +76 -4
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/alphabet/pb.py ADDED Viewed

@@ -0,0 +1,170 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+Conversion of structures into the *Protein Blocks* structural alphabet.
+"""
+__name__ = "biotite.structure.alphabet"
+__author__ = "Patrick Kunzmann"
+__all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
+import numpy as np
+from biotite.sequence.alphabet import LetterAlphabet
+from biotite.sequence.sequence import Sequence
+from biotite.structure.chains import get_chain_starts
+from biotite.structure.geometry import dihedral_backbone
+# PB reference angles, adapted from PBxplore
+# Each of the 16 rows is the reference conformation of one protein block, in the
+# order of the symbols 'a' to 'p' (the row index is used as the symbol code).
+# The 8 columns hold backbone dihedral angles in degrees for the five-residue
+# window centered on residue i, in the order
+# psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
+PB_ANGLES = np.array(
+    [
+        [41.14,    75.53,   13.92,  -99.80,  131.88,  -96.27, 122.08,  -99.68],
+        [108.24,  -90.12,  119.54,  -92.21,  -18.06, -128.93, 147.04,  -99.90],
+        [-11.61, -105.66,   94.81, -106.09,  133.56, -106.93, 135.97, -100.63],
+        [141.98, -112.79,  132.20, -114.79,  140.11, -111.05, 139.54, -103.16],
+        [133.25, -112.37,  137.64, -108.13,  133.00,  -87.30, 120.54,   77.40],
+        [116.40, -105.53,  129.32,  -96.68,  140.72,  -74.19, -26.65,  -94.51],
+        [0.40,    -81.83,    4.91, -100.59,   85.50,  -71.65, 130.78,   84.98],
+        [119.14, -102.58,  130.83,  -67.91,  121.55,   76.25,  -2.95,  -90.88],
+        [130.68,  -56.92,  119.26,   77.85,   10.42,  -99.43, 141.40,  -98.01],
+        [114.32, -121.47,  118.14,   82.88, -150.05,  -83.81,  23.35,  -85.82],
+        [117.16,  -95.41,  140.40,  -59.35,  -29.23,  -72.39, -25.08,  -76.16],
+        [139.20,  -55.96,  -32.70,  -68.51,  -26.09,  -74.44, -22.60,  -71.74],
+        [-39.62,  -64.73,  -39.52,  -65.54,  -38.88,  -66.89, -37.76,  -70.19],
+        [-35.34,  -65.03,  -38.12,  -66.34,  -29.51,  -89.10,  -2.91,   77.90],
+        [-45.29,  -67.44,  -27.72,  -87.27,    5.13,   77.49,  30.71,  -93.23],
+        [-27.09,  -86.14,    0.30,   59.85,   21.51,  -96.30, 132.67,  -92.91],
+    ]
+)  # fmt: skip
+class ProteinBlocksSequence(Sequence):
+    """
+    Representation of a structure in the *Protein Blocks* structural alphabet.
+    :footcite:`Brevern2000`
+    Parameters
+    ----------
+    sequence : iterable object, optional
+        The *Protein Blocks* sequence.
+        This may either be a list or a string.
+        May take upper or lower case letters.
+        By default the sequence is empty.
+    See Also
+    --------
+    to_protein_blocks : Create *Protein Blocks* sequences from a structure.
+    References
+    ----------
+    .. footbibliography::
+    """
+    alphabet = LetterAlphabet("abcdefghijklmnopz")
+    undefined_symbol = "z"
+    def __init__(self, sequence=""):
+        if isinstance(sequence, str):
+            sequence = sequence.lower()
+        else:
+            sequence = [symbol.upper() for symbol in sequence]
+        super().__init__(sequence)
+    def get_alphabet(self):
+        return ProteinBlocksSequence.alphabet
+    def remove_undefined(self):
+        """
+        Remove undefined symbols from the sequence.
+        Returns
+        -------
+        filtered_sequence : ProteinBlocksSequence
+            The sequence without undefined symbols.
+        """
+        undefined_code = ProteinBlocksSequence.alphabet.encode(
+            ProteinBlocksSequence.undefined_symbol
+        )
+        filtered_code = self.code[self.code != undefined_code]
+        filtered_sequence = ProteinBlocksSequence()
+        filtered_sequence.code = filtered_code
+        return filtered_sequence
+def to_protein_blocks(atoms):
+    """
+    Encode each chain in the given structure to the *Protein Blocks* structural
+    alphabet.
+    :footcite:`Brevern2000`
+    Parameters
+    ----------
+    atoms : AtomArray
+        The atom array to encode.
+        May contain multiple chains.
+    Returns
+    -------
+    sequences : list of Sequence, length=n
+        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
+    chain_start_indices : ndarray, shape=(n,), dtype=int
+        The atom index where each chain starts.
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    >>> sequences, chain_starts = to_protein_blocks(atom_array)
+    >>> print(sequences[0])
+    zzmmmmmnopjmnopacdzz
+    """
+    sequences = []
+    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
+    for i in range(len(chain_start_indices) - 1):
+        start = chain_start_indices[i]
+        stop = chain_start_indices[i + 1]
+        chain = atoms[start:stop]
+        sequences.append(_to_protein_blocks(chain))
+    return sequences, chain_start_indices[:-1]
+def _to_protein_blocks(chain):
+    """
+    Encode a single chain into a :class:`ProteinBlocksSequence`.
+    Each position gets the protein block whose reference angles in ``PB_ANGLES``
+    deviate least from the backbone dihedral angles around that position.
+    Positions without a complete set of angles get the undefined symbol.
+    """
+    undefined_code = ProteinBlocksSequence.alphabet.encode(
+        ProteinBlocksSequence.undefined_symbol
+    )
+    phi, psi, _ = dihedral_backbone(chain)
+    # One row per entry of 'phi', columns in the same order as in 'PB_ANGLES':
+    # psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
+    # The first and last two rows are never filled and hence stay NaN
+    pb_angles = np.full((len(phi), 8), np.nan)
+    pb_angles[2:-2, 0] = psi[:-4]
+    pb_angles[2:-2, 1] = phi[1:-3]
+    pb_angles[2:-2, 2] = psi[1:-3]
+    pb_angles[2:-2, 3] = phi[2:-2]
+    pb_angles[2:-2, 4] = psi[2:-2]
+    pb_angles[2:-2, 5] = phi[3:-1]
+    pb_angles[2:-2, 6] = psi[3:-1]
+    pb_angles[2:-2, 7] = phi[4:]
+    # The reference angles are given in degrees
+    pb_angles = np.rad2deg(pb_angles)
+    # Sum of squared deviations of all reference angles from all actual angles
+    # This has its minimum at the same PB as the angle RMSD (RMSDA),
+    # so the square root and the mean are omitted
+    # Each difference is wrapped into the range [-180, 180) before squaring
+    # Broadcasting gives one row per PB and one column per position
+    rmsda = np.sum(
+        ((PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :] + 180) % 360 - 180) ** 2,
+        axis=-1,
+    )
+    # Where RMSDA is NaN, (missing atoms/residues or chain ends) set symbol to unknown
+    pb_seq_code = np.full(len(pb_angles), undefined_code, dtype=np.uint8)
+    pb_available_mask = ~np.isnan(rmsda).any(axis=0)
+    # Choose the PB, where the RMSDA to the reference angle is lowest
+    # Due to the definition of Biotite symbol codes
+    # the index of the chosen PB is directly the symbol code
+    pb_seq_code[pb_available_mask] = np.argmin(rmsda[:, pb_available_mask], axis=0)
+    # Put the array of symbol codes into actual sequence objects
+    pb_sequence = ProteinBlocksSequence()
+    pb_sequence.code = pb_seq_code
+    return pb_sequence

biotite/structure/alphabet/unkerasify.py ADDED Viewed

@@ -0,0 +1,128 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+Parser for extracting weights from Keras files.
+Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
+"""
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde"
+__all__ = ["load_kerasify"]
+import enum
+import functools
+import itertools
+import struct
+import numpy as np
+from biotite.structure.alphabet.layers import DenseLayer, Layer
+class LayerType(enum.IntEnum):
+    """
+    Integer tags identifying the type of a serialized layer.
+    :meth:`KerasifyParser.read()` converts the layer type number read from the
+    file into a member of this enum.
+    Only ``DENSE`` layers are actually parsed there.
+    """
+    DENSE = 1
+    CONVOLUTION2D = 2
+    FLATTEN = 3
+    ELU = 4
+    ACTIVATION = 5
+    MAXPOOLING2D = 6
+    LSTM = 7
+    EMBEDDING = 8
+class ActivationType(enum.IntEnum):
+    """
+    Integer tags identifying the activation function of a serialized layer.
+    :meth:`KerasifyParser.read()` converts the activation number read from the
+    file into a member of this enum.
+    Only ``LINEAR`` and ``RELU`` are accepted there.
+    """
+    LINEAR = 1
+    RELU = 2
+    SOFTPLUS = 3
+    SIGMOID = 4
+    TANH = 5
+    HARD_SIGMOID = 6
+class KerasifyParser:
+    """
+    An incomplete parser for model files serialized with `kerasify`.
+    Parameters
+    ----------
+    file : file-like
+        The ``.kerasify`` file to parse.
+    Notes
+    -----
+    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
+    is only using 3 dense layers.
+    """
+    def __init__(self, file) -> None:
+        self.file = file
+        self.buffer = bytearray(1024)
+        (self.n_layers,) = self._get("I")
+    def read(self):
+        if self.n_layers == 0:
+            return None
+        self.n_layers -= 1
+        layer_type = LayerType(self._get("I")[0])
+        if layer_type == LayerType.DENSE:
+            (w0,) = self._get("I")
+            (w1,) = self._get("I")
+            (b0,) = self._get("I")
+            weights = (
+                np.frombuffer(self._read(f"={w0 * w1}f"), dtype="f4")
+                .reshape(w0, w1)
+                .copy()
+            )
+            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
+            activation = ActivationType(self._get("I")[0])
+            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
+                raise NotImplementedError(
+                    f"Unsupported activation type: {activation!r}"
+                )
+            return DenseLayer(weights, biases, activation == ActivationType.RELU)
+        else:
+            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")
+    def __iter__(self):
+        return self
+    def __next__(self) -> Layer:
+        layer = self.read()
+        if layer is None:
+            raise StopIteration
+        return layer
+    def _read(self, format: str) -> memoryview:
+        n = struct.calcsize(format)
+        if len(self.buffer) < n:
+            self.buffer.extend(
+                itertools.islice(itertools.repeat(0), n - len(self.buffer))
+            )
+        v = memoryview(self.buffer)[:n]
+        self.file.readinto(v)  # type: ignore
+        return v
+    def _get(self, format: str):
+        v = self._read(format)
+        return struct.unpack(format, v)
+@functools.cache
+def load_kerasify(file_path):
+    """
+    Load the the model layers from a ``.kerasify`` file.
+    Parameters
+    ----------
+    file_path : str
+        The path to the ``.kerasify`` file.
+    Returns
+    -------
+    layers : tuple of Layer
+        The model layers.
+    """
+    with open(file_path, "rb") as file:
+        return tuple(KerasifyParser(file))

biotite/structure/atoms.py CHANGED Viewed

@@ -13,6 +13,7 @@ __all__ = [
     "Atom",
     "AtomArray",
     "AtomArrayStack",
+    "concatenate",
     "array",
     "stack",
     "repeat",
@@ -22,6 +23,7 @@ __all__ = [
 import abc
 import numbers
+from collections.abc import Sequence
 import numpy as np
 from biotite.copyable import Copyable
 from biotite.structure.bonds import BondList
@@ -33,6 +35,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
     :class:`AtomArrayStack`.
     It implements functionality for annotation arrays and also
     rudimentarily for coordinates.
+    Parameters
+    ----------
+    length : int
+        The amount of atoms in the structure.
     """
     def __init__(self, length):
@@ -94,11 +101,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
             The annotation category to be added.
         dtype : type or str
             A type instance or a valid *NumPy* *dtype* string.
-            Defines the type of the annotation
+            Defines the type of the annotation.
         See Also
         --------
-        set_annotation
+        set_annotation : Assign directly a value to an annotation.
         Notes
         -----
@@ -157,7 +164,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         ----------
         category : str
             The annotation category to be set.
-        array : ndarray or None
+        array : ndarray
             The new value of the annotation category. The size of the
             array must be the same as the array length.
@@ -169,7 +176,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         array = np.asarray(array)
         if len(array) != self._array_length:
             raise IndexError(
-                f"Expected array length {self._array_length}, " f"but got {len(array)}"
+                f"Expected array length {self._array_length}, but got {len(array)}"
             )
         if category in self._annot:
             # If the annotation already exists, find the compatible dtype
@@ -233,7 +240,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         else:
             raise TypeError(f"Index must be integer, not '{type(index).__name__}'")
-    def equal_annotations(self, item):
+    def equal_annotations(self, item, equal_nan=True):
         """
         Check, if this object shares equal annotation arrays with the
         given :class:`AtomArray` or :class:`AtomArrayStack`.
@@ -242,6 +249,8 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         ----------
         item : AtomArray or AtomArrayStack
             The object to compare the annotation arrays with.
+        equal_nan : bool
+            Whether to count `nan` values as equal. Default: True.
         Returns
         -------
@@ -253,7 +262,18 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         if not self.equal_annotation_categories(item):
             return False
         for name in self._annot:
-            if not np.array_equal(self._annot[name], item._annot[name]):
+            # ... allowing `nan` values causes type-casting, which is
+            #     only possible for floating-point arrays
+            allow_nan = (
+                equal_nan
+                if np.issubdtype(self._annot[name].dtype, np.floating)
+                else False
+            )
+            if not np.array_equal(
+                self._annot[name],
+                item._annot[name],
+                equal_nan=allow_nan,
+            ):
                 return False
         return True
@@ -308,17 +328,16 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
             if isinstance(self, AtomArray):
                 if value.ndim != 2:
                     raise ValueError(
-                        "A 2-dimensional ndarray is expected " "for an AtomArray"
+                        "A 2-dimensional ndarray is expected for an AtomArray"
                     )
             elif isinstance(self, AtomArrayStack):
                 if value.ndim != 3:
                     raise ValueError(
-                        "A 3-dimensional ndarray is expected " "for an AtomArrayStack"
+                        "A 3-dimensional ndarray is expected for an AtomArrayStack"
                     )
             if value.shape[-2] != self._array_length:
                 raise ValueError(
-                    f"Expected array length {self._array_length}, "
-                    f"but got {len(value)}"
+                    f"Expected array length {self._array_length}, but got {len(value)}"
                 )
             if value.shape[-1] != 3:
                 raise TypeError("Expected 3 coordinates for each atom")
@@ -343,13 +362,12 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
                 if isinstance(self, AtomArray):
                     if value.ndim != 2:
                         raise ValueError(
-                            "A 2-dimensional ndarray is expected " "for an AtomArray"
+                            "A 2-dimensional ndarray is expected for an AtomArray"
                         )
                 else:  # AtomArrayStack
                     if value.ndim != 3:
                         raise ValueError(
-                            "A 3-dimensional ndarray is expected "
-                            "for an AtomArrayStack"
+                            "A 3-dimensional ndarray is expected for an AtomArrayStack"
                         )
                 if value.shape[-2:] != (3, 3):
                     raise TypeError("Box must be a 3x3 matrix (three vectors)")
@@ -407,42 +425,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         return self._array_length
     def __add__(self, array):
-        if not isinstance(self, type(array)):
-            raise TypeError("Can only concatenate two arrays or two stacks")
-        # Create either new array or stack, depending of the own type
-        if isinstance(self, AtomArray):
-            concat = AtomArray(length=self._array_length + array._array_length)
-        if isinstance(self, AtomArrayStack):
-            concat = AtomArrayStack(
-                self.stack_depth(), self._array_length + array._array_length
-            )
-        concat._coord = np.concatenate((self._coord, array.coord), axis=-2)
-        # Transfer only annotations,
-        # which are existent in both operands
-        arr_categories = list(array._annot.keys())
-        for category in self._annot.keys():
-            if category in arr_categories:
-                annot = self._annot[category]
-                arr_annot = array._annot[category]
-                concat._annot[category] = np.concatenate((annot, arr_annot))
-        # Concatenate bonds lists,
-        # if at least one of them contains bond information
-        if self._bonds is not None or array._bonds is not None:
-            bonds1 = self._bonds
-            bonds2 = array._bonds
-            if bonds1 is None:
-                bonds1 = BondList(self._array_length)
-            if bonds2 is None:
-                bonds2 = BondList(array._array_length)
-            concat._bonds = bonds1 + bonds2
-        # Copy box
-        if self._box is not None:
-            concat._box = np.copy(self._box)
-        return concat
+        return concatenate([self, array])
     def __copy_fill__(self, clone):
         super().__copy_fill__(clone)
@@ -468,9 +451,9 @@ class Atom(Copyable):
     Parameters
     ----------
-    coord: list or ndarray
+    coord : list or ndarray
         The x, y and z coordinates.
-    kwargs
+    **kwargs
         Atom annotations as key value pair.
     Attributes
@@ -492,7 +475,6 @@ class Atom(Copyable):
     CA
     >>> print(atom.coord)
     [1. 2. 3.]
     """
     def __init__(self, coord, **kwargs):
@@ -606,6 +588,7 @@ class AtomArray(_AtomArrayBase):
     :class:`AtomArray` is done with the '+' operator.
     Only the annotation categories, which are existing in both arrays,
     are transferred to the new array.
+    For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
     Optionally, an :class:`AtomArray` can store chemical bond
     information via a :class:`BondList` object.
@@ -651,6 +634,10 @@ class AtomArray(_AtomArrayBase):
         The single value in the tuple is
         the length of the atom array.
+    See Also
+    --------
+    AtomArrayStack : Representation of multiple structure models.
     Examples
     --------
     Creating an atom array from atoms:
@@ -719,10 +706,6 @@ class AtomArray(_AtomArrayBase):
             Shape of the array.
             The single value in the tuple is
             the :func:`array_length()`.
-        See Also
-        --------
-        array_length
         """
         return (self.array_length(),)
@@ -878,7 +861,9 @@ class AtomArrayStack(_AtomArrayBase):
     :class:`AtomArray` instance.
     Concatenation of atoms for each array in the stack is done using the
-    '+' operator. For addition of atom arrays onto the stack use the
+    '+' operator.
+    For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
+    For addition of atom arrays onto the stack use the
     :func:`stack()` method.
     The :attr:`box` attribute has the shape *m x 3 x 3*, as the cell
@@ -912,9 +897,9 @@ class AtomArrayStack(_AtomArrayBase):
         The numbers correspond to the stack depth
         and array length, respectively.
-    See also
+    See Also
     --------
-    AtomArray
+    AtomArray : Representation of a single structure model.
     Examples
     --------
@@ -1212,9 +1197,18 @@ def array(atoms):
                 f"annotation categories as the atom at index 0"
             )
     array = AtomArray(len(atoms))
     # Add all (also optional) annotation categories
     for name in names:
-        array.add_annotation(name, dtype=type(atoms[0]._annot[name]))
+        value = atoms[0]._annot[name]
+        if isinstance(value, str):
+            # Find maximum string length across all atoms for this annotation
+            max_len = max(len(str(atom._annot[name])) for atom in atoms)
+            dtype = f"<U{max_len}"
+        else:
+            dtype = type(value)
+        array.add_annotation(name, dtype=dtype)
     # Add all atoms to AtomArray
     for i in range(len(atoms)):
         for name in names:
@@ -1292,6 +1286,112 @@ def stack(arrays):
     return array_stack
+def concatenate(atoms):
+    """
+    Concatenate multiple :class:`AtomArray` or :class:`AtomArrayStack` objects into
+    a single :class:`AtomArray` or :class:`AtomArrayStack`, respectively.
+    Parameters
+    ----------
+    atoms : iterable object of AtomArray or AtomArrayStack
+        The atoms to be concatenated.
+        :class:`AtomArray` cannot be mixed with :class:`AtomArrayStack`.
+    Returns
+    -------
+    concatenated_atoms : AtomArray or AtomArrayStack
+        The concatenated atoms, i.e. its ``array_length()`` is the sum of the
+        ``array_length()`` of the input ``atoms``.
+    Notes
+    -----
+    The following rules apply:
+    - Only the annotation categories that exist in all elements are transferred.
+    - The box of the first element that has a box is transferred, if any.
+    - The bonds of all elements are concatenated, if any element has associated bonds.
+      For elements without a :class:`BondList` an empty :class:`BondList` is assumed.
+    Examples
+    --------
+    >>> atoms1 = array([
+    ...     Atom([1,2,3], res_id=1, atom_name="N"),
+    ...     Atom([4,5,6], res_id=1, atom_name="CA"),
+    ...     Atom([7,8,9], res_id=1, atom_name="C")
+    ... ])
+    >>> atoms2 = array([
+    ...     Atom([1,2,3], res_id=2, atom_name="N"),
+    ...     Atom([4,5,6], res_id=2, atom_name="CA"),
+    ...     Atom([7,8,9], res_id=2, atom_name="C")
+    ... ])
+    >>> print(concatenate([atoms1, atoms2]))
+                1      N                1.000    2.000    3.000
+                1      CA               4.000    5.000    6.000
+                1      C                7.000    8.000    9.000
+                2      N                1.000    2.000    3.000
+                2      CA               4.000    5.000    6.000
+                2      C                7.000    8.000    9.000
+    """
+    # Ensure that the atoms can be iterated over multiple times
+    if not isinstance(atoms, Sequence):
+        atoms = list(atoms)
+    length = 0
+    depth = None
+    element_type = None
+    common_categories = set(atoms[0].get_annotation_categories())
+    box = None
+    has_bonds = False
+    for element in atoms:
+        if element_type is None:
+            element_type = type(element)
+        else:
+            if not isinstance(element, element_type):
+                raise TypeError(
+                    f"Cannot concatenate '{type(element).__name__}' "
+                    f"with '{element_type.__name__}'"
+                )
+        length += element.array_length()
+        if isinstance(element, AtomArrayStack):
+            if depth is None:
+                depth = element.stack_depth()
+            else:
+                if element.stack_depth() != depth:
+                    raise IndexError("The stack depths are not equal")
+        common_categories &= set(element.get_annotation_categories())
+        if element.box is not None and box is None:
+            box = element.box
+        if element.bonds is not None:
+            has_bonds = True
+    if element_type == AtomArray:
+        concat_atoms = AtomArray(length)
+    elif element_type == AtomArrayStack:
+        concat_atoms = AtomArrayStack(depth, length)
+    concat_atoms.coord = np.concatenate([element.coord for element in atoms], axis=-2)
+    for category in common_categories:
+        concat_atoms.set_annotation(
+            category,
+            np.concatenate(
+                [element.get_annotation(category) for element in atoms], axis=0
+            ),
+        )
+    concat_atoms.box = box
+    if has_bonds:
+        # Concatenate bonds of all elements
+        concat_atoms.bonds = BondList.concatenate(
+            [
+                element.bonds
+                if element.bonds is not None
+                else BondList(element.array_length())
+                for element in atoms
+            ]
+        )
+    return concat_atoms
 def repeat(atoms, coord):
     """
     Repeat atoms (:class:`AtomArray` or :class:`AtomArrayStack`)
@@ -1354,8 +1454,7 @@ def repeat(atoms, coord):
     if isinstance(atoms, AtomArray):
         if coord.ndim != 3:
             raise ValueError(
-                f"Expected 3 dimensions for the coordinate array, "
-                f"but got {coord.ndim}"
+                f"Expected 3 dimensions for the coordinate array, but got {coord.ndim}"
             )
         repeated = AtomArray(new_length)
         repeated.coord = coord.reshape((new_length, 3))
@@ -1363,16 +1462,14 @@ def repeat(atoms, coord):
     elif isinstance(atoms, AtomArrayStack):
         if coord.ndim != 4:
             raise ValueError(
-                f"Expected 4 dimensions for the coordinate array, "
-                f"but got {coord.ndim}"
+                f"Expected 4 dimensions for the coordinate array, but got {coord.ndim}"
             )
         repeated = AtomArrayStack(atoms.stack_depth(), new_length)
         repeated.coord = coord.reshape((atoms.stack_depth(), new_length, 3))
     else:
         raise TypeError(
-            f"Expected 'AtomArray' or 'AtomArrayStack', "
-            f"but got {type(atoms).__name__}"
+            f"Expected 'AtomArray' or 'AtomArrayStack', but got {type(atoms).__name__}"
         )
     for category in atoms.get_annotation_categories():