biotite 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show
  1. biotite/application/dssp/app.py +13 -3
  2. biotite/application/localapp.py +34 -0
  3. biotite/application/muscle/app3.py +2 -15
  4. biotite/application/muscle/app5.py +2 -2
  5. biotite/application/util.py +1 -1
  6. biotite/application/viennarna/rnaplot.py +6 -2
  7. biotite/database/rcsb/query.py +6 -6
  8. biotite/database/uniprot/check.py +20 -15
  9. biotite/database/uniprot/download.py +1 -1
  10. biotite/database/uniprot/query.py +1 -1
  11. biotite/sequence/align/alignment.py +16 -3
  12. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  13. biotite/sequence/align/banded.pyx +5 -5
  14. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  15. biotite/sequence/align/kmeralphabet.pyx +17 -0
  16. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  17. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  18. biotite/sequence/align/kmertable.pyx +52 -42
  19. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  20. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  21. biotite/sequence/align/matrix.py +273 -55
  22. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  23. biotite/sequence/align/matrix_data/PB.license +21 -0
  24. biotite/sequence/align/matrix_data/PB.mat +18 -0
  25. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  26. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  27. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  28. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  29. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  30. biotite/sequence/alphabet.py +3 -0
  31. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  32. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  33. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  34. biotite/sequence/graphics/colorschemes.py +44 -11
  35. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  36. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  37. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  38. biotite/sequence/profile.py +86 -4
  39. biotite/sequence/seqtypes.py +124 -3
  40. biotite/setup_ccd.py +197 -0
  41. biotite/structure/__init__.py +4 -3
  42. biotite/structure/alphabet/__init__.py +25 -0
  43. biotite/structure/alphabet/encoder.py +332 -0
  44. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  45. biotite/structure/alphabet/i3d.py +110 -0
  46. biotite/structure/alphabet/layers.py +86 -0
  47. biotite/structure/alphabet/pb.license +21 -0
  48. biotite/structure/alphabet/pb.py +171 -0
  49. biotite/structure/alphabet/unkerasify.py +122 -0
  50. biotite/structure/atoms.py +156 -43
  51. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  52. biotite/structure/bonds.pyx +72 -21
  53. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  54. biotite/structure/charges.cpython-311-darwin.so +0 -0
  55. biotite/structure/filter.py +1 -1
  56. biotite/structure/geometry.py +60 -113
  57. biotite/structure/info/__init__.py +1 -0
  58. biotite/structure/info/atoms.py +13 -13
  59. biotite/structure/info/bonds.py +12 -6
  60. biotite/structure/info/ccd.py +125 -32
  61. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  62. biotite/structure/info/groups.py +63 -17
  63. biotite/structure/info/masses.py +9 -6
  64. biotite/structure/info/misc.py +15 -21
  65. biotite/structure/info/standardize.py +3 -2
  66. biotite/structure/io/mol/sdf.py +41 -40
  67. biotite/structure/io/pdb/convert.py +2 -0
  68. biotite/structure/io/pdb/file.py +74 -3
  69. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  70. biotite/structure/io/pdbqt/file.py +32 -32
  71. biotite/structure/io/pdbx/__init__.py +1 -0
  72. biotite/structure/io/pdbx/bcif.py +32 -8
  73. biotite/structure/io/pdbx/cif.py +148 -107
  74. biotite/structure/io/pdbx/component.py +9 -4
  75. biotite/structure/io/pdbx/compress.py +321 -0
  76. biotite/structure/io/pdbx/convert.py +227 -68
  77. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  78. biotite/structure/io/pdbx/encoding.pyx +98 -17
  79. biotite/structure/io/trajfile.py +16 -16
  80. biotite/structure/molecules.py +141 -141
  81. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  82. biotite/structure/segments.py +1 -2
  83. biotite/structure/util.py +73 -1
  84. biotite/version.py +2 -2
  85. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
  86. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
  87. biotite/structure/info/ccd/README.rst +0 -8
  88. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  89. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  90. biotite/structure/info/ccd/nucleotides.txt +0 -798
  91. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
  92. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -0,0 +1,171 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Conversion of structures into the *Protein Blocks* structural alphabet.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
12
+
13
+ import numpy as np
14
+ from biotite.sequence.alphabet import LetterAlphabet
15
+ from biotite.sequence.sequence import Sequence
16
+ from biotite.structure.chains import get_chain_starts
17
+ from biotite.structure.geometry import dihedral_backbone
18
+
19
# PB reference angles, adapted from PBxplore
# One row per Protein Block: the row index is used directly as symbol code,
# i.e. the rows follow the letters 'a' to 'p' of the *Protein Blocks* alphabet.
# The eight columns are compared in `_to_protein_blocks()` with the dihedral
# angle window of residue ``i``, given in degrees:
# psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)
PB_ANGLES = np.array(
    [
        [41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
        [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
        [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
        [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
        [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
        [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
        [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
        [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
        [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
        [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
        [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
        [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
        [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
        [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
        [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
        [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
    ]
)  # fmt: skip
40
+
41
+
42
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::

    """

    # The 16 Protein Blocks 'a' to 'p' plus the placeholder 'z'
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    # Symbol used where no Protein Block could be assigned
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet consists of lower case letters only,
        # hence every kind of input is normalized to lower case
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        filtered_code = self.code[self.code != undefined_code]
        # Set the code directly to avoid decoding and re-encoding the symbols
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
95
+
96
+
97
def to_protein_blocks(atoms):
    """
    Encode each chain in the given structure to the *Protein Blocks* structural
    alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # Due to the appended exclusive stop, each pair of consecutive entries
    # delimits exactly one chain
    chain_bounds = get_chain_starts(atoms, add_exclusive_stop=True)
    chain_starts = chain_bounds[:-1]
    chain_stops = chain_bounds[1:]
    sequences = [
        _to_protein_blocks(atoms[first:last])
        for first, last in zip(chain_starts, chain_stops)
    ]
    return sequences, chain_starts
136
+
137
+
138
def _to_protein_blocks(chain):
    """
    Encode the atoms of a single chain into a :class:`ProteinBlocksSequence`.

    Each residue gets the Protein Block whose reference angles in ``PB_ANGLES``
    deviate least from the backbone dihedral angles in the five-residue window
    centered on that residue.
    Residues without a complete window (the two residues at each chain end
    or windows containing NaN angles) get the undefined symbol.
    """
    phi, psi, _ = dihedral_backbone(chain)
    n_residues = len(phi)
    # Number of complete five-residue windows fitting into the chain
    n_windows = max(n_residues - 4, 0)

    # For each of the eight PB angles: the source angle array and the offset
    # of the respective residue relative to the first residue of the window
    window_layout = (
        (psi, 0),
        (phi, 1),
        (psi, 1),
        (phi, 2),
        (psi, 2),
        (phi, 3),
        (psi, 3),
        (phi, 4),
    )
    pb_angles = np.full((n_residues, 8), np.nan)
    for column, (angles, offset) in enumerate(window_layout):
        pb_angles[2:-2, column] = angles[offset : offset + n_windows]
    pb_angles = np.rad2deg(pb_angles)

    # Sum of squared angle deviations between all reference angles and all
    # actual angles, with each deviation wrapped into [-180, 180);
    # its minimum is at the same Protein Block as the minimum of the angle RMSD
    deviation = (
        PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :] + 180
    ) % 360 - 180
    rmsda = np.sum(deviation**2, axis=-1)

    # Where the deviation is NaN (missing atoms/residues or chain ends)
    # the symbol stays undefined
    undefined_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )
    symbol_codes = np.full(len(pb_angles), undefined_code, dtype=np.uint8)
    is_defined = ~np.isnan(rmsda).any(axis=0)
    # Choose the PB with the lowest deviation from the reference angles:
    # The row index in the reference angles is directly the symbol code
    symbol_codes[is_defined] = np.argmin(rmsda[:, is_defined], axis=0)

    # Wrap the symbol codes into a sequence object
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = symbol_codes
    return pb_sequence
@@ -0,0 +1,122 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Parser for extracting weights from Keras files.
7
+
8
+ Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
9
+ """
10
+
11
+ __name__ = "biotite.structure.alphabet"
12
+ __author__ = "Martin Larralde"
13
+ __all__ = ["load_kerasify"]
14
+
15
+ import enum
16
+ import functools
17
+ import itertools
18
+ import struct
19
+ import numpy as np
20
+ from biotite.structure.alphabet.layers import DenseLayer, Layer
21
+
22
+
23
class LayerType(enum.IntEnum):
    """
    Integer tags identifying the kind of a serialized layer.

    ``KerasifyParser.read()`` converts the integer found at the start of each
    layer into a member of this enum.
    Only ``DENSE`` is handled by the parser, all other layer types raise a
    :class:`NotImplementedError` there.
    """

    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
32
+
33
+
34
class ActivationType(enum.IntEnum):
    """
    Integer tags identifying the activation function of a serialized layer.

    ``KerasifyParser.read()`` converts the integer following the biases of a
    dense layer into a member of this enum.
    Only ``LINEAR`` and ``RELU`` are accepted by the parser.
    """

    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
41
+
42
+
43
class KerasifyParser:
    """An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator over the layers stored in the file.
    The number of layers is read from the file upon construction.

    Parameters
    ----------
    file : file-like object
        The binary file to read from.
        It must support ``readinto()``.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer reused for every read, grown on demand
        self.buffer = bytearray(1024)
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Read the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The parsed layer or ``None``, if all layers have been read.

        Raises
        ------
        NotImplementedError
            If the layer type or its activation function is not supported.
        EOFError
            If the file ends before the layer is complete.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type == LayerType.DENSE:
            (w0,) = self._get("I")
            (w1,) = self._get("I")
            (b0,) = self._get("I")
            # The views returned by `_read()` alias the scratch buffer,
            # hence the arrays must be copied before the next read
            weights = (
                np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
                .reshape(w0, w1)
                .copy()
            )
            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
            activation = ActivationType(self._get("I")[0])
            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
                raise NotImplementedError(
                    f"Unsupported activation type: {activation!r}"
                )
            return DenseLayer(weights, biases, activation == ActivationType.RELU)
        else:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

    def __iter__(self):
        return self

    def __next__(self) -> Layer:
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes from the file as the given :mod:`struct` format requires.

        The returned view aliases the internal scratch buffer:
        Its content is overwritten by the next call.

        Raises
        ------
        EOFError
            If the file ends before the requested number of bytes is read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # `readinto()` may deliver less bytes than requested:
        # Without this check stale buffer content would be interpreted as data
        n_filled = 0
        while n_filled < n:
            n_read = self.file.readinto(v[n_filled:])  # type: ignore
            if not n_read:
                raise EOFError(
                    f"Unexpected end of file: "
                    f"expected {n} bytes, but got only {n_filled}"
                )
            n_filled += n_read
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by the given :mod:`struct` format.
        """
        v = self._read(format)
        return struct.unpack(format, v)
104
+
105
+
106
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached, so each distinct `file_path` is parsed only once.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as kerasify_file:
        parser = KerasifyParser(kerasify_file)
        # Exhaust the parser while the file is still open
        layers = tuple(parser)
    return layers
@@ -13,6 +13,7 @@ __all__ = [
13
13
  "Atom",
14
14
  "AtomArray",
15
15
  "AtomArrayStack",
16
+ "concatenate",
16
17
  "array",
17
18
  "stack",
18
19
  "repeat",
@@ -22,6 +23,7 @@ __all__ = [
22
23
 
23
24
  import abc
24
25
  import numbers
26
+ from collections.abc import Sequence
25
27
  import numpy as np
26
28
  from biotite.copyable import Copyable
27
29
  from biotite.structure.bonds import BondList
@@ -99,9 +101,24 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
99
101
  See Also
100
102
  --------
101
103
  set_annotation
104
+
105
+ Notes
106
+ -----
107
+ If the annotation category already exists, a compatible dtype is chosen,
108
+ that is also able to represent the old values.
102
109
  """
103
110
  if category not in self._annot:
104
111
  self._annot[str(category)] = np.zeros(self._array_length, dtype=dtype)
112
+ elif np.can_cast(self._annot[str(category)].dtype, dtype):
113
+ self._annot[str(category)] = self._annot[str(category)].astype(dtype)
114
+ elif np.can_cast(dtype, self._annot[str(category)].dtype):
115
+ # The existing dtype is more general
116
+ pass
117
+ else:
118
+ raise ValueError(
119
+ f"Cannot cast '{str(category)}' "
120
+ f"with dtype '{self._annot[str(category)].dtype}' into '{dtype}'"
121
+ )
105
122
 
106
123
  def del_annotation(self, category):
107
124
  """
@@ -142,19 +159,28 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
142
159
  ----------
143
160
  category : str
144
161
  The annotation category to be set.
145
- array : ndarray or None
162
+ array : ndarray
146
163
  The new value of the annotation category. The size of the
147
164
  array must be the same as the array length.
165
+
166
+ Notes
167
+ -----
168
+ If the annotation category already exists, a compatible dtype is chosen,
169
+ that is able to represent the old and new array values.
148
170
  """
171
+ array = np.asarray(array)
149
172
  if len(array) != self._array_length:
150
173
  raise IndexError(
151
174
  f"Expected array length {self._array_length}, " f"but got {len(array)}"
152
175
  )
153
176
  if category in self._annot:
154
- # Keep the dtype if the annotation already exists
155
- self._annot[category] = np.asarray(array, dtype=self._annot[category].dtype)
177
+ # If the annotation already exists, find the compatible dtype
178
+ self._annot[category] = array.astype(
179
+ dtype=np.promote_types(self._annot[category].dtype, array.dtype),
180
+ copy=False,
181
+ )
156
182
  else:
157
- self._annot[category] = np.asarray(array)
183
+ self._annot[category] = array
158
184
 
159
185
  def get_annotation_categories(self):
160
186
  """
@@ -209,7 +235,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
209
235
  else:
210
236
  raise TypeError(f"Index must be integer, not '{type(index).__name__}'")
211
237
 
212
- def equal_annotations(self, item):
238
+ def equal_annotations(self, item, equal_nan=True):
213
239
  """
214
240
  Check, if this object shares equal annotation arrays with the
215
241
  given :class:`AtomArray` or :class:`AtomArrayStack`.
@@ -218,6 +244,8 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
218
244
  ----------
219
245
  item : AtomArray or AtomArrayStack
220
246
  The object to compare the annotation arrays with.
247
+ equal_nan: bool
248
+ Whether to count `nan` values as equal. Default: True.
221
249
 
222
250
  Returns
223
251
  -------
@@ -229,7 +257,18 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
229
257
  if not self.equal_annotation_categories(item):
230
258
  return False
231
259
  for name in self._annot:
232
- if not np.array_equal(self._annot[name], item._annot[name]):
260
+ # ... allowing `nan` values causes type-casting, which is
261
+ # only possible for floating-point arrays
262
+ allow_nan = (
263
+ equal_nan
264
+ if np.issubdtype(self._annot[name].dtype, np.floating)
265
+ else False
266
+ )
267
+ if not np.array_equal(
268
+ self._annot[name],
269
+ item._annot[name],
270
+ equal_nan=allow_nan,
271
+ ):
233
272
  return False
234
273
  return True
235
274
 
@@ -383,42 +422,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
383
422
  return self._array_length
384
423
 
385
424
  def __add__(self, array):
386
- if not isinstance(self, type(array)):
387
- raise TypeError("Can only concatenate two arrays or two stacks")
388
- # Create either new array or stack, depending of the own type
389
- if isinstance(self, AtomArray):
390
- concat = AtomArray(length=self._array_length + array._array_length)
391
- if isinstance(self, AtomArrayStack):
392
- concat = AtomArrayStack(
393
- self.stack_depth(), self._array_length + array._array_length
394
- )
395
-
396
- concat._coord = np.concatenate((self._coord, array.coord), axis=-2)
397
-
398
- # Transfer only annotations,
399
- # which are existent in both operands
400
- arr_categories = list(array._annot.keys())
401
- for category in self._annot.keys():
402
- if category in arr_categories:
403
- annot = self._annot[category]
404
- arr_annot = array._annot[category]
405
- concat._annot[category] = np.concatenate((annot, arr_annot))
406
-
407
- # Concatenate bonds lists,
408
- # if at least one of them contains bond information
409
- if self._bonds is not None or array._bonds is not None:
410
- bonds1 = self._bonds
411
- bonds2 = array._bonds
412
- if bonds1 is None:
413
- bonds1 = BondList(self._array_length)
414
- if bonds2 is None:
415
- bonds2 = BondList(array._array_length)
416
- concat._bonds = bonds1 + bonds2
417
-
418
- # Copy box
419
- if self._box is not None:
420
- concat._box = np.copy(self._box)
421
- return concat
425
+ return concatenate([self, array])
422
426
 
423
427
  def __copy_fill__(self, clone):
424
428
  super().__copy_fill__(clone)
@@ -582,6 +586,7 @@ class AtomArray(_AtomArrayBase):
582
586
  :class:`AtomArray` is done with the '+' operator.
583
587
  Only the annotation categories, which are existing in both arrays,
584
588
  are transferred to the new array.
589
+ For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
585
590
 
586
591
  Optionally, an :class:`AtomArray` can store chemical bond
587
592
  information via a :class:`BondList` object.
@@ -854,7 +859,9 @@ class AtomArrayStack(_AtomArrayBase):
854
859
  :class:`AtomArray` instance.
855
860
 
856
861
  Concatenation of atoms for each array in the stack is done using the
857
- '+' operator. For addition of atom arrays onto the stack use the
862
+ '+' operator.
863
+ For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
864
+ For addition of atom arrays onto the stack use the
858
865
  :func:`stack()` method.
859
866
 
860
867
  The :attr:`box` attribute has the shape *m x 3 x 3*, as the cell
@@ -1268,6 +1275,112 @@ def stack(arrays):
1268
1275
  return array_stack
1269
1276
 
1270
1277
 
1278
def concatenate(atoms):
    """
    Concatenate multiple :class:`AtomArray` or :class:`AtomArrayStack` objects into
    a single :class:`AtomArray` or :class:`AtomArrayStack`, respectively.

    Parameters
    ----------
    atoms : iterable object of AtomArray or AtomArrayStack
        The atoms to be concatenated.
        :class:`AtomArray` cannot be mixed with :class:`AtomArrayStack`.

    Returns
    -------
    concatenated_atoms : AtomArray or AtomArrayStack
        The concatenated atoms, i.e. its ``array_length()`` is the sum of the
        ``array_length()`` of the input ``atoms``.

    Raises
    ------
    TypeError
        If the elements are neither :class:`AtomArray` nor
        :class:`AtomArrayStack` objects or if both types are mixed.
    IndexError
        If `atoms` is empty or if the stack depths of the given
        :class:`AtomArrayStack` objects are not equal.

    Notes
    -----
    The following rules apply:

    - Only the annotation categories that exist in all elements are transferred.
    - The box of the first element that has a box is transferred, if any.
    - The bonds of all elements are concatenated, if any element has associated bonds.
      For elements without a :class:`BondList` an empty :class:`BondList` is assumed.

    Examples
    --------

    >>> atoms1 = array([
    ...     Atom([1,2,3], res_id=1, atom_name="N"),
    ...     Atom([4,5,6], res_id=1, atom_name="CA"),
    ...     Atom([7,8,9], res_id=1, atom_name="C")
    ... ])
    >>> atoms2 = array([
    ...     Atom([1,2,3], res_id=2, atom_name="N"),
    ...     Atom([4,5,6], res_id=2, atom_name="CA"),
    ...     Atom([7,8,9], res_id=2, atom_name="C")
    ... ])
    >>> print(concatenate([atoms1, atoms2]))
                1      N                1.000    2.000    3.000
                1      CA               4.000    5.000    6.000
                1      C                7.000    8.000    9.000
                2      N                1.000    2.000    3.000
                2      CA               4.000    5.000    6.000
                2      C                7.000    8.000    9.000
    """
    # Ensure that the atoms can be iterated over multiple times
    if not isinstance(atoms, Sequence):
        atoms = list(atoms)
    if len(atoms) == 0:
        raise IndexError("Cannot concatenate an empty sequence of atoms")

    # The first element decides, whether an array or a stack is created
    # Checking via 'isinstance()' also covers subclasses of both types
    if isinstance(atoms[0], AtomArrayStack):
        element_type = AtomArrayStack
    elif isinstance(atoms[0], AtomArray):
        element_type = AtomArray
    else:
        raise TypeError(
            f"Expected 'AtomArray' or 'AtomArrayStack', "
            f"but got '{type(atoms[0]).__name__}'"
        )

    length = 0
    depth = None
    common_categories = set(atoms[0].get_annotation_categories())
    box = None
    has_bonds = False
    for element in atoms:
        if not isinstance(element, element_type):
            raise TypeError(
                f"Cannot concatenate '{type(element).__name__}' "
                f"with '{element_type.__name__}'"
            )
        length += element.array_length()
        if isinstance(element, AtomArrayStack):
            if depth is None:
                depth = element.stack_depth()
            else:
                if element.stack_depth() != depth:
                    raise IndexError("The stack depths are not equal")
        common_categories &= set(element.get_annotation_categories())
        if element.box is not None and box is None:
            box = element.box
        if element.bonds is not None:
            has_bonds = True

    if element_type is AtomArrayStack:
        concat_atoms = AtomArrayStack(depth, length)
    else:
        concat_atoms = AtomArray(length)
    # The atom dimension is the second last one for both, arrays and stacks
    concat_atoms.coord = np.concatenate([element.coord for element in atoms], axis=-2)
    for category in common_categories:
        concat_atoms.set_annotation(
            category,
            np.concatenate(
                [element.get_annotation(category) for element in atoms], axis=0
            ),
        )
    concat_atoms.box = box
    if has_bonds:
        # Concatenate bonds of all elements
        concat_atoms.bonds = BondList.concatenate(
            [
                element.bonds
                if element.bonds is not None
                else BondList(element.array_length())
                for element in atoms
            ]
        )

    return concat_atoms
1382
+
1383
+
1271
1384
  def repeat(atoms, coord):
1272
1385
  """
1273
1386
  Repeat atoms (:class:`AtomArray` or :class:`AtomArrayStack`)