biotite 1.0.1__cp310-cp310-win_amd64.whl → 1.1.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/application/dssp/app.py +13 -3
  2. biotite/application/localapp.py +34 -0
  3. biotite/application/muscle/app3.py +2 -15
  4. biotite/application/muscle/app5.py +2 -2
  5. biotite/application/util.py +1 -1
  6. biotite/application/viennarna/rnaplot.py +6 -2
  7. biotite/database/rcsb/query.py +6 -6
  8. biotite/database/uniprot/check.py +20 -15
  9. biotite/database/uniprot/download.py +1 -1
  10. biotite/database/uniprot/query.py +1 -1
  11. biotite/sequence/align/alignment.py +16 -3
  12. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  13. biotite/sequence/align/banded.pyx +5 -5
  14. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  15. biotite/sequence/align/kmeralphabet.pyx +17 -0
  16. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  17. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  18. biotite/sequence/align/kmertable.pyx +52 -42
  19. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  20. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  21. biotite/sequence/align/matrix.py +273 -55
  22. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  23. biotite/sequence/align/matrix_data/PB.license +21 -0
  24. biotite/sequence/align/matrix_data/PB.mat +18 -0
  25. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  26. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  27. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  28. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  29. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  30. biotite/sequence/alphabet.py +3 -0
  31. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  32. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  33. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  34. biotite/sequence/graphics/colorschemes.py +44 -11
  35. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  37. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  38. biotite/sequence/profile.py +86 -4
  39. biotite/sequence/seqtypes.py +124 -3
  40. biotite/setup_ccd.py +197 -0
  41. biotite/structure/__init__.py +4 -3
  42. biotite/structure/alphabet/__init__.py +25 -0
  43. biotite/structure/alphabet/encoder.py +332 -0
  44. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  45. biotite/structure/alphabet/i3d.py +110 -0
  46. biotite/structure/alphabet/layers.py +86 -0
  47. biotite/structure/alphabet/pb.license +21 -0
  48. biotite/structure/alphabet/pb.py +171 -0
  49. biotite/structure/alphabet/unkerasify.py +122 -0
  50. biotite/structure/atoms.py +129 -40
  51. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  52. biotite/structure/bonds.pyx +72 -21
  53. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  54. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  55. biotite/structure/geometry.py +60 -113
  56. biotite/structure/info/__init__.py +1 -0
  57. biotite/structure/info/atoms.py +13 -13
  58. biotite/structure/info/bonds.py +12 -6
  59. biotite/structure/info/ccd.py +125 -32
  60. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  61. biotite/structure/info/groups.py +63 -17
  62. biotite/structure/info/masses.py +9 -6
  63. biotite/structure/info/misc.py +15 -21
  64. biotite/structure/info/standardize.py +3 -2
  65. biotite/structure/io/mol/sdf.py +41 -40
  66. biotite/structure/io/pdb/convert.py +2 -0
  67. biotite/structure/io/pdb/file.py +74 -3
  68. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  69. biotite/structure/io/pdbqt/file.py +32 -32
  70. biotite/structure/io/pdbx/__init__.py +1 -0
  71. biotite/structure/io/pdbx/bcif.py +32 -8
  72. biotite/structure/io/pdbx/cif.py +72 -59
  73. biotite/structure/io/pdbx/component.py +9 -4
  74. biotite/structure/io/pdbx/compress.py +321 -0
  75. biotite/structure/io/pdbx/convert.py +194 -48
  76. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  77. biotite/structure/io/pdbx/encoding.pyx +98 -17
  78. biotite/structure/molecules.py +141 -141
  79. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  80. biotite/structure/segments.py +1 -2
  81. biotite/structure/util.py +73 -1
  82. biotite/version.py +2 -2
  83. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
  84. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
  85. biotite/structure/info/ccd/README.rst +0 -8
  86. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  87. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  88. biotite/structure/info/ccd/nucleotides.txt +0 -798
  89. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
  90. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -0,0 +1,171 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Conversion of structures into the *Protein Blocks* structural alphabet.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
12
+
13
+ import numpy as np
14
+ from biotite.sequence.alphabet import LetterAlphabet
15
+ from biotite.sequence.sequence import Sequence
16
+ from biotite.structure.chains import get_chain_starts
17
+ from biotite.structure.geometry import dihedral_backbone
18
+
19
# Reference dihedral angles (in degrees) of the 16 Protein Blocks,
# adapted from PBxplore.
# Each row describes one block, in the order of the symbols 'a' to 'p',
# so that the row index equals the symbol code of the block.
# The eight columns are the backbone angles of the window around position n:
# psi(n-2), phi(n-1), psi(n-1), phi(n), psi(n), phi(n+1), psi(n+1), phi(n+2)
PB_ANGLES = np.array(
    [
        [  41.14,   75.53,   13.92,  -99.80,  131.88,  -96.27,  122.08,  -99.68],  # a
        [ 108.24,  -90.12,  119.54,  -92.21,  -18.06, -128.93,  147.04,  -99.90],  # b
        [ -11.61, -105.66,   94.81, -106.09,  133.56, -106.93,  135.97, -100.63],  # c
        [ 141.98, -112.79,  132.20, -114.79,  140.11, -111.05,  139.54, -103.16],  # d
        [ 133.25, -112.37,  137.64, -108.13,  133.00,  -87.30,  120.54,   77.40],  # e
        [ 116.40, -105.53,  129.32,  -96.68,  140.72,  -74.19,  -26.65,  -94.51],  # f
        [   0.40,  -81.83,    4.91, -100.59,   85.50,  -71.65,  130.78,   84.98],  # g
        [ 119.14, -102.58,  130.83,  -67.91,  121.55,   76.25,   -2.95,  -90.88],  # h
        [ 130.68,  -56.92,  119.26,   77.85,   10.42,  -99.43,  141.40,  -98.01],  # i
        [ 114.32, -121.47,  118.14,   82.88, -150.05,  -83.81,   23.35,  -85.82],  # j
        [ 117.16,  -95.41,  140.40,  -59.35,  -29.23,  -72.39,  -25.08,  -76.16],  # k
        [ 139.20,  -55.96,  -32.70,  -68.51,  -26.09,  -74.44,  -22.60,  -71.74],  # l
        [ -39.62,  -64.73,  -39.52,  -65.54,  -38.88,  -66.89,  -37.76,  -70.19],  # m
        [ -35.34,  -65.03,  -38.12,  -66.34,  -29.51,  -89.10,   -2.91,   77.90],  # n
        [ -45.29,  -67.44,  -27.72,  -87.27,    5.13,   77.49,   30.71,  -93.23],  # o
        [ -27.09,  -86.14,    0.30,   59.85,   21.51,  -96.30,  132.67,  -92.91],  # p
    ]
)  # fmt: skip
40
+
41
+
42
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::

    """

    # The 16 blocks 'a' to 'p', followed by 'z' for undefined positions
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet is lower case, hence both kinds of input are
        # normalized to lower case before they are encoded
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        filtered_code = self.code[self.code != undefined_code]
        # Assign the symbol codes directly instead of decoding them and
        # passing the symbols through the constructor
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
95
+
96
+
97
def to_protein_blocks(atoms):
    """
    Encode each chain in the given structure to the *Protein Blocks* structural
    alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # The exclusive stop appended to the chain starts terminates the last chain,
    # so that consecutive entries delimit one chain each
    chain_bounds = get_chain_starts(atoms, add_exclusive_stop=True)
    chain_start_indices = chain_bounds[:-1]
    sequences = [
        _to_protein_blocks(atoms[start:stop])
        for start, stop in zip(chain_start_indices, chain_bounds[1:])
    ]
    return sequences, chain_start_indices
136
+
137
+
138
def _to_protein_blocks(chain):
    """
    Encode the atoms of a single chain into a *Protein Blocks* sequence.

    Parameters
    ----------
    chain : AtomArray
        The atoms to encode, passed on to :func:`dihedral_backbone()`.

    Returns
    -------
    pb_sequence : ProteinBlocksSequence
        The encoded sequence with one symbol per returned dihedral angle.
        Positions where any of the eight required angles is NaN get the
        undefined symbol.
    """
    unknown_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )

    phi, psi, _ = dihedral_backbone(chain)

    # For each position n gather the eight angles of its window:
    # psi(n-2), phi(n-1), psi(n-1), phi(n), psi(n), phi(n+1), psi(n+1), phi(n+2)
    # The first and last two positions have no complete window and stay NaN
    window_angles = np.full((len(phi), 8), np.nan)
    for column in range(8):
        angles = psi if column % 2 == 0 else phi
        offset = (column + 1) // 2
        # 'or None' lets the slice of the last column run to the array end
        window_angles[2:-2, column] = angles[offset : (offset - 4) or None]
    window_angles = np.rad2deg(window_angles)

    # Squared angle deviation between every reference block and every position,
    # summed over the eight angles; differences are wrapped into [-180, 180)
    difference = PB_ANGLES[:, np.newaxis] - window_angles[np.newaxis, :]
    wrapped = (difference + 180) % 360 - 180
    deviation = np.sum(wrapped**2, axis=-1)

    # Positions with a NaN deviation (missing atoms/residues or chain ends)
    # keep the undefined symbol
    symbol_codes = np.full(len(window_angles), unknown_code, dtype=np.uint8)
    is_defined = ~np.isnan(deviation).any(axis=0)
    # Choose the block with the lowest deviation:
    # the row index in the reference angles is directly the symbol code
    symbol_codes[is_defined] = np.argmin(deviation[:, is_defined], axis=0)

    # Assign the symbol codes directly to avoid decoding them
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = symbol_codes
    return pb_sequence
@@ -0,0 +1,122 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Parser for extracting weights from Keras files.
7
+
8
+ Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
9
+ """
10
+
11
+ __name__ = "biotite.structure.alphabet"
12
+ __author__ = "Martin Larralde"
13
+ __all__ = ["load_kerasify"]
14
+
15
+ import enum
16
+ import functools
17
+ import itertools
18
+ import struct
19
+ import numpy as np
20
+ from biotite.structure.alphabet.layers import DenseLayer, Layer
21
+
22
+
23
class LayerType(enum.IntEnum):
    """
    Integer identifiers of the layer types that may appear in a
    ``kerasify`` model file.

    Only ``DENSE`` is handled by :class:`KerasifyParser`.
    """

    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
32
+
33
+
34
class ActivationType(enum.IntEnum):
    """
    Integer identifiers of the activation functions that may follow a
    layer in a ``kerasify`` model file.

    :class:`KerasifyParser` accepts only ``LINEAR`` and ``RELU``.
    """

    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
41
+
42
+
43
class KerasifyParser:
    """
    An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator that yields one layer per step, until all
    layers announced in the file header have been read.

    Parameters
    ----------
    file : file-like object
        The file opened in binary mode.
        It must support ``readinto()``.
        The number of layers is read from it upon construction.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer that is reused for every read and grown on demand
        self.buffer = bytearray(1024)
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Read the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The next layer or ``None``, if all layers have been read.

        Raises
        ------
        NotImplementedError
            If the layer is not a dense layer or its activation is
            neither linear nor ReLU.
        ValueError
            If the layer or activation identifier is unknown.
        EOFError
            If the file ends before the layer is complete.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type != LayerType.DENSE:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

        (w0,) = self._get("I")
        (w1,) = self._get("I")
        (b0,) = self._get("I")
        # The returned views point into the reused scratch buffer,
        # hence the arrays are copied before the next read overwrites them
        weights = (
            np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
            .reshape(w0, w1)
            .copy()
        )
        biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
        activation = ActivationType(self._get("I")[0])
        if activation not in (ActivationType.LINEAR, ActivationType.RELU):
            raise NotImplementedError(
                f"Unsupported activation type: {activation!r}"
            )
        return DenseLayer(weights, biases, activation == ActivationType.RELU)

    def __iter__(self):
        return self

    def __next__(self) -> Layer:
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes as the given :mod:`struct` format requires.

        Parameters
        ----------
        format : str
            The :mod:`struct` format that determines the number of bytes.

        Returns
        -------
        view : memoryview
            A view on the scratch buffer containing the read bytes.
            It is only valid until the next read.

        Raises
        ------
        EOFError
            If the file ends before all requested bytes could be read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # `readinto()` may deliver fewer bytes than requested:
        # Repeat until the view is filled, as otherwise stale bytes from a
        # previous read would be interpreted as data
        n_read = 0
        while n_read < n:
            count = self.file.readinto(v[n_read:])  # type: ignore
            if not count:
                raise EOFError(
                    f"Unexpected end of file: "
                    f"expected {n} bytes, but got only {n_read}"
                )
            n_read += count
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by the given :mod:`struct`
        format.

        Parameters
        ----------
        format : str
            The :mod:`struct` format of the values to read.

        Returns
        -------
        values : tuple
            The unpacked values.
        """
        v = self._read(format)
        return struct.unpack(format, v)
104
+
105
+
106
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached for each `file_path`, so the same file is
    parsed only once.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as model_file:
        # Exhaust the parser while the file is still open
        layers = tuple(KerasifyParser(model_file))
    return layers
@@ -13,6 +13,7 @@ __all__ = [
13
13
  "Atom",
14
14
  "AtomArray",
15
15
  "AtomArrayStack",
16
+ "concatenate",
16
17
  "array",
17
18
  "stack",
18
19
  "repeat",
@@ -22,6 +23,7 @@ __all__ = [
22
23
 
23
24
  import abc
24
25
  import numbers
26
+ from collections.abc import Sequence
25
27
  import numpy as np
26
28
  from biotite.copyable import Copyable
27
29
  from biotite.structure.bonds import BondList
@@ -157,7 +159,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
157
159
  ----------
158
160
  category : str
159
161
  The annotation category to be set.
160
- array : ndarray or None
162
+ array : ndarray
161
163
  The new value of the annotation category. The size of the
162
164
  array must be the same as the array length.
163
165
 
@@ -233,7 +235,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
233
235
  else:
234
236
  raise TypeError(f"Index must be integer, not '{type(index).__name__}'")
235
237
 
236
- def equal_annotations(self, item):
238
+ def equal_annotations(self, item, equal_nan=True):
237
239
  """
238
240
  Check, if this object shares equal annotation arrays with the
239
241
  given :class:`AtomArray` or :class:`AtomArrayStack`.
@@ -242,6 +244,8 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
242
244
  ----------
243
245
  item : AtomArray or AtomArrayStack
244
246
  The object to compare the annotation arrays with.
247
+ equal_nan: bool
248
+ Whether to count `nan` values as equal. Default: True.
245
249
 
246
250
  Returns
247
251
  -------
@@ -253,7 +257,18 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
253
257
  if not self.equal_annotation_categories(item):
254
258
  return False
255
259
  for name in self._annot:
256
- if not np.array_equal(self._annot[name], item._annot[name]):
260
+ # ... allowing `nan` values causes type-casting, which is
261
+ # only possible for floating-point arrays
262
+ allow_nan = (
263
+ equal_nan
264
+ if np.issubdtype(self._annot[name].dtype, np.floating)
265
+ else False
266
+ )
267
+ if not np.array_equal(
268
+ self._annot[name],
269
+ item._annot[name],
270
+ equal_nan=allow_nan,
271
+ ):
257
272
  return False
258
273
  return True
259
274
 
@@ -407,42 +422,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
407
422
  return self._array_length
408
423
 
409
424
  def __add__(self, array):
410
- if not isinstance(self, type(array)):
411
- raise TypeError("Can only concatenate two arrays or two stacks")
412
- # Create either new array or stack, depending of the own type
413
- if isinstance(self, AtomArray):
414
- concat = AtomArray(length=self._array_length + array._array_length)
415
- if isinstance(self, AtomArrayStack):
416
- concat = AtomArrayStack(
417
- self.stack_depth(), self._array_length + array._array_length
418
- )
419
-
420
- concat._coord = np.concatenate((self._coord, array.coord), axis=-2)
421
-
422
- # Transfer only annotations,
423
- # which are existent in both operands
424
- arr_categories = list(array._annot.keys())
425
- for category in self._annot.keys():
426
- if category in arr_categories:
427
- annot = self._annot[category]
428
- arr_annot = array._annot[category]
429
- concat._annot[category] = np.concatenate((annot, arr_annot))
430
-
431
- # Concatenate bonds lists,
432
- # if at least one of them contains bond information
433
- if self._bonds is not None or array._bonds is not None:
434
- bonds1 = self._bonds
435
- bonds2 = array._bonds
436
- if bonds1 is None:
437
- bonds1 = BondList(self._array_length)
438
- if bonds2 is None:
439
- bonds2 = BondList(array._array_length)
440
- concat._bonds = bonds1 + bonds2
441
-
442
- # Copy box
443
- if self._box is not None:
444
- concat._box = np.copy(self._box)
445
- return concat
425
+ return concatenate([self, array])
446
426
 
447
427
  def __copy_fill__(self, clone):
448
428
  super().__copy_fill__(clone)
@@ -606,6 +586,7 @@ class AtomArray(_AtomArrayBase):
606
586
  :class:`AtomArray` is done with the '+' operator.
607
587
  Only the annotation categories, which are existing in both arrays,
608
588
  are transferred to the new array.
589
+ For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
609
590
 
610
591
  Optionally, an :class:`AtomArray` can store chemical bond
611
592
  information via a :class:`BondList` object.
@@ -878,7 +859,9 @@ class AtomArrayStack(_AtomArrayBase):
878
859
  :class:`AtomArray` instance.
879
860
 
880
861
  Concatenation of atoms for each array in the stack is done using the
881
- '+' operator. For addition of atom arrays onto the stack use the
862
+ '+' operator.
863
+ For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
864
+ For addition of atom arrays onto the stack use the
882
865
  :func:`stack()` method.
883
866
 
884
867
  The :attr:`box` attribute has the shape *m x 3 x 3*, as the cell
@@ -1292,6 +1275,112 @@ def stack(arrays):
1292
1275
  return array_stack
1293
1276
 
1294
1277
 
1278
def concatenate(atoms):
    """
    Concatenate multiple :class:`AtomArray` or :class:`AtomArrayStack` objects into
    a single :class:`AtomArray` or :class:`AtomArrayStack`, respectively.

    Parameters
    ----------
    atoms : iterable object of AtomArray or AtomArrayStack
        The atoms to be concatenated.
        :class:`AtomArray` cannot be mixed with :class:`AtomArrayStack`.

    Returns
    -------
    concatenated_atoms : AtomArray or AtomArrayStack
        The concatenated atoms, i.e. its ``array_length()`` is the sum of the
        ``array_length()`` of the input ``atoms``.

    Notes
    -----
    The following rules apply:

    - Only the annotation categories that exist in all elements are transferred.
    - The box of the first element that has a box is transferred, if any.
    - The bonds of all elements are concatenated, if any element has associated bonds.
      For elements without a :class:`BondList` an empty :class:`BondList` is assumed.

    Examples
    --------

    >>> atoms1 = array([
    ...     Atom([1,2,3], res_id=1, atom_name="N"),
    ...     Atom([4,5,6], res_id=1, atom_name="CA"),
    ...     Atom([7,8,9], res_id=1, atom_name="C")
    ... ])
    >>> atoms2 = array([
    ...     Atom([1,2,3], res_id=2, atom_name="N"),
    ...     Atom([4,5,6], res_id=2, atom_name="CA"),
    ...     Atom([7,8,9], res_id=2, atom_name="C")
    ... ])
    >>> print(concatenate([atoms1, atoms2]))
    1 N 1.000 2.000 3.000
    1 CA 4.000 5.000 6.000
    1 C 7.000 8.000 9.000
    2 N 1.000 2.000 3.000
    2 CA 4.000 5.000 6.000
    2 C 7.000 8.000 9.000
    """
    # The elements are traversed several times:
    # one-shot iterables (e.g. generators) would be exhausted after the first pass
    if not isinstance(atoms, Sequence):
        atoms = list(atoms)

    # First pass: validate the elements and collect the properties of the result
    total_length = 0
    stack_depth = None
    expected_type = None
    shared_categories = set(atoms[0].get_annotation_categories())
    first_box = None
    any_bonds = False
    for element in atoms:
        if expected_type is None:
            # The first element decides whether arrays or stacks are concatenated
            expected_type = type(element)
        elif not isinstance(element, expected_type):
            raise TypeError(
                f"Cannot concatenate '{type(element).__name__}' "
                f"with '{expected_type.__name__}'"
            )
        total_length += element.array_length()
        if isinstance(element, AtomArrayStack):
            if stack_depth is None:
                stack_depth = element.stack_depth()
            elif element.stack_depth() != stack_depth:
                raise IndexError("The stack depths are not equal")
        shared_categories &= set(element.get_annotation_categories())
        if element.box is not None and first_box is None:
            first_box = element.box
        if element.bonds is not None:
            any_bonds = True

    # Second pass: assemble the concatenated object
    if expected_type == AtomArray:
        result = AtomArray(total_length)
    elif expected_type == AtomArrayStack:
        result = AtomArrayStack(stack_depth, total_length)
    # The atom dimension is the second-last one for both, arrays and stacks
    result.coord = np.concatenate([element.coord for element in atoms], axis=-2)
    for category in shared_categories:
        joined_annotation = np.concatenate(
            [element.get_annotation(category) for element in atoms], axis=0
        )
        result.set_annotation(category, joined_annotation)
    result.box = first_box
    if any_bonds:
        # Elements without bonds contribute an empty bond list,
        # so that the atom indices of the following elements are shifted properly
        bond_lists = [
            BondList(element.array_length())
            if element.bonds is None
            else element.bonds
            for element in atoms
        ]
        result.bonds = BondList.concatenate(bond_lists)

    return result
1382
+
1383
+
1295
1384
  def repeat(atoms, coord):
1296
1385
  """
1297
1386
  Repeat atoms (:class:`AtomArray` or :class:`AtomArrayStack`)
Binary file
@@ -17,6 +17,7 @@ cimport cython
17
17
  cimport numpy as np
18
18
  from libc.stdlib cimport free, realloc
19
19
 
20
+ from collections.abc import Sequence
20
21
  import itertools
21
22
  import numbers
22
23
  from enum import IntEnum
@@ -59,6 +60,7 @@ class BondType(IntEnum):
59
60
  - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
60
61
  - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
61
62
  - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
63
+ - `COORDINATION` - Coordination complex involving a metal atom
62
64
  """
63
65
  ANY = 0
64
66
  SINGLE = 1
@@ -68,6 +70,7 @@ class BondType(IntEnum):
68
70
  AROMATIC_SINGLE = 5
69
71
  AROMATIC_DOUBLE = 6
70
72
  AROMATIC_TRIPLE = 7
73
+ COORDINATION = 8
71
74
 
72
75
 
73
76
  def without_aromaticity(self):
@@ -88,10 +91,12 @@ class BondType(IntEnum):
88
91
  >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
89
92
  DOUBLE
90
93
  """
91
- difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
92
- if self >= BondType.AROMATIC_SINGLE:
93
- difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
94
- return BondType(self - difference)
94
+ if self == BondType.AROMATIC_SINGLE:
95
+ return BondType.SINGLE
96
+ elif self == BondType.AROMATIC_DOUBLE:
97
+ return BondType.DOUBLE
98
+ elif self == BondType.AROMATIC_TRIPLE:
99
+ return BondType.TRIPLE
95
100
  else:
96
101
  return self
97
102
 
@@ -305,6 +310,61 @@ class BondList(Copyable):
305
310
  self._bonds = np.zeros((0, 3), dtype=np.uint32)
306
311
  self._max_bonds_per_atom = 0
307
312
 
313
@staticmethod
def concatenate(bonds_lists):
    """
    Concatenate multiple :class:`BondList` objects into a single
    :class:`BondList`.

    The atom indices of each bond list are shifted by the total atom
    count of all bond lists preceding it.

    Parameters
    ----------
    bonds_lists : iterable object of BondList
        The bond lists to be concatenated.

    Returns
    -------
    concatenated_bonds : BondList
        The concatenated bond lists.

    Examples
    --------

    >>> bonds1 = BondList(2, np.array([(0, 1)]))
    >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
    >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
    >>> print(merged_bonds.get_atom_count())
    5
    >>> print(merged_bonds.as_array()[:, :2])
    [[0 1]
     [2 3]
     [2 4]]
    """
    # The bond lists are traversed several times:
    # one-shot iterables would be exhausted after the first pass
    if not isinstance(bonds_lists, Sequence):
        bonds_lists = list(bonds_lists)

    cdef np.ndarray merged_bonds = np.concatenate(
        [bond_list._bonds for bond_list in bonds_lists]
    )
    # Shift the atom indices (first two columns) of each appended bond list
    # by the number of atoms in front of it
    # (consistent with addition of AtomArray)
    cdef int row_start = 0, row_stop = 0
    cdef int atom_offset = 0
    for bond_list in bonds_lists:
        row_stop = row_start + bond_list._bonds.shape[0]
        merged_bonds[row_start : row_stop, :2] += atom_offset
        atom_offset += bond_list._atom_count
        row_start = row_stop

    # After the loop the offset equals the total atom count
    cdef merged_bond_list = BondList(atom_offset)
    # Array is not used in constructor to prevent unnecessary
    # maximum and redundant bond calculation
    merged_bond_list._bonds = merged_bonds
    # Concatenation adds no bonds between the lists,
    # so the largest per-list maximum is also the overall maximum
    merged_bond_list._max_bonds_per_atom = max(
        [bond_list._max_bonds_per_atom for bond_list in bonds_lists]
    )
    return merged_bond_list
367
+
308
368
  def __copy_create__(self):
309
369
  # Create empty bond list to prevent
310
370
  # unnecessary removal of redundant atoms
@@ -453,9 +513,13 @@ class BondList(Copyable):
453
513
  0 1 SINGLE
454
514
  1 2 DOUBLE
455
515
  """
456
- bonds = self._bonds
457
- difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
458
- bonds[bonds[:, 2] >= BondType.AROMATIC_SINGLE, 2] -= difference
516
+ bond_types = self._bonds[:,2]
517
+ for aromatic_type, non_aromatic_type in [
518
+ (BondType.AROMATIC_SINGLE, BondType.SINGLE),
519
+ (BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
520
+ (BondType.AROMATIC_TRIPLE, BondType.TRIPLE)
521
+ ]:
522
+ bond_types[bond_types == aromatic_type] = non_aromatic_type
459
523
 
460
524
  def remove_bond_order(self):
461
525
  """
@@ -994,20 +1058,7 @@ class BondList(Copyable):
994
1058
  )
995
1059
 
996
1060
  def __add__(self, bond_list):
997
- cdef np.ndarray merged_bonds \
998
- = np.concatenate([self._bonds, bond_list._bonds])
999
- # Offset the indices of appended bonds list
1000
- # (consistent with addition of AtomArray)
1001
- merged_bonds[len(self._bonds):, :2] += self._atom_count
1002
- cdef uint32 merged_count = self._atom_count + bond_list._atom_count
1003
- cdef merged_bond_list = BondList(merged_count)
1004
- # Array is not used in constructor to prevent unnecessary
1005
- # maximum and redundant bond calculation
1006
- merged_bond_list._bonds = merged_bonds
1007
- merged_bond_list._max_bonds_per_atom = max(
1008
- self._max_bonds_per_atom, bond_list._max_bonds_per_atom
1009
- )
1010
- return merged_bond_list
1061
+ return BondList.concatenate([self, bond_list])
1011
1062
 
1012
1063
  def __getitem__(self, index):
1013
1064
  ## Variables for both, integer and boolean index arrays