biotite 1.0.1__cp311-cp311-macosx_11_0_arm64.whl → 1.2.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/dssp/app.py +13 -3
  6. biotite/application/localapp.py +36 -2
  7. biotite/application/msaapp.py +10 -10
  8. biotite/application/muscle/app3.py +5 -18
  9. biotite/application/muscle/app5.py +5 -5
  10. biotite/application/sra/app.py +0 -5
  11. biotite/application/util.py +22 -2
  12. biotite/application/viennarna/rnaalifold.py +8 -8
  13. biotite/application/viennarna/rnaplot.py +9 -3
  14. biotite/application/viennarna/util.py +1 -1
  15. biotite/application/webapp.py +1 -1
  16. biotite/database/afdb/__init__.py +12 -0
  17. biotite/database/afdb/download.py +191 -0
  18. biotite/database/entrez/dbnames.py +10 -0
  19. biotite/database/entrez/download.py +9 -10
  20. biotite/database/entrez/key.py +1 -1
  21. biotite/database/entrez/query.py +5 -4
  22. biotite/database/pubchem/download.py +6 -6
  23. biotite/database/pubchem/error.py +10 -0
  24. biotite/database/pubchem/query.py +12 -23
  25. biotite/database/rcsb/download.py +3 -2
  26. biotite/database/rcsb/query.py +8 -9
  27. biotite/database/uniprot/check.py +22 -17
  28. biotite/database/uniprot/download.py +3 -6
  29. biotite/database/uniprot/query.py +4 -5
  30. biotite/file.py +14 -2
  31. biotite/interface/__init__.py +19 -0
  32. biotite/interface/openmm/__init__.py +16 -0
  33. biotite/interface/openmm/state.py +93 -0
  34. biotite/interface/openmm/system.py +227 -0
  35. biotite/interface/pymol/__init__.py +198 -0
  36. biotite/interface/pymol/cgo.py +346 -0
  37. biotite/interface/pymol/convert.py +185 -0
  38. biotite/interface/pymol/display.py +267 -0
  39. biotite/interface/pymol/object.py +1226 -0
  40. biotite/interface/pymol/shapes.py +178 -0
  41. biotite/interface/pymol/startup.py +169 -0
  42. biotite/interface/rdkit/__init__.py +15 -0
  43. biotite/interface/rdkit/mol.py +490 -0
  44. biotite/interface/version.py +71 -0
  45. biotite/interface/warning.py +19 -0
  46. biotite/sequence/align/__init__.py +0 -4
  47. biotite/sequence/align/alignment.py +49 -14
  48. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  49. biotite/sequence/align/banded.pyx +26 -26
  50. biotite/sequence/align/cigar.py +2 -2
  51. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  52. biotite/sequence/align/kmeralphabet.pyx +19 -2
  53. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  54. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  55. biotite/sequence/align/kmertable.pyx +58 -48
  56. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/localgapped.pyx +47 -47
  58. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  59. biotite/sequence/align/localungapped.pyx +10 -10
  60. biotite/sequence/align/matrix.py +284 -57
  61. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  62. biotite/sequence/align/matrix_data/PB.license +21 -0
  63. biotite/sequence/align/matrix_data/PB.mat +18 -0
  64. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  66. biotite/sequence/align/pairwise.pyx +35 -35
  67. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  68. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/selector.pyx +2 -2
  70. biotite/sequence/align/statistics.py +1 -1
  71. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  72. biotite/sequence/alphabet.py +5 -2
  73. biotite/sequence/annotation.py +19 -13
  74. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  75. biotite/sequence/codon.py +1 -2
  76. biotite/sequence/graphics/alignment.py +25 -39
  77. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  78. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  79. biotite/sequence/graphics/colorschemes.py +44 -11
  80. biotite/sequence/graphics/dendrogram.py +4 -2
  81. biotite/sequence/graphics/features.py +2 -2
  82. biotite/sequence/graphics/logo.py +10 -12
  83. biotite/sequence/io/fasta/convert.py +1 -2
  84. biotite/sequence/io/fasta/file.py +1 -1
  85. biotite/sequence/io/fastq/file.py +3 -3
  86. biotite/sequence/io/genbank/file.py +3 -3
  87. biotite/sequence/io/genbank/sequence.py +2 -0
  88. biotite/sequence/io/gff/convert.py +1 -1
  89. biotite/sequence/io/gff/file.py +1 -2
  90. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  91. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  92. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  93. biotite/sequence/profile.py +105 -29
  94. biotite/sequence/search.py +0 -1
  95. biotite/sequence/seqtypes.py +136 -8
  96. biotite/sequence/sequence.py +1 -2
  97. biotite/setup_ccd.py +197 -0
  98. biotite/structure/__init__.py +6 -3
  99. biotite/structure/alphabet/__init__.py +25 -0
  100. biotite/structure/alphabet/encoder.py +332 -0
  101. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  102. biotite/structure/alphabet/i3d.py +109 -0
  103. biotite/structure/alphabet/layers.py +86 -0
  104. biotite/structure/alphabet/pb.license +21 -0
  105. biotite/structure/alphabet/pb.py +170 -0
  106. biotite/structure/alphabet/unkerasify.py +128 -0
  107. biotite/structure/atoms.py +163 -66
  108. biotite/structure/basepairs.py +26 -26
  109. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  110. biotite/structure/bonds.pyx +79 -25
  111. biotite/structure/box.py +19 -21
  112. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  113. biotite/structure/celllist.pyx +83 -67
  114. biotite/structure/chains.py +5 -37
  115. biotite/structure/charges.cpython-311-darwin.so +0 -0
  116. biotite/structure/compare.py +420 -13
  117. biotite/structure/density.py +1 -1
  118. biotite/structure/dotbracket.py +27 -28
  119. biotite/structure/filter.py +8 -8
  120. biotite/structure/geometry.py +74 -127
  121. biotite/structure/hbond.py +17 -19
  122. biotite/structure/info/__init__.py +1 -0
  123. biotite/structure/info/atoms.py +24 -15
  124. biotite/structure/info/bonds.py +12 -6
  125. biotite/structure/info/ccd.py +125 -34
  126. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  127. biotite/structure/info/groups.py +62 -19
  128. biotite/structure/info/masses.py +9 -6
  129. biotite/structure/info/misc.py +15 -22
  130. biotite/structure/info/radii.py +92 -22
  131. biotite/structure/info/standardize.py +4 -4
  132. biotite/structure/integrity.py +4 -6
  133. biotite/structure/io/general.py +2 -2
  134. biotite/structure/io/gro/file.py +8 -9
  135. biotite/structure/io/mol/convert.py +1 -1
  136. biotite/structure/io/mol/ctab.py +33 -28
  137. biotite/structure/io/mol/mol.py +1 -1
  138. biotite/structure/io/mol/sdf.py +80 -53
  139. biotite/structure/io/pdb/convert.py +4 -3
  140. biotite/structure/io/pdb/file.py +85 -25
  141. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  142. biotite/structure/io/pdbqt/file.py +36 -36
  143. biotite/structure/io/pdbx/__init__.py +1 -0
  144. biotite/structure/io/pdbx/bcif.py +54 -15
  145. biotite/structure/io/pdbx/cif.py +92 -66
  146. biotite/structure/io/pdbx/component.py +15 -4
  147. biotite/structure/io/pdbx/compress.py +321 -0
  148. biotite/structure/io/pdbx/convert.py +410 -75
  149. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  150. biotite/structure/io/pdbx/encoding.pyx +98 -17
  151. biotite/structure/io/trajfile.py +9 -6
  152. biotite/structure/io/util.py +38 -0
  153. biotite/structure/mechanics.py +0 -1
  154. biotite/structure/molecules.py +141 -156
  155. biotite/structure/pseudoknots.py +7 -13
  156. biotite/structure/repair.py +2 -4
  157. biotite/structure/residues.py +13 -24
  158. biotite/structure/rings.py +335 -0
  159. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  160. biotite/structure/sasa.pyx +2 -1
  161. biotite/structure/segments.py +69 -11
  162. biotite/structure/sequence.py +0 -1
  163. biotite/structure/sse.py +0 -2
  164. biotite/structure/superimpose.py +74 -62
  165. biotite/structure/tm.py +581 -0
  166. biotite/structure/transform.py +12 -25
  167. biotite/structure/util.py +76 -4
  168. biotite/version.py +9 -4
  169. biotite/visualize.py +111 -1
  170. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
  171. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
  172. biotite/structure/info/ccd/README.rst +0 -8
  173. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  174. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  175. biotite/structure/info/ccd/nucleotides.txt +0 -798
  176. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
  177. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -0,0 +1,170 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Conversion of structures into the *Protein Blocks* structural alphabet.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
12
+
13
+ import numpy as np
14
+ from biotite.sequence.alphabet import LetterAlphabet
15
+ from biotite.sequence.sequence import Sequence
16
+ from biotite.structure.chains import get_chain_starts
17
+ from biotite.structure.geometry import dihedral_backbone
18
+
19
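+ # PB reference angles in degrees, adapted from PBxplore: one row per block ('a' to 'p'), columns are psi(i-2), phi(i-1), psi(i-1), phi(i), psi(i), phi(i+1), psi(i+1), phi(i+2)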
20
+ PB_ANGLES = np.array(
21
+ [
22
+ [41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68],
23
+ [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90],
24
+ [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63],
25
+ [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16],
26
+ [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40],
27
+ [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
28
+ [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
29
+ [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
30
+ [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
31
+ [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
32
+ [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
33
+ [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
34
+ [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
35
+ [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
36
+ [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
37
+ [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
38
+ ]
39
+ ) # fmt: skip
40
+
41
+
42
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See Also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::
    """

    # 16 protein blocks ('a' to 'p') plus 'z' for positions without a block
    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet only contains lower case letters, hence symbols given
        # in either case are normalized to lower case before encoding
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        # All instances share the class-level alphabet
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        # Filter on the symbol code level and wrap the remaining codes
        # into a new sequence object
        filtered_code = self.code[self.code != undefined_code]
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
94
+
95
+
96
def to_protein_blocks(atoms):
    """
    Encode each chain in the given structure to the *Protein Blocks* structural
    alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # The last entry is the exclusive stop of the final chain,
    # so consecutive entries delimit one chain each
    chain_bounds = get_chain_starts(atoms, add_exclusive_stop=True)
    chain_start_indices = chain_bounds[:-1]
    sequences = [
        _to_protein_blocks(atoms[first:last])
        for first, last in zip(chain_start_indices, chain_bounds[1:])
    ]
    return sequences, chain_start_indices
135
+
136
+
137
def _to_protein_blocks(chain):
    """
    Encode the atoms of a single chain into a *Protein Blocks* sequence.

    Parameters
    ----------
    chain : AtomArray
        The atoms of one chain, as passed to :func:`dihedral_backbone()`.

    Returns
    -------
    pb_sequence : ProteinBlocksSequence
        The encoded sequence with one symbol per backbone dihedral angle entry.
        Positions where not all eight angles are available get the undefined
        symbol.
    """
    phi, psi, _ = dihedral_backbone(chain)

    # Each protein block is described by eight consecutive backbone angles
    # centered on the residue: the table gives, for each column, the angle
    # array and the part of it that aligns with the central residues
    column_sources = (
        (psi, slice(None, -4)),
        (phi, slice(1, -3)),
        (psi, slice(1, -3)),
        (phi, slice(2, -2)),
        (psi, slice(2, -2)),
        (phi, slice(3, -1)),
        (psi, slice(3, -1)),
        (phi, slice(4, None)),
    )
    # The two residues at each chain end have no complete window -> NaN
    pb_angles = np.full((len(phi), 8), np.nan)
    for column, (angles, residue_slice) in enumerate(column_sources):
        pb_angles[2:-2, column] = angles[residue_slice]
    pb_angles = np.rad2deg(pb_angles)

    # Angle deviation of all reference angles to all actual angles,
    # wrapped into the range [-180, 180)
    deviation = (
        PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :] + 180
    ) % 360 - 180
    rmsda = np.sum(deviation**2, axis=-1)

    # Where the deviation is NaN (missing atoms/residues or chain ends)
    # the symbol stays undefined
    is_defined = ~np.isnan(rmsda).any(axis=0)
    undefined_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )
    pb_seq_code = np.full(len(pb_angles), undefined_code, dtype=np.uint8)
    # Choose the PB with the lowest deviation from the reference angles:
    # due to the definition of Biotite symbol codes
    # the index of the chosen PB is directly the symbol code
    pb_seq_code[is_defined] = np.argmin(rmsda[:, is_defined], axis=0)

    # Put the array of symbol codes into an actual sequence object
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = pb_seq_code
    return pb_sequence
@@ -0,0 +1,128 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Parser for extracting weights from Keras files.
7
+
8
+ Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
9
+ """
10
+
11
+ __name__ = "biotite.structure.alphabet"
12
+ __author__ = "Martin Larralde"
13
+ __all__ = ["load_kerasify"]
14
+
15
+ import enum
16
+ import functools
17
+ import itertools
18
+ import struct
19
+ import numpy as np
20
+ from biotite.structure.alphabet.layers import DenseLayer, Layer
21
+
22
+
23
class LayerType(enum.IntEnum):
    """
    Integer identifiers of the layer types as stored in a ``.kerasify`` file.
    """

    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
32
+
33
+
34
class ActivationType(enum.IntEnum):
    """
    Integer identifiers of the activation functions as stored in a
    ``.kerasify`` file.
    """

    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
41
+
42
+
43
class KerasifyParser:
    """
    An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator that yields the layers of the model in the
    order they are stored in the file.

    Parameters
    ----------
    file : file-like
        The ``.kerasify`` file to parse.
        It must be opened in binary mode and support ``readinto()``.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Scratch buffer that is reused for every read and grown on demand
        self.buffer = bytearray(1024)
        # The file starts with the number of layers
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Read the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The parsed layer or ``None``, if all layers were already read.

        Raises
        ------
        NotImplementedError
            If the layer type or its activation function is not supported.
        ValueError
            If the layer type or activation identifier is unknown.
        EOFError
            If the file ends before the layer is completely read.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type == LayerType.DENSE:
            (w0,) = self._get("I")
            (w1,) = self._get("I")
            (b0,) = self._get("I")
            # The arrays are copied, as the underlying buffer is overwritten
            # by the next read
            weights = (
                np.frombuffer(self._read(f"={w0 * w1}f"), dtype="f4")
                .reshape(w0, w1)
                .copy()
            )
            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
            activation = ActivationType(self._get("I")[0])
            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
                raise NotImplementedError(
                    f"Unsupported activation type: {activation!r}"
                )
            # The last argument flags whether the activation is ReLU
            return DenseLayer(weights, biases, activation == ActivationType.RELU)
        else:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

    def __iter__(self):
        return self

    def __next__(self) -> "Layer":
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes from the file as the given ``struct`` format requires.

        The returned view points into the reused internal buffer, i.e. its
        content is only valid until the next call.

        Raises
        ------
        EOFError
            If the file ends before the required number of bytes is read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            # Grow the buffer with zero bytes up to the required size
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # `readinto()` may deliver fewer bytes than requested:
        # keep reading until the view is filled and fail loudly on a
        # truncated file instead of silently returning stale buffer content
        n_filled = 0
        while n_filled < n:
            n_read = self.file.readinto(v[n_filled:])  # type: ignore
            if not n_read:
                raise EOFError(
                    f"Unexpected end of file: expected {n} bytes, "
                    f"but got only {n_filled}"
                )
            n_filled += n_read
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by the given ``struct`` format.
        """
        v = self._read(format)
        return struct.unpack(format, v)
110
+
111
+
112
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached, i.e. each file path is parsed only once.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as file:
        # The parser iterates over the layers in the file
        parser = KerasifyParser(file)
        layers = tuple(parser)
    return layers
@@ -13,6 +13,7 @@ __all__ = [
13
13
  "Atom",
14
14
  "AtomArray",
15
15
  "AtomArrayStack",
16
+ "concatenate",
16
17
  "array",
17
18
  "stack",
18
19
  "repeat",
@@ -22,6 +23,7 @@ __all__ = [
22
23
 
23
24
  import abc
24
25
  import numbers
26
+ from collections.abc import Sequence
25
27
  import numpy as np
26
28
  from biotite.copyable import Copyable
27
29
  from biotite.structure.bonds import BondList
@@ -33,6 +35,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
33
35
  :class:`AtomArrayStack`.
34
36
  It implements functionality for annotation arrays and also
35
37
  rudimentarily for coordinates.
38
+
39
+ Parameters
40
+ ----------
41
+ length : int
42
+ The amount of atoms in the structure.
36
43
  """
37
44
 
38
45
  def __init__(self, length):
@@ -94,11 +101,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
94
101
  The annotation category to be added.
95
102
  dtype : type or str
96
103
  A type instance or a valid *NumPy* *dtype* string.
97
- Defines the type of the annotation
104
+ Defines the type of the annotation.
98
105
 
99
106
  See Also
100
107
  --------
101
- set_annotation
108
+ set_annotation : Assign directly a value to an annotation.
102
109
 
103
110
  Notes
104
111
  -----
@@ -157,7 +164,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
157
164
  ----------
158
165
  category : str
159
166
  The annotation category to be set.
160
- array : ndarray or None
167
+ array : ndarray
161
168
  The new value of the annotation category. The size of the
162
169
  array must be the same as the array length.
163
170
 
@@ -169,7 +176,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
169
176
  array = np.asarray(array)
170
177
  if len(array) != self._array_length:
171
178
  raise IndexError(
172
- f"Expected array length {self._array_length}, " f"but got {len(array)}"
179
+ f"Expected array length {self._array_length}, but got {len(array)}"
173
180
  )
174
181
  if category in self._annot:
175
182
  # If the annotation already exists, find the compatible dtype
@@ -233,7 +240,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
233
240
  else:
234
241
  raise TypeError(f"Index must be integer, not '{type(index).__name__}'")
235
242
 
236
- def equal_annotations(self, item):
243
+ def equal_annotations(self, item, equal_nan=True):
237
244
  """
238
245
  Check, if this object shares equal annotation arrays with the
239
246
  given :class:`AtomArray` or :class:`AtomArrayStack`.
@@ -242,6 +249,8 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
242
249
  ----------
243
250
  item : AtomArray or AtomArrayStack
244
251
  The object to compare the annotation arrays with.
252
+ equal_nan : bool
253
+ Whether to count `nan` values as equal. Default: True.
245
254
 
246
255
  Returns
247
256
  -------
@@ -253,7 +262,18 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
253
262
  if not self.equal_annotation_categories(item):
254
263
  return False
255
264
  for name in self._annot:
256
- if not np.array_equal(self._annot[name], item._annot[name]):
265
+ # ... allowing `nan` values causes type-casting, which is
266
+ # only possible for floating-point arrays
267
+ allow_nan = (
268
+ equal_nan
269
+ if np.issubdtype(self._annot[name].dtype, np.floating)
270
+ else False
271
+ )
272
+ if not np.array_equal(
273
+ self._annot[name],
274
+ item._annot[name],
275
+ equal_nan=allow_nan,
276
+ ):
257
277
  return False
258
278
  return True
259
279
 
@@ -308,17 +328,16 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
308
328
  if isinstance(self, AtomArray):
309
329
  if value.ndim != 2:
310
330
  raise ValueError(
311
- "A 2-dimensional ndarray is expected " "for an AtomArray"
331
+ "A 2-dimensional ndarray is expected for an AtomArray"
312
332
  )
313
333
  elif isinstance(self, AtomArrayStack):
314
334
  if value.ndim != 3:
315
335
  raise ValueError(
316
- "A 3-dimensional ndarray is expected " "for an AtomArrayStack"
336
+ "A 3-dimensional ndarray is expected for an AtomArrayStack"
317
337
  )
318
338
  if value.shape[-2] != self._array_length:
319
339
  raise ValueError(
320
- f"Expected array length {self._array_length}, "
321
- f"but got {len(value)}"
340
+ f"Expected array length {self._array_length}, but got {len(value)}"
322
341
  )
323
342
  if value.shape[-1] != 3:
324
343
  raise TypeError("Expected 3 coordinates for each atom")
@@ -343,13 +362,12 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
343
362
  if isinstance(self, AtomArray):
344
363
  if value.ndim != 2:
345
364
  raise ValueError(
346
- "A 2-dimensional ndarray is expected " "for an AtomArray"
365
+ "A 2-dimensional ndarray is expected for an AtomArray"
347
366
  )
348
367
  else: # AtomArrayStack
349
368
  if value.ndim != 3:
350
369
  raise ValueError(
351
- "A 3-dimensional ndarray is expected "
352
- "for an AtomArrayStack"
370
+ "A 3-dimensional ndarray is expected for an AtomArrayStack"
353
371
  )
354
372
  if value.shape[-2:] != (3, 3):
355
373
  raise TypeError("Box must be a 3x3 matrix (three vectors)")
@@ -407,42 +425,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
407
425
  return self._array_length
408
426
 
409
427
  def __add__(self, array):
410
- if not isinstance(self, type(array)):
411
- raise TypeError("Can only concatenate two arrays or two stacks")
412
- # Create either new array or stack, depending of the own type
413
- if isinstance(self, AtomArray):
414
- concat = AtomArray(length=self._array_length + array._array_length)
415
- if isinstance(self, AtomArrayStack):
416
- concat = AtomArrayStack(
417
- self.stack_depth(), self._array_length + array._array_length
418
- )
419
-
420
- concat._coord = np.concatenate((self._coord, array.coord), axis=-2)
421
-
422
- # Transfer only annotations,
423
- # which are existent in both operands
424
- arr_categories = list(array._annot.keys())
425
- for category in self._annot.keys():
426
- if category in arr_categories:
427
- annot = self._annot[category]
428
- arr_annot = array._annot[category]
429
- concat._annot[category] = np.concatenate((annot, arr_annot))
430
-
431
- # Concatenate bonds lists,
432
- # if at least one of them contains bond information
433
- if self._bonds is not None or array._bonds is not None:
434
- bonds1 = self._bonds
435
- bonds2 = array._bonds
436
- if bonds1 is None:
437
- bonds1 = BondList(self._array_length)
438
- if bonds2 is None:
439
- bonds2 = BondList(array._array_length)
440
- concat._bonds = bonds1 + bonds2
441
-
442
- # Copy box
443
- if self._box is not None:
444
- concat._box = np.copy(self._box)
445
- return concat
428
+ return concatenate([self, array])
446
429
 
447
430
  def __copy_fill__(self, clone):
448
431
  super().__copy_fill__(clone)
@@ -468,9 +451,9 @@ class Atom(Copyable):
468
451
 
469
452
  Parameters
470
453
  ----------
471
- coord: list or ndarray
454
+ coord : list or ndarray
472
455
  The x, y and z coordinates.
473
- kwargs
456
+ **kwargs
474
457
  Atom annotations as key value pair.
475
458
 
476
459
  Attributes
@@ -492,7 +475,6 @@ class Atom(Copyable):
492
475
  CA
493
476
  >>> print(atom.coord)
494
477
  [1. 2. 3.]
495
-
496
478
  """
497
479
 
498
480
  def __init__(self, coord, **kwargs):
@@ -606,6 +588,7 @@ class AtomArray(_AtomArrayBase):
606
588
  :class:`AtomArray` is done with the '+' operator.
607
589
  Only the annotation categories, which are existing in both arrays,
608
590
  are transferred to the new array.
591
+ For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
609
592
 
610
593
  Optionally, an :class:`AtomArray` can store chemical bond
611
594
  information via a :class:`BondList` object.
@@ -651,6 +634,10 @@ class AtomArray(_AtomArrayBase):
651
634
  The single value in the tuple is
652
635
  the length of the atom array.
653
636
 
637
+ See Also
638
+ --------
639
+ AtomArrayStack : Representation of multiple structure models.
640
+
654
641
  Examples
655
642
  --------
656
643
  Creating an atom array from atoms:
@@ -719,10 +706,6 @@ class AtomArray(_AtomArrayBase):
719
706
  Shape of the array.
720
707
  The single value in the tuple is
721
708
  the :func:`array_length()`.
722
-
723
- See Also
724
- --------
725
- array_length
726
709
  """
727
710
  return (self.array_length(),)
728
711
 
@@ -878,7 +861,9 @@ class AtomArrayStack(_AtomArrayBase):
878
861
  :class:`AtomArray` instance.
879
862
 
880
863
  Concatenation of atoms for each array in the stack is done using the
881
- '+' operator. For addition of atom arrays onto the stack use the
864
+ '+' operator.
865
+ For a list of :class:`AtomArray` objects, use :func:`concatenate()`.
866
+ For addition of atom arrays onto the stack use the
882
867
  :func:`stack()` method.
883
868
 
884
869
  The :attr:`box` attribute has the shape *m x 3 x 3*, as the cell
@@ -912,9 +897,9 @@ class AtomArrayStack(_AtomArrayBase):
912
897
  The numbers correspond to the stack depth
913
898
  and array length, respectively.
914
899
 
915
- See also
900
+ See Also
916
901
  --------
917
- AtomArray
902
+ AtomArray : Representation of a single structure model.
918
903
 
919
904
  Examples
920
905
  --------
@@ -1212,9 +1197,18 @@ def array(atoms):
1212
1197
  f"annotation categories as the atom at index 0"
1213
1198
  )
1214
1199
  array = AtomArray(len(atoms))
1200
+
1215
1201
  # Add all (also optional) annotation categories
1216
1202
  for name in names:
1217
- array.add_annotation(name, dtype=type(atoms[0]._annot[name]))
1203
+ value = atoms[0]._annot[name]
1204
+ if isinstance(value, str):
1205
+ # Find maximum string length across all atoms for this annotation
1206
+ max_len = max(len(str(atom._annot[name])) for atom in atoms)
1207
+ dtype = f"<U{max_len}"
1208
+ else:
1209
+ dtype = type(value)
1210
+ array.add_annotation(name, dtype=dtype)
1211
+
1218
1212
  # Add all atoms to AtomArray
1219
1213
  for i in range(len(atoms)):
1220
1214
  for name in names:
@@ -1292,6 +1286,112 @@ def stack(arrays):
1292
1286
  return array_stack
1293
1287
 
1294
1288
 
1289
def concatenate(atoms):
    """
    Concatenate multiple :class:`AtomArray` or :class:`AtomArrayStack` objects into
    a single :class:`AtomArray` or :class:`AtomArrayStack`, respectively.

    Parameters
    ----------
    atoms : iterable object of AtomArray or AtomArrayStack
        The atoms to be concatenated.
        :class:`AtomArray` cannot be mixed with :class:`AtomArrayStack`.

    Returns
    -------
    concatenated_atoms : AtomArray or AtomArrayStack
        The concatenated atoms, i.e. its ``array_length()`` is the sum of the
        ``array_length()`` of the input ``atoms``.

    Raises
    ------
    IndexError
        If `atoms` is empty or if the stack depths of the given
        :class:`AtomArrayStack` objects are not equal.
    TypeError
        If the elements are neither :class:`AtomArray` nor
        :class:`AtomArrayStack` objects or if both types are mixed.

    Notes
    -----
    The following rules apply:

    - Only the annotation categories that exist in all elements are transferred.
    - The box of the first element that has a box is transferred, if any.
    - The bonds of all elements are concatenated, if any element has associated bonds.
      For elements without a :class:`BondList` an empty :class:`BondList` is assumed.

    Examples
    --------

    >>> atoms1 = array([
    ...     Atom([1,2,3], res_id=1, atom_name="N"),
    ...     Atom([4,5,6], res_id=1, atom_name="CA"),
    ...     Atom([7,8,9], res_id=1, atom_name="C")
    ... ])
    >>> atoms2 = array([
    ...     Atom([1,2,3], res_id=2, atom_name="N"),
    ...     Atom([4,5,6], res_id=2, atom_name="CA"),
    ...     Atom([7,8,9], res_id=2, atom_name="C")
    ... ])
    >>> print(concatenate([atoms1, atoms2]))
                1      N                1.000    2.000    3.000
                1      CA               4.000    5.000    6.000
                1      C                7.000    8.000    9.000
                2      N                1.000    2.000    3.000
                2      CA               4.000    5.000    6.000
                2      C                7.000    8.000    9.000
    """
    # Ensure that the atoms can be iterated over multiple times
    if not isinstance(atoms, Sequence):
        atoms = list(atoms)
    if len(atoms) == 0:
        raise IndexError("Cannot concatenate an empty collection of atoms")

    # The first element determines the type all other elements must have
    element_type = type(atoms[0])
    if not issubclass(element_type, (AtomArray, AtomArrayStack)):
        raise TypeError(
            f"Expected 'AtomArray' or 'AtomArrayStack', "
            f"but got '{element_type.__name__}'"
        )
    is_stack = issubclass(element_type, AtomArrayStack)

    length = 0
    depth = None
    common_categories = set(atoms[0].get_annotation_categories())
    box = None
    has_bonds = False
    for element in atoms:
        if not isinstance(element, element_type):
            raise TypeError(
                f"Cannot concatenate '{type(element).__name__}' "
                f"with '{element_type.__name__}'"
            )
        length += element.array_length()
        if is_stack:
            if depth is None:
                depth = element.stack_depth()
            elif element.stack_depth() != depth:
                raise IndexError("The stack depths are not equal")
        common_categories &= set(element.get_annotation_categories())
        if box is None and element.box is not None:
            box = element.box
        if element.bonds is not None:
            has_bonds = True

    if is_stack:
        concat_atoms = AtomArrayStack(depth, length)
    else:
        concat_atoms = AtomArray(length)
    concat_atoms.coord = np.concatenate([element.coord for element in atoms], axis=-2)
    # Keep the category order of the first element instead of the arbitrary
    # iteration order of the set, so the result is reproducible
    for category in atoms[0].get_annotation_categories():
        if category not in common_categories:
            continue
        concat_atoms.set_annotation(
            category,
            np.concatenate(
                [element.get_annotation(category) for element in atoms], axis=0
            ),
        )
    concat_atoms.box = box
    if has_bonds:
        # Concatenate bonds of all elements,
        # elements without bonds contribute an empty BondList
        concat_atoms.bonds = BondList.concatenate(
            [
                element.bonds
                if element.bonds is not None
                else BondList(element.array_length())
                for element in atoms
            ]
        )

    return concat_atoms
+
1394
+
1295
1395
  def repeat(atoms, coord):
1296
1396
  """
1297
1397
  Repeat atoms (:class:`AtomArray` or :class:`AtomArrayStack`)
@@ -1354,8 +1454,7 @@ def repeat(atoms, coord):
1354
1454
  if isinstance(atoms, AtomArray):
1355
1455
  if coord.ndim != 3:
1356
1456
  raise ValueError(
1357
- f"Expected 3 dimensions for the coordinate array, "
1358
- f"but got {coord.ndim}"
1457
+ f"Expected 3 dimensions for the coordinate array, but got {coord.ndim}"
1359
1458
  )
1360
1459
  repeated = AtomArray(new_length)
1361
1460
  repeated.coord = coord.reshape((new_length, 3))
@@ -1363,16 +1462,14 @@ def repeat(atoms, coord):
1363
1462
  elif isinstance(atoms, AtomArrayStack):
1364
1463
  if coord.ndim != 4:
1365
1464
  raise ValueError(
1366
- f"Expected 4 dimensions for the coordinate array, "
1367
- f"but got {coord.ndim}"
1465
+ f"Expected 4 dimensions for the coordinate array, but got {coord.ndim}"
1368
1466
  )
1369
1467
  repeated = AtomArrayStack(atoms.stack_depth(), new_length)
1370
1468
  repeated.coord = coord.reshape((atoms.stack_depth(), new_length, 3))
1371
1469
 
1372
1470
  else:
1373
1471
  raise TypeError(
1374
- f"Expected 'AtomArray' or 'AtomArrayStack', "
1375
- f"but got {type(atoms).__name__}"
1472
+ f"Expected 'AtomArray' or 'AtomArrayStack', but got {type(atoms).__name__}"
1376
1473
  )
1377
1474
 
1378
1475
  for category in atoms.get_annotation_categories():