biotite 1.1.0__cp313-cp313-win_amd64.whl → 1.3.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (160) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/localapp.py +2 -2
  6. biotite/application/msaapp.py +10 -10
  7. biotite/application/muscle/app3.py +3 -3
  8. biotite/application/muscle/app5.py +3 -3
  9. biotite/application/sra/app.py +0 -5
  10. biotite/application/util.py +21 -1
  11. biotite/application/viennarna/rnaalifold.py +8 -8
  12. biotite/application/viennarna/rnaplot.py +10 -8
  13. biotite/application/viennarna/util.py +1 -1
  14. biotite/application/webapp.py +1 -1
  15. biotite/database/afdb/__init__.py +12 -0
  16. biotite/database/afdb/download.py +191 -0
  17. biotite/database/entrez/dbnames.py +10 -0
  18. biotite/database/entrez/download.py +9 -10
  19. biotite/database/entrez/key.py +1 -1
  20. biotite/database/entrez/query.py +5 -4
  21. biotite/database/pubchem/download.py +6 -6
  22. biotite/database/pubchem/error.py +10 -0
  23. biotite/database/pubchem/query.py +12 -23
  24. biotite/database/rcsb/download.py +3 -2
  25. biotite/database/rcsb/query.py +2 -3
  26. biotite/database/uniprot/check.py +2 -2
  27. biotite/database/uniprot/download.py +2 -5
  28. biotite/database/uniprot/query.py +3 -4
  29. biotite/file.py +14 -2
  30. biotite/interface/__init__.py +19 -0
  31. biotite/interface/openmm/__init__.py +20 -0
  32. biotite/interface/openmm/state.py +93 -0
  33. biotite/interface/openmm/system.py +227 -0
  34. biotite/interface/pymol/__init__.py +201 -0
  35. biotite/interface/pymol/cgo.py +346 -0
  36. biotite/interface/pymol/convert.py +185 -0
  37. biotite/interface/pymol/display.py +267 -0
  38. biotite/interface/pymol/object.py +1226 -0
  39. biotite/interface/pymol/shapes.py +178 -0
  40. biotite/interface/pymol/startup.py +169 -0
  41. biotite/interface/rdkit/__init__.py +19 -0
  42. biotite/interface/rdkit/mol.py +490 -0
  43. biotite/interface/version.py +94 -0
  44. biotite/interface/warning.py +19 -0
  45. biotite/sequence/align/__init__.py +0 -4
  46. biotite/sequence/align/alignment.py +33 -11
  47. biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
  48. biotite/sequence/align/banded.pyx +22 -22
  49. biotite/sequence/align/cigar.py +2 -2
  50. biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
  51. biotite/sequence/align/kmeralphabet.pyx +2 -2
  52. biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
  53. biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
  54. biotite/sequence/align/kmertable.pyx +6 -6
  55. biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
  56. biotite/sequence/align/localgapped.pyx +47 -47
  57. biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
  58. biotite/sequence/align/localungapped.pyx +10 -10
  59. biotite/sequence/align/matrix.py +12 -3
  60. biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
  61. biotite/sequence/align/multiple.pyx +1 -2
  62. biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
  63. biotite/sequence/align/pairwise.pyx +37 -39
  64. biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
  65. biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
  66. biotite/sequence/align/selector.pyx +2 -2
  67. biotite/sequence/align/statistics.py +1 -1
  68. biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
  69. biotite/sequence/alphabet.py +2 -2
  70. biotite/sequence/annotation.py +19 -13
  71. biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
  72. biotite/sequence/codon.py +1 -2
  73. biotite/sequence/graphics/alignment.py +25 -39
  74. biotite/sequence/graphics/dendrogram.py +4 -2
  75. biotite/sequence/graphics/features.py +2 -2
  76. biotite/sequence/graphics/logo.py +10 -12
  77. biotite/sequence/io/fasta/convert.py +1 -2
  78. biotite/sequence/io/fasta/file.py +1 -1
  79. biotite/sequence/io/fastq/file.py +3 -3
  80. biotite/sequence/io/genbank/file.py +3 -3
  81. biotite/sequence/io/genbank/sequence.py +2 -0
  82. biotite/sequence/io/gff/convert.py +1 -1
  83. biotite/sequence/io/gff/file.py +1 -2
  84. biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
  85. biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
  86. biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
  87. biotite/sequence/profile.py +19 -25
  88. biotite/sequence/search.py +0 -1
  89. biotite/sequence/seqtypes.py +12 -5
  90. biotite/sequence/sequence.py +1 -2
  91. biotite/structure/__init__.py +2 -0
  92. biotite/structure/alphabet/i3d.py +1 -2
  93. biotite/structure/alphabet/pb.py +1 -2
  94. biotite/structure/alphabet/unkerasify.py +8 -2
  95. biotite/structure/atoms.py +35 -27
  96. biotite/structure/basepairs.py +39 -40
  97. biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
  98. biotite/structure/bonds.pyx +8 -5
  99. biotite/structure/box.py +159 -23
  100. biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
  101. biotite/structure/celllist.pyx +83 -68
  102. biotite/structure/chains.py +17 -55
  103. biotite/structure/charges.cp313-win_amd64.pyd +0 -0
  104. biotite/structure/compare.py +420 -13
  105. biotite/structure/density.py +1 -1
  106. biotite/structure/dotbracket.py +31 -32
  107. biotite/structure/filter.py +8 -8
  108. biotite/structure/geometry.py +15 -15
  109. biotite/structure/graphics/rna.py +19 -16
  110. biotite/structure/hbond.py +18 -21
  111. biotite/structure/info/atoms.py +11 -2
  112. biotite/structure/info/ccd.py +0 -2
  113. biotite/structure/info/components.bcif +0 -0
  114. biotite/structure/info/groups.py +0 -3
  115. biotite/structure/info/misc.py +0 -1
  116. biotite/structure/info/radii.py +92 -22
  117. biotite/structure/info/standardize.py +1 -2
  118. biotite/structure/integrity.py +4 -6
  119. biotite/structure/io/general.py +2 -2
  120. biotite/structure/io/gro/file.py +8 -9
  121. biotite/structure/io/mol/convert.py +1 -1
  122. biotite/structure/io/mol/ctab.py +33 -28
  123. biotite/structure/io/mol/mol.py +1 -1
  124. biotite/structure/io/mol/sdf.py +39 -13
  125. biotite/structure/io/pdb/convert.py +86 -5
  126. biotite/structure/io/pdb/file.py +90 -24
  127. biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
  128. biotite/structure/io/pdbqt/file.py +4 -4
  129. biotite/structure/io/pdbx/bcif.py +22 -7
  130. biotite/structure/io/pdbx/cif.py +20 -7
  131. biotite/structure/io/pdbx/component.py +6 -0
  132. biotite/structure/io/pdbx/compress.py +71 -34
  133. biotite/structure/io/pdbx/convert.py +429 -77
  134. biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
  135. biotite/structure/io/pdbx/encoding.pyx +39 -23
  136. biotite/structure/io/trajfile.py +9 -6
  137. biotite/structure/io/util.py +38 -0
  138. biotite/structure/mechanics.py +0 -1
  139. biotite/structure/molecules.py +0 -15
  140. biotite/structure/pseudoknots.py +13 -19
  141. biotite/structure/repair.py +2 -4
  142. biotite/structure/residues.py +20 -48
  143. biotite/structure/rings.py +335 -0
  144. biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
  145. biotite/structure/sasa.pyx +30 -30
  146. biotite/structure/segments.py +123 -9
  147. biotite/structure/sequence.py +0 -1
  148. biotite/structure/spacegroups.json +1567 -0
  149. biotite/structure/spacegroups.license +26 -0
  150. biotite/structure/sse.py +0 -2
  151. biotite/structure/superimpose.py +75 -253
  152. biotite/structure/tm.py +581 -0
  153. biotite/structure/transform.py +232 -26
  154. biotite/structure/util.py +3 -3
  155. biotite/version.py +9 -4
  156. biotite/visualize.py +111 -1
  157. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
  158. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
  159. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
  160. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
230
230
  # since the file content may be invalid/malicious.
231
231
  raise NotImplementedError()
232
232
 
233
+ def __str__(self):
234
+ # Restore original behavior, as `__str__()` implementation of `_Component`
235
+ # may require serialization, which is not possible for some encodings prior
236
+ # to the first encoding pass
237
+ return object.__str__(self)
238
+
233
239
 
234
240
  @dataclass
235
241
  class ByteArrayEncoding(Encoding):
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
325
331
  )
326
332
 
327
333
  # Round to avoid wrong values due to floating point inaccuracies
328
- return np.round(data * self.factor).astype(np.int32)
334
+ scaled_data = np.round(data * self.factor)
335
+ return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)
329
336
 
330
337
  def decode(self, data):
331
338
  return (data / self.factor).astype(
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
392
399
  self.min, self.max, self.num_steps, dtype=data.dtype
393
400
  )
394
401
  indices = np.searchsorted(steps, data, side="left")
395
- return indices.astype(np.int32, copy=False)
402
+ return _safe_cast(indices, np.int32)
396
403
 
397
404
  def decode(self, data):
398
405
  output = data * (self.max - self.min) / (self.num_steps - 1)
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
570
577
  if self.origin is None:
571
578
  self.origin = data[0]
572
579
 
580
+ # Differences (including `np.diff`) return an array with the same dtype as the
581
+ # input array
582
+ # As the input dtype may be unsigned, the output dtype could underflow,
583
+ # if the difference is negative
584
+ # -> cast to int64 to avoid this
585
+ data = data.astype(np.int64, copy=False)
573
586
  data = data - self.origin
574
- return np.diff(data, prepend=0).astype(np.int32, copy=False)
587
+ return _safe_cast(np.diff(data, prepend=0), np.int32)
575
588
 
576
589
  def decode(self, data):
577
590
  output = np.cumsum(data, dtype=self.src_type.to_dtype())
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
635
648
  # Only positive values -> use unsigned integers
636
649
  self.is_unsigned = data.min().item() >= 0
637
650
 
638
- data = data.astype(np.int32, copy=False)
651
+ data = _safe_cast(data, np.int32)
639
652
  return self._encode(
640
653
  data, np.empty(0, dtype=self._determine_packed_dtype())
641
654
  )
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
870
883
  else:
871
884
  check_present = True
872
885
 
873
- string_order = np.argsort(self.strings).astype(np.int32)
886
+ string_order = _safe_cast(np.argsort(self.strings), np.int32)
874
887
  sorted_strings = self.strings[string_order]
875
888
  sorted_indices = np.searchsorted(sorted_strings, data)
876
889
  indices = string_order[sorted_indices]
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
1010
1023
  return attribute_name[0].lower() + attribute_name[1:]
1011
1024
 
1012
1025
 
1013
- def _safe_cast(array, dtype):
1014
- dtype = np.dtype(dtype)
1015
- if dtype == array.dtype:
1026
+ def _safe_cast(array, dtype, allow_decimal_loss=False):
1027
+ source_dtype = array.dtype
1028
+ target_dtype = np.dtype(dtype)
1029
+
1030
+ if target_dtype == source_dtype:
1016
1031
  return array
1017
- if np.issubdtype(dtype, np.integer):
1018
- if not np.issubdtype(array.dtype, np.integer):
1019
- raise ValueError("Cannot cast floating point to integer")
1020
- dtype_info = np.iinfo(dtype)
1021
- if np.any(array < dtype_info.min) or np.any(array > dtype_info.max):
1022
- raise ValueError("Integer values do not fit into the given dtype")
1023
- return array.astype(dtype)
1024
-
1025
-
1026
- def _get_n_decimals(value, tolerance):
1027
- MAX_DECIMALS = 10
1028
- for n in range(MAX_DECIMALS):
1029
- if abs(value - round(value, n)) < tolerance:
1030
- return n
1031
- return MAX_DECIMALS
1032
+
1033
+ if np.issubdtype(target_dtype, np.integer):
1034
+ if np.issubdtype(source_dtype, np.floating):
1035
+ if not allow_decimal_loss:
1036
+ raise ValueError("Cannot cast floating point to integer")
1037
+ if not np.isfinite(array).all():
1038
+ raise ValueError("Data contains non-finite values")
1039
+ elif not np.issubdtype(source_dtype, np.integer):
1040
+ # Neither float, nor integer -> cannot cast
1041
+ raise ValueError(f"Cannot cast '{source_dtype}' to integer")
1042
+ dtype_info = np.iinfo(target_dtype)
1043
+ # Check if an integer underflow/overflow would occur during conversion
1044
+ if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
1045
+ raise ValueError("Values do not fit into the given dtype")
1046
+
1047
+ return array.astype(target_dtype)
@@ -187,9 +187,11 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
187
187
  time : float or ndarray, dtype=float32, shape=(n,) or None
188
188
  The simulation time of the current frame or stack in *ps*.
189
189
 
190
- See also
190
+ See Also
191
191
  --------
192
- read_iter_structure
192
+ read_iter_structure :
193
+ Get an :class:`AtomArray` for each frame or an :class:`AtomArrayStack`
194
+ for each chunk of frames instead.
193
195
 
194
196
  Notes
195
197
  -----
@@ -315,9 +317,10 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
315
317
  If `stack_size` is set, multiple frames are returned as
316
318
  :class:`AtomArrayStack`.
317
319
 
318
- See also
320
+ See Also
319
321
  --------
320
- read_iter
322
+ read_iter :
323
+ Get the raw data for each frame or for each chunk of frames instead.
321
324
 
322
325
  Notes
323
326
  -----
@@ -480,7 +483,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
480
483
 
481
484
  Parameters
482
485
  ----------
483
- time : ndarray, dtype=float, shape=(m,3,3)
486
+ box : ndarray, dtype=float, shape=(m,3,3)
484
487
  The box vectors to be set.
485
488
  """
486
489
  self._check_model_count(box)
@@ -546,7 +549,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
546
549
  ------
547
550
  NotImplementedError
548
551
  """
549
- raise NotImplementedError("Copying is not implemented " "for trajectory files")
552
+ raise NotImplementedError("Copying is not implemented for trajectory files")
550
553
 
551
554
  @classmethod
552
555
  @abc.abstractmethod
@@ -0,0 +1,38 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Common functions used by a number of subpackages.
7
+ """
8
+
9
+ __name__ = "biotite.structure.io"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["number_of_integer_digits"]
12
+
13
+ import numpy as np
14
+
15
+
16
+ def number_of_integer_digits(values):
17
+ """
18
+ Get the maximum number of characters needed to represent the
19
+ pre-decimal positions of the given numeric values.
20
+
21
+ Parameters
22
+ ----------
23
+ values : ndarray, dtype=float
24
+ The values to be checked.
25
+
26
+ Returns
27
+ -------
28
+ n_digits : int
29
+ The maximum number of characters needed to represent the
30
+ pre-decimal positions of the given numeric values.
31
+ """
32
+ if len(values) == 0:
33
+ return 0
34
+ values = values.astype(int, copy=False)
35
+ n_digits = 0
36
+ n_digits = max(n_digits, len(str(np.min(values))))
37
+ n_digits = max(n_digits, len(str(np.max(values))))
38
+ return n_digits
@@ -30,7 +30,6 @@ def gyration_radius(array, masses=None):
30
30
  Must have the same length as `array`. By default, the standard
31
31
  atomic mass for each element is taken.
32
32
 
33
-
34
33
  Returns
35
34
  -------
36
35
  masses : float or ndarray, dtype=float
@@ -39,11 +39,6 @@ def get_molecule_indices(array):
39
39
  Consequently, the length of this list is equal to the number of
40
40
  molecules in the input `array`.
41
41
 
42
- See also
43
- --------
44
- get_molecule_masks
45
- molecule_iter
46
-
47
42
  Examples
48
43
  --------
49
44
  Get an :class:`AtomArray` for ATP and show that it is a single
@@ -157,11 +152,6 @@ def get_molecule_masks(array):
157
152
  Consequently, the length of this list is equal to the number of
158
153
  molecules in the input `array`.
159
154
 
160
- See also
161
- --------
162
- get_molecule_indices
163
- molecule_iter
164
-
165
155
  Examples
166
156
  --------
167
157
  Get an :class:`AtomArray` for ATP and show that it is a single
@@ -270,11 +260,6 @@ def molecule_iter(array):
270
260
  molecule : AtomArray or AtomArrayStack
271
261
  A single molecule of the input `array`.
272
262
 
273
- See also
274
- --------
275
- get_molecule_indices
276
- get_molecule_masks
277
-
278
263
  Examples
279
264
  --------
280
265
  Get an :class:`AtomArray` for ATP and break it into two molecules
@@ -69,6 +69,11 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
69
69
  Therefore, there are no pseudoknots between base pairs with the same
70
70
  pseudoknot order.
71
71
 
72
+ References
73
+ ----------
74
+
75
+ .. footbibliography::
76
+
72
77
  Examples
73
78
  --------
74
79
  Remove the pseudoknotted base pair for the sequence *ABCbac*, where
@@ -102,17 +107,6 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
102
107
  [[0 0 1]]
103
108
  >>> print(dot_bracket(basepairs, 6)[0])
104
109
  (([))]
105
-
106
- See Also
107
- --------
108
- base_pairs
109
- dot_bracket
110
-
111
- References
112
- ----------
113
-
114
- .. footbibliography::
115
-
116
110
  """
117
111
  if len(base_pairs) == 0:
118
112
  # No base pairs -> empty pseudoknot order array
@@ -149,12 +143,12 @@ class _Region:
149
143
 
150
144
  Parameters
151
145
  ----------
152
- base_pairs: ndarray, shape=(n,2), dtype=int
146
+ base_pairs : ndarray, shape=(n,2), dtype=int
153
147
  All base pairs of the structure the region is a subset for.
154
- region_pairs: ndarray, dtype=int
148
+ region_pairs : ndarray, dtype=int
155
149
  The indices of the base pairs in ``base_pairs`` that are part of
156
150
  the region.
157
- scores : ndarray, dtype=int, shape=(n,) (default: None)
151
+ scores : ndarray, dtype=int, shape=(n,)
158
152
  The score for each base pair.
159
153
  """
160
154
 
@@ -208,7 +202,7 @@ def _find_regions(base_pairs, scores):
208
202
  base_pairs : ndarray, dtype=int, shape=(n, 2)
209
203
  Each row is equivalent to one base pair and contains the first
210
204
  indices of the residues corresponding to each base.
211
- scores : ndarray, dtype=int, shape=(n,) (default: None)
205
+ scores : ndarray, dtype=int, shape=(n,)
212
206
  The score for each base pair.
213
207
 
214
208
  Returns
@@ -358,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
358
352
  return i
359
353
 
360
354
 
361
- def _get_region_array_for(regions, content=[], dtype=[]):
355
+ def _get_region_array_for(regions, content=(), dtype=()):
362
356
  """
363
357
  Get a :class:`ndarray` of region objects. Each object occurs twice,
364
358
  representing its start and end point. The regions positions in the
@@ -371,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
371
365
  ----------
372
366
  regions : set {_region, ...}
373
367
  The regions to be considered
374
- content : list [function, ...] (default: [])
368
+ content : list [function, ...]
375
369
  The functions to be considered for custom outputs. For a given
376
370
  region they must return a tuple of which the first value is
377
371
  placed at the start position and the second value at the end
378
372
  position of the region relative to the other regions.
379
- dtype : list [str, ...] (default: [])
373
+ dtype : list [str, ...]
380
374
  The data type of the output of the custom functions.
381
375
 
382
376
  Returns
@@ -560,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
560
554
  The maximum pseudoknot order to be found. If a base pair would
561
555
  be of a higher order, its order is specified as -1. If ``None``
562
556
  is given, all base pairs are evaluated.
563
- order : int (default: 0)
557
+ order : int
564
558
  The order that is currently evaluated.
565
559
 
566
560
  Returns
@@ -48,7 +48,6 @@ def create_continuous_res_ids(atoms, restart_each_chain=True):
48
48
  >>> res_ids, _ = get_residues(atom_array)
49
49
  >>> print(res_ids)
50
50
  [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
51
-
52
51
  """
53
52
  res_ids_diff = np.zeros(atoms.array_length(), dtype=int)
54
53
  res_starts = get_residue_starts(atoms)
@@ -80,7 +79,7 @@ def infer_elements(atoms):
80
79
 
81
80
  See Also
82
81
  --------
83
- create_atoms_names : The opposite of this function
82
+ create_atom_names : The opposite of this function.
84
83
 
85
84
  Examples
86
85
  --------
@@ -89,7 +88,6 @@ def infer_elements(atoms):
89
88
  ['N' 'C' 'C' 'O' 'C' 'C' 'O' 'N' 'H' 'H']
90
89
  >>> print(infer_elements(["CA", "C", "C1", "OD1", "HD21", "1H", "FE"]))
91
90
  ['C' 'C' 'C' 'O' 'H' 'H' 'FE']
92
-
93
91
  """
94
92
  if isinstance(atoms, (AtomArray, AtomArrayStack)):
95
93
  atom_names = atoms.atom_name
@@ -117,7 +115,7 @@ def create_atom_names(atoms):
117
115
 
118
116
  See Also
119
117
  --------
120
- infer_elements : The opposite of this function
118
+ infer_elements : The opposite of this function.
121
119
 
122
120
  Notes
123
121
  -----
@@ -21,23 +21,23 @@ __all__ = [
21
21
  "residue_iter",
22
22
  ]
23
23
 
24
- import numpy as np
25
24
  from biotite.structure.segments import (
26
25
  apply_segment_wise,
27
26
  get_segment_masks,
28
27
  get_segment_positions,
28
+ get_segment_starts,
29
29
  get_segment_starts_for,
30
30
  segment_iter,
31
31
  spread_segment_wise,
32
32
  )
33
33
 
34
34
 
35
- def get_residue_starts(array, add_exclusive_stop=False):
35
+ def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
36
36
  """
37
37
  Get indices for an atom array, each indicating the beginning of
38
38
  a residue.
39
39
 
40
- A new residue starts, either when the chain ID, residue ID,
40
+ A new residue starts, either when the chain ID, sym ID, residue ID,
41
41
  insertion code or residue name changes from one to the next atom.
42
42
 
43
43
  Parameters
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
48
48
  If true, the exclusive stop of the input atom array, i.e.
49
49
  ``array.array_length()``, is added to the returned array of
50
50
  start indices as last element.
51
+ extra_categories : tuple of str, optional
52
+ Additional annotation categories that induce the start of a new residue,
53
+ when their value changes from one atom to the next.
51
54
 
52
55
  Returns
53
56
  -------
@@ -69,27 +72,10 @@ def get_residue_starts(array, add_exclusive_stop=False):
69
72
  [ 0 16 35 56 75 92 116 135 157 169 176 183 197 208 219 226 250 264
70
73
  278 292 304]
71
74
  """
72
- # These mask are 'true' at indices where the value changes
73
- chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
74
- res_id_changes = array.res_id[1:] != array.res_id[:-1]
75
- ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
76
- res_name_changes = array.res_name[1:] != array.res_name[:-1]
77
-
78
- # If any of these annotation arrays change, a new residue starts
79
- residue_change_mask = (
80
- chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
81
- )
82
-
83
- # Convert mask to indices
84
- # Add 1, to shift the indices from the end of a residue
85
- # to the start of a new residue
86
- residue_starts = np.where(residue_change_mask)[0] + 1
87
-
88
- # The first residue is not included yet -> Insert '[0]'
89
- if add_exclusive_stop:
90
- return np.concatenate(([0], residue_starts, [array.array_length()]))
91
- else:
92
- return np.concatenate(([0], residue_starts))
75
+ categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
76
+ if "sym_id" in array.get_annotation_categories():
77
+ categories.append("sym_id")
78
+ return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
93
79
 
94
80
 
95
81
  def apply_residue_wise(array, data, function, axis=None):
@@ -123,9 +109,8 @@ def apply_residue_wise(array, data, function, axis=None):
123
109
  Returns
124
110
  -------
125
111
  processed_data : ndarray
126
- Residue-wise evaluation of `data` by `function`. The size of the
127
- first dimension of this array is equal to the amount of
128
- residues.
112
+ Residue-wise evaluation of `data` by `function`. The size of the first dimension
113
+ of this array is equal to the amount of residues.
129
114
 
130
115
  Examples
131
116
  --------
@@ -193,14 +178,15 @@ def spread_residue_wise(array, input_data):
193
178
  array : AtomArray or AtomArrayStack
194
179
  The atom array (stack) to determine the residues from.
195
180
  input_data : ndarray
196
- The data to be spread. The length of axis=0 must be equal to
197
- the amount of different residue IDs in `array`.
181
+ The data to be spread.
182
+ The length of the 0-th axis must be equal to the amount of different residue IDs
183
+ in `array`.
198
184
 
199
185
  Returns
200
186
  -------
201
187
  output_data : ndarray
202
- Residue-wise spread `input_data`. Length is the same as
203
- `array_length()` of `array`.
188
+ Residue-wise spread `input_data`.
189
+ Length is the same as `array_length()` of `array`.
204
190
 
205
191
  Examples
206
192
  --------
@@ -260,11 +246,6 @@ def get_residue_masks(array, indices):
260
246
  Each array masks the atoms that belong to the same residue as
261
247
  the atom at the given index.
262
248
 
263
- See also
264
- --------
265
- get_residue_starts_for
266
- get_residue_positions
267
-
268
249
  Examples
269
250
  --------
270
251
 
@@ -338,11 +319,6 @@ def get_residue_starts_for(array, indices):
338
319
  The indices that point to the residue starts for the input
339
320
  `indices`.
340
321
 
341
- See also
342
- --------
343
- get_residue_masks
344
- get_residue_positions
345
-
346
322
  Examples
347
323
  --------
348
324
 
@@ -382,14 +358,9 @@ def get_residue_positions(array, indices):
382
358
 
383
359
  Returns
384
360
  -------
385
- start_indices : ndarray, dtype=int, shape=(k,)
361
+ residue_indices : ndarray, dtype=int, shape=(k,)
386
362
  The indices that point to the position of the residues.
387
363
 
388
- See also
389
- --------
390
- get_residue_masks
391
- get_residue_starts_for
392
-
393
364
  Examples
394
365
  --------
395
366
  >>> atom_index = [5, 42]
@@ -569,4 +540,5 @@ def residue_iter(array):
569
540
  """
570
541
  # The exclusive stop is appended to the residue starts
571
542
  starts = get_residue_starts(array, add_exclusive_stop=True)
572
- return segment_iter(array, starts)
543
+ for residue in segment_iter(array, starts):
544
+ yield residue