biotite 1.1.0__cp312-cp312-win_amd64.whl → 1.3.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (160) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/localapp.py +2 -2
  6. biotite/application/msaapp.py +10 -10
  7. biotite/application/muscle/app3.py +3 -3
  8. biotite/application/muscle/app5.py +3 -3
  9. biotite/application/sra/app.py +0 -5
  10. biotite/application/util.py +21 -1
  11. biotite/application/viennarna/rnaalifold.py +8 -8
  12. biotite/application/viennarna/rnaplot.py +10 -8
  13. biotite/application/viennarna/util.py +1 -1
  14. biotite/application/webapp.py +1 -1
  15. biotite/database/afdb/__init__.py +12 -0
  16. biotite/database/afdb/download.py +191 -0
  17. biotite/database/entrez/dbnames.py +10 -0
  18. biotite/database/entrez/download.py +9 -10
  19. biotite/database/entrez/key.py +1 -1
  20. biotite/database/entrez/query.py +5 -4
  21. biotite/database/pubchem/download.py +6 -6
  22. biotite/database/pubchem/error.py +10 -0
  23. biotite/database/pubchem/query.py +12 -23
  24. biotite/database/rcsb/download.py +3 -2
  25. biotite/database/rcsb/query.py +2 -3
  26. biotite/database/uniprot/check.py +2 -2
  27. biotite/database/uniprot/download.py +2 -5
  28. biotite/database/uniprot/query.py +3 -4
  29. biotite/file.py +14 -2
  30. biotite/interface/__init__.py +19 -0
  31. biotite/interface/openmm/__init__.py +20 -0
  32. biotite/interface/openmm/state.py +93 -0
  33. biotite/interface/openmm/system.py +227 -0
  34. biotite/interface/pymol/__init__.py +201 -0
  35. biotite/interface/pymol/cgo.py +346 -0
  36. biotite/interface/pymol/convert.py +185 -0
  37. biotite/interface/pymol/display.py +267 -0
  38. biotite/interface/pymol/object.py +1226 -0
  39. biotite/interface/pymol/shapes.py +178 -0
  40. biotite/interface/pymol/startup.py +169 -0
  41. biotite/interface/rdkit/__init__.py +19 -0
  42. biotite/interface/rdkit/mol.py +490 -0
  43. biotite/interface/version.py +94 -0
  44. biotite/interface/warning.py +19 -0
  45. biotite/sequence/align/__init__.py +0 -4
  46. biotite/sequence/align/alignment.py +33 -11
  47. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  48. biotite/sequence/align/banded.pyx +22 -22
  49. biotite/sequence/align/cigar.py +2 -2
  50. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  51. biotite/sequence/align/kmeralphabet.pyx +2 -2
  52. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  53. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  54. biotite/sequence/align/kmertable.pyx +6 -6
  55. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  56. biotite/sequence/align/localgapped.pyx +47 -47
  57. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  58. biotite/sequence/align/localungapped.pyx +10 -10
  59. biotite/sequence/align/matrix.py +12 -3
  60. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  61. biotite/sequence/align/multiple.pyx +1 -2
  62. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  63. biotite/sequence/align/pairwise.pyx +37 -39
  64. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  65. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  66. biotite/sequence/align/selector.pyx +2 -2
  67. biotite/sequence/align/statistics.py +1 -1
  68. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  69. biotite/sequence/alphabet.py +2 -2
  70. biotite/sequence/annotation.py +19 -13
  71. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  72. biotite/sequence/codon.py +1 -2
  73. biotite/sequence/graphics/alignment.py +25 -39
  74. biotite/sequence/graphics/dendrogram.py +4 -2
  75. biotite/sequence/graphics/features.py +2 -2
  76. biotite/sequence/graphics/logo.py +10 -12
  77. biotite/sequence/io/fasta/convert.py +1 -2
  78. biotite/sequence/io/fasta/file.py +1 -1
  79. biotite/sequence/io/fastq/file.py +3 -3
  80. biotite/sequence/io/genbank/file.py +3 -3
  81. biotite/sequence/io/genbank/sequence.py +2 -0
  82. biotite/sequence/io/gff/convert.py +1 -1
  83. biotite/sequence/io/gff/file.py +1 -2
  84. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  85. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  86. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  87. biotite/sequence/profile.py +19 -25
  88. biotite/sequence/search.py +0 -1
  89. biotite/sequence/seqtypes.py +12 -5
  90. biotite/sequence/sequence.py +1 -2
  91. biotite/structure/__init__.py +2 -0
  92. biotite/structure/alphabet/i3d.py +1 -2
  93. biotite/structure/alphabet/pb.py +1 -2
  94. biotite/structure/alphabet/unkerasify.py +8 -2
  95. biotite/structure/atoms.py +35 -27
  96. biotite/structure/basepairs.py +39 -40
  97. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  98. biotite/structure/bonds.pyx +8 -5
  99. biotite/structure/box.py +159 -23
  100. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  101. biotite/structure/celllist.pyx +83 -68
  102. biotite/structure/chains.py +17 -55
  103. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  104. biotite/structure/compare.py +420 -13
  105. biotite/structure/density.py +1 -1
  106. biotite/structure/dotbracket.py +31 -32
  107. biotite/structure/filter.py +8 -8
  108. biotite/structure/geometry.py +15 -15
  109. biotite/structure/graphics/rna.py +19 -16
  110. biotite/structure/hbond.py +18 -21
  111. biotite/structure/info/atoms.py +11 -2
  112. biotite/structure/info/ccd.py +0 -2
  113. biotite/structure/info/components.bcif +0 -0
  114. biotite/structure/info/groups.py +0 -3
  115. biotite/structure/info/misc.py +0 -1
  116. biotite/structure/info/radii.py +92 -22
  117. biotite/structure/info/standardize.py +1 -2
  118. biotite/structure/integrity.py +4 -6
  119. biotite/structure/io/general.py +2 -2
  120. biotite/structure/io/gro/file.py +8 -9
  121. biotite/structure/io/mol/convert.py +1 -1
  122. biotite/structure/io/mol/ctab.py +33 -28
  123. biotite/structure/io/mol/mol.py +1 -1
  124. biotite/structure/io/mol/sdf.py +39 -13
  125. biotite/structure/io/pdb/convert.py +86 -5
  126. biotite/structure/io/pdb/file.py +90 -24
  127. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  128. biotite/structure/io/pdbqt/file.py +4 -4
  129. biotite/structure/io/pdbx/bcif.py +22 -7
  130. biotite/structure/io/pdbx/cif.py +20 -7
  131. biotite/structure/io/pdbx/component.py +6 -0
  132. biotite/structure/io/pdbx/compress.py +71 -34
  133. biotite/structure/io/pdbx/convert.py +429 -77
  134. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  135. biotite/structure/io/pdbx/encoding.pyx +39 -23
  136. biotite/structure/io/trajfile.py +9 -6
  137. biotite/structure/io/util.py +38 -0
  138. biotite/structure/mechanics.py +0 -1
  139. biotite/structure/molecules.py +0 -15
  140. biotite/structure/pseudoknots.py +13 -19
  141. biotite/structure/repair.py +2 -4
  142. biotite/structure/residues.py +20 -48
  143. biotite/structure/rings.py +335 -0
  144. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  145. biotite/structure/sasa.pyx +30 -30
  146. biotite/structure/segments.py +123 -9
  147. biotite/structure/sequence.py +0 -1
  148. biotite/structure/spacegroups.json +1567 -0
  149. biotite/structure/spacegroups.license +26 -0
  150. biotite/structure/sse.py +0 -2
  151. biotite/structure/superimpose.py +75 -253
  152. biotite/structure/tm.py +581 -0
  153. biotite/structure/transform.py +232 -26
  154. biotite/structure/util.py +3 -3
  155. biotite/version.py +9 -4
  156. biotite/visualize.py +111 -1
  157. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
  158. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
  159. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
  160. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -28,10 +28,10 @@ ctypedef np.uint8_t uint8
28
28
  cdef class CellList:
29
29
  """
30
30
  __init__(atom_array, cell_size, periodic=False, box=None, selection=None)
31
-
31
+
32
32
  This class enables the efficient search of atoms in vicinity of a
33
33
  defined location.
34
-
34
+
35
35
  This class stores the indices of an atom array in virtual "cells",
36
36
  each corresponding to a specific coordinate interval.
37
37
  If the atoms in vicinity of a specific location are searched, only
@@ -41,7 +41,7 @@ cdef class CellList:
41
41
  after the :class:`CellList` has been created.
42
42
  Therefore a :class:`CellList` saves calculation time in those
43
43
  cases, where vicinity is checked for multiple locations.
44
-
44
+
45
45
  Parameters
46
46
  ----------
47
47
  atom_array : AtomArray or ndarray, dtype=float, shape=(n,3)
@@ -55,7 +55,6 @@ cdef class CellList:
55
55
  periodic : bool, optional
56
56
  If true, the cell list considers periodic copies of atoms.
57
57
  The periodicity is based on the `box` attribute of `atom_array`.
58
- (Default: False)
59
58
  box : ndarray, dtype=float, shape=(3,3), optional
60
59
  If provided, the periodicity is based on this parameter instead
61
60
  of the :attr:`box` attribute of `atom_array`.
@@ -64,14 +63,14 @@ cdef class CellList:
64
63
  If provided, only the atoms masked by this array are stored in
65
64
  the cell list. However, the indices stored in the cell list
66
65
  will still refer to the original unfiltered `atom_array`.
67
-
66
+
68
67
  Examples
69
68
  --------
70
-
69
+
71
70
  >>> cell_list = CellList(atom_array, cell_size=5)
72
71
  >>> near_atoms = atom_array[cell_list.get_atoms(np.array([1,2,3]), radius=7.0)]
73
72
  """
74
-
73
+
75
74
  # The atom coordinates
76
75
  cdef float32[:,:] _coord
77
76
  # A boolean mask that covers the selected atoms
@@ -99,8 +98,8 @@ cdef class CellList:
99
98
  cdef int _orig_length
100
99
  cdef float32[:] _orig_min_coord
101
100
  cdef float32[:] _orig_max_coord
102
-
103
-
101
+
102
+
104
103
  @cython.initializedcheck(False)
105
104
  @cython.boundscheck(False)
106
105
  @cython.wraparound(False)
@@ -119,14 +118,10 @@ cdef class CellList:
119
118
  # if 'periodic' is true
120
119
  self._orig_length = coord.shape[0]
121
120
  self._box = None
122
- if coord.ndim != 2:
123
- raise ValueError("Coordinates must have shape (n,3)")
124
- if coord.shape[0] == 0:
125
- raise ValueError("Coordinates must not be empty")
126
- if coord.shape[1] != 3:
127
- raise ValueError("Coordinates must have form (x,y,z)")
128
- if np.isnan(coord).any():
129
- raise ValueError("Coordinates contain NaN values")
121
+ if selection is None:
122
+ _check_coord(coord)
123
+ else:
124
+ _check_coord(coord[selection])
130
125
 
131
126
  if periodic:
132
127
  if box is not None:
@@ -145,7 +140,7 @@ cdef class CellList:
145
140
  raise ValueError("Box contains NaN values")
146
141
  coord = move_inside_box(coord, self._box)
147
142
  coord, indices = repeat_box_coord(coord, self._box)
148
-
143
+
149
144
  if self._has_initialized_cells():
150
145
  raise Exception("Duplicate call of constructor")
151
146
  self._cells = None
@@ -155,23 +150,23 @@ cdef class CellList:
155
150
  self._coord = coord.astype(np.float32, copy=False)
156
151
  self._cellsize = cell_size
157
152
  # calculate how many cells are required for each dimension
158
- min_coord = np.min(coord, axis=0).astype(np.float32)
159
- max_coord = np.max(coord, axis=0).astype(np.float32)
153
+ min_coord = np.nanmin(coord, axis=0).astype(np.float32)
154
+ max_coord = np.nanmax(coord, axis=0).astype(np.float32)
160
155
  self._min_coord = min_coord
161
156
  self._max_coord = max_coord
162
157
  cell_count = (((max_coord - min_coord) / cell_size) +1).astype(int)
163
158
  if self._periodic:
164
- self._orig_min_coord = np.min(coord[:self._orig_length], axis=0) \
159
+ self._orig_min_coord = np.nanmin(coord[:self._orig_length], axis=0) \
165
160
  .astype(np.float32)
166
- self._orig_max_coord = np.max(coord[:self._orig_length], axis=0) \
161
+ self._orig_max_coord = np.nanmax(coord[:self._orig_length], axis=0) \
167
162
  .astype(np.float32)
168
-
163
+
169
164
  # ndarray of pointers to C-arrays
170
165
  # containing indices to atom array
171
166
  self._cells = np.zeros(cell_count, dtype=np.uint64)
172
167
  # Stores the length of the C-arrays
173
168
  self._cell_length = np.zeros(cell_count, dtype=np.int32)
174
-
169
+
175
170
  # Prepare selection
176
171
  if selection is not None:
177
172
  self._has_selection = True
@@ -183,7 +178,7 @@ cdef class CellList:
183
178
  )
184
179
  else:
185
180
  self._has_selection = False
186
-
181
+
187
182
  # Fill cells
188
183
  for atom_array_i in range(self._coord.shape[0]):
189
184
  # Only put selected atoms into cell list
@@ -208,34 +203,34 @@ cdef class CellList:
208
203
  # Store new cell pointer and length
209
204
  self._cell_length[i,j,k] = length
210
205
  self._cells[i,j,k] = <ptr> cell_ptr
211
-
212
-
206
+
207
+
213
208
  def __dealloc__(self):
214
209
  if self._has_initialized_cells():
215
210
  deallocate_ptrs(self._cells)
216
-
217
-
211
+
212
+
218
213
  @cython.initializedcheck(False)
219
214
  @cython.boundscheck(False)
220
215
  @cython.wraparound(False)
221
216
  def create_adjacency_matrix(self, float32 threshold_distance):
222
217
  """
223
218
  create_adjacency_matrix(threshold_distance)
224
-
219
+
225
220
  Create an adjacency matrix for the atoms in this cell list.
226
221
 
227
222
  An adjacency matrix depicts which atoms *i* and *j* have
228
223
  a distance lower than a given threshold distance.
229
224
  The values in the adjacency matrix ``m`` are
230
225
  ``m[i,j] = 1 if distance(i,j) <= threshold else 0``
231
-
226
+
232
227
  Parameters
233
228
  ----------
234
229
  threshold_distance : float
235
230
  The threshold distance. All atom pairs that have a distance
236
231
  lower than this value are indicated by ``True`` values in
237
232
  the resulting matrix.
238
-
233
+
239
234
  Returns
240
235
  -------
241
236
  matrix : ndarray, dtype=bool, shape=(n,n)
@@ -244,7 +239,7 @@ cdef class CellList:
244
239
  :class:`CellList`, the rows and columns corresponding to
245
240
  atoms, that are not masked by the selection, have all
246
241
  elements set to ``False``.
247
-
242
+
248
243
  Notes
249
244
  -----
250
245
  The highest performance is achieved when the cell size is
@@ -269,7 +264,7 @@ cdef class CellList:
269
264
  if threshold_distance < 0:
270
265
  raise ValueError("Threshold must be a positive value")
271
266
  cdef int i=0
272
-
267
+
273
268
  # Get atom position for all original positions
274
269
  # (no periodic copies)
275
270
  coord = np.asarray(self._coord[:self._orig_length])
@@ -287,17 +282,17 @@ cdef class CellList:
287
282
  return matrix
288
283
  else:
289
284
  return self.get_atoms(coord, threshold_distance, as_mask=True)
290
-
291
-
285
+
286
+
292
287
  @cython.initializedcheck(False)
293
288
  @cython.boundscheck(False)
294
289
  @cython.wraparound(False)
295
290
  def get_atoms(self, np.ndarray coord, radius, bint as_mask=False):
296
291
  """
297
292
  get_atoms(coord, radius, as_mask=False)
298
-
293
+
299
294
  Find atoms with a maximum distance from given coordinates.
300
-
295
+
301
296
  Parameters
302
297
  ----------
303
298
  coord : ndarray, dtype=float, shape=(3,) or shape=(m,3)
@@ -321,7 +316,7 @@ cdef class CellList:
321
316
  as_mask : bool, optional
322
317
  If true, the result is returned as boolean mask, instead
323
318
  of an index array.
324
-
319
+
325
320
  Returns
326
321
  -------
327
322
  indices : ndarray, dtype=int32, shape=(p,) or shape=(m,p)
@@ -335,7 +330,7 @@ cdef class CellList:
335
330
  The values are true for atoms in the atom array,
336
331
  that are in the defined vicinity.
337
332
  Only returned with `as_mask` set to true.
338
-
333
+
339
334
  See Also
340
335
  --------
341
336
  get_atoms_in_cells
@@ -376,7 +371,7 @@ cdef class CellList:
376
371
  A 18 PRO HA H 2.719 3.181 1.316
377
372
  A 18 PRO HB3 H 2.781 3.223 3.618
378
373
  A 18 PRO CB C 3.035 4.190 3.187
379
-
374
+
380
375
  Get adjacent atoms for multiple positions:
381
376
 
382
377
  >>> cell_list = CellList(atom_array, 3)
@@ -403,14 +398,14 @@ cdef class CellList:
403
398
  cdef float32 sq_radius
404
399
  cdef float32[:] sq_radii
405
400
  cdef np.ndarray cell_radii
406
-
401
+
407
402
  cdef int[:,:] all_indices
408
403
  cdef int[:,:] indices
409
404
  cdef float32[:,:] coord_v
410
405
 
411
406
  if len(coord) == 0:
412
407
  return _empty_result(as_mask)
413
-
408
+
414
409
  # Handle periodicity for the input coordinates
415
410
  if self._periodic:
416
411
  coord = move_inside_box(coord, self._box)
@@ -437,7 +432,7 @@ cdef class CellList:
437
432
  )
438
433
  # These have to be narrowed down in the next step
439
434
  # using the Euclidian distance
440
-
435
+
441
436
  # Filter all indices from all_indices
442
437
  # where squared distance is smaller than squared radius
443
438
  # Using the squared distance is computationally cheaper than
@@ -464,23 +459,23 @@ cdef class CellList:
464
459
  array_i += 1
465
460
  if array_i > max_array_length:
466
461
  max_array_length = array_i
467
-
462
+
468
463
  return self._post_process(
469
464
  np.asarray(indices)[:, :max_array_length],
470
465
  as_mask, is_multi_coord
471
466
  )
472
-
473
-
467
+
468
+
474
469
  @cython.boundscheck(False)
475
470
  @cython.wraparound(False)
476
471
  def get_atoms_in_cells(self, np.ndarray coord,
477
472
  cell_radius=1, bint as_mask=False):
478
473
  """
479
474
  get_atoms_in_cells(coord, cell_radius=1, as_mask=False)
480
-
475
+
481
476
  Find atoms with a maximum cell distance from given
482
477
  coordinates.
483
-
478
+
484
479
  Instead of using the radius as maximum euclidian distance to the
485
480
  given coordinates,
486
481
  the radius is measured as the amount of cells:
@@ -489,7 +484,7 @@ cdef class CellList:
489
484
  that the atoms indices from this cell and the 8 surrounding
490
485
  cells are returned and so forth.
491
486
  This is more efficient than `get_atoms()`.
492
-
487
+
493
488
  Parameters
494
489
  ----------
495
490
  coord : ndarray, dtype=float, shape=(3,) or shape=(m,3)
@@ -518,7 +513,7 @@ cdef class CellList:
518
513
  as_mask : bool, optional
519
514
  If true, the result is returned as boolean mask, instead
520
515
  of an index array.
521
-
516
+
522
517
  Returns
523
518
  -------
524
519
  indices : ndarray, dtype=int32, shape=(p,) or shape=(m,p)
@@ -563,8 +558,8 @@ cdef class CellList:
563
558
  coord, cell_radius, is_multi_radius
564
559
  )
565
560
  return self._post_process(array_indices, as_mask, is_multi_coord)
566
-
567
-
561
+
562
+
568
563
  @cython.boundscheck(False)
569
564
  @cython.wraparound(False)
570
565
  def _get_atoms_in_cells(self,
@@ -573,7 +568,7 @@ cdef class CellList:
573
568
  bint is_multi_radius):
574
569
  """
575
570
  Get the indices of atoms in `cell_radii` adjacency of `coord`.
576
-
571
+
577
572
  Parameters
578
573
  ----------
579
574
  coord : ndarray, dtype=float32, shape=(n,3)
@@ -583,7 +578,7 @@ cdef class CellList:
583
578
  is_multi_radius : bool
584
579
  True indicates, that all values in `cell_radii` are the
585
580
  same.
586
-
581
+
587
582
  Returns
588
583
  -------
589
584
  array_indices : ndarray, dtype=int32, shape=(m,p)
@@ -607,8 +602,8 @@ cdef class CellList:
607
602
  cdef int max_array_length \
608
603
  = self._find_adjacent_atoms(coord, array_indices, cell_radii)
609
604
  return array_indices[:, :max_array_length]
610
-
611
-
605
+
606
+
612
607
  @cython.boundscheck(False)
613
608
  @cython.wraparound(False)
614
609
  cdef int _find_adjacent_atoms(self,
@@ -618,7 +613,7 @@ cdef class CellList:
618
613
  """
619
614
  This method fills the given empty index array
620
615
  with actual indices of adjacent atoms.
621
-
616
+
622
617
  Since the length of 'indices' (second dimension) is
623
618
  the worst case assumption, this method returns the actual
624
619
  required length, i.e. the highest length of all arrays
@@ -632,11 +627,17 @@ cdef class CellList:
632
627
  cdef int pos_i, array_i, cell_i
633
628
  cdef int max_array_length = 0
634
629
  cdef int cell_r
635
-
630
+
636
631
  cdef ptr[:,:,:] cells = self._cells
637
632
  cdef int[:,:,:] cell_length = self._cell_length
633
+ cdef uint8[:] finite_mask = (
634
+ np.isfinite(np.asarray(coord)).all(axis=-1).astype(np.uint8, copy=False)
635
+ )
638
636
 
639
637
  for pos_i in range(coord.shape[0]):
638
+ if not finite_mask[pos_i]:
639
+ # For non-finite coordinates, there are no adjacent atoms
640
+ continue
640
641
  array_i = 0
641
642
  cell_r = cell_radius[pos_i]
642
643
  x = coord[pos_i, 0]
@@ -663,7 +664,7 @@ cdef class CellList:
663
664
  if array_i > max_array_length:
664
665
  max_array_length = array_i
665
666
  return max_array_length
666
-
667
+
667
668
 
668
669
  @cython.boundscheck(False)
669
670
  @cython.wraparound(False)
@@ -695,8 +696,8 @@ cdef class CellList:
695
696
  return indices
696
697
  else:
697
698
  return indices[0]
698
-
699
-
699
+
700
+
700
701
  @cython.initializedcheck(False)
701
702
  @cython.boundscheck(False)
702
703
  @cython.wraparound(False)
@@ -706,7 +707,7 @@ cdef class CellList:
706
707
  i[0] = <int>((x - self._min_coord[0]) / self._cellsize)
707
708
  j[0] = <int>((y - self._min_coord[1]) / self._cellsize)
708
709
  k[0] = <int>((z - self._min_coord[2]) / self._cellsize)
709
-
710
+
710
711
  @cython.initializedcheck(False)
711
712
  @cython.boundscheck(False)
712
713
  @cython.wraparound(False)
@@ -718,7 +719,7 @@ cdef class CellList:
718
719
  if z < self._min_coord[2] or z > self._max_coord[2]:
719
720
  return False
720
721
  return True
721
-
722
+
722
723
  @cython.initializedcheck(False)
723
724
  @cython.boundscheck(False)
724
725
  @cython.wraparound(False)
@@ -737,7 +738,7 @@ cdef class CellList:
737
738
  break
738
739
  matrix[i, index] = True
739
740
  return np.asarray(matrix, dtype=bool)
740
-
741
+
741
742
  cdef inline bint _has_initialized_cells(self):
742
743
  # Memoryviews are not initialized on class creation
743
744
  # This method checks if the _cells memoryview was initialized
@@ -751,6 +752,20 @@ cdef class CellList:
751
752
  return False
752
753
 
753
754
 
755
+ def _check_coord(coord):
756
+ """
757
+ Perform checks on validity of coordinates.
758
+ """
759
+ if coord.ndim != 2:
760
+ raise ValueError("Coordinates must have shape (n,3)")
761
+ if coord.shape[0] == 0:
762
+ raise ValueError("Coordinates must not be empty")
763
+ if coord.shape[1] != 3:
764
+ raise ValueError("Coordinates must have form (x,y,z)")
765
+ if not np.isfinite(coord).all():
766
+ raise ValueError("Coordinates contain non-finite values")
767
+
768
+
754
769
  def _empty_result(as_mask):
755
770
  """
756
771
  Create return value for :func:`get_atoms()` and
@@ -774,11 +789,11 @@ def _prepare_vectorization(np.ndarray coord, radius, radius_dtype):
774
789
  radii/coordinates were given.
775
790
 
776
791
  The shapes before and after conversion are:
777
-
792
+
778
793
  - coord: (3, ), radius: scalar -> coord: (1,3), radius: (1,)
779
794
  - coord: (n,3), radius: scalar -> coord: (n,3), radius: (n,)
780
795
  - coord: (n,3), radius: (n, ) -> coord: (n,3), radius: (n,)
781
-
796
+
782
797
  The resulting values have the same dimensionality for all cases and
783
798
  can be handled uniformly by `get_atoms()` and
784
799
  `get_atoms_in_cells()`.
@@ -798,7 +813,7 @@ def _prepare_vectorization(np.ndarray coord, radius, radius_dtype):
798
813
  raise ValueError(
799
814
  f"Invalid shape for input coordinates"
800
815
  )
801
-
816
+
802
817
  if isinstance(radius, np.ndarray):
803
818
  # Multiple radii
804
819
  # Check whether amount of coordinates match amount of radii
@@ -22,23 +22,23 @@ __all__ = [
22
22
  "chain_iter",
23
23
  ]
24
24
 
25
- import numpy as np
26
25
  from biotite.structure.segments import (
27
26
  apply_segment_wise,
28
27
  get_segment_masks,
29
28
  get_segment_positions,
29
+ get_segment_starts,
30
30
  get_segment_starts_for,
31
31
  segment_iter,
32
32
  spread_segment_wise,
33
33
  )
34
34
 
35
35
 
36
- def get_chain_starts(array, add_exclusive_stop=False):
36
+ def get_chain_starts(array, add_exclusive_stop=False, extra_categories=()):
37
37
  """
38
38
  Get the indices in an atom array, which indicates the beginning of
39
39
  a new chain.
40
40
 
41
- A new chain starts, when the chain ID changes or when the residue ID
41
+ A new chain starts, when the chain or sym ID changes or when the residue ID
42
42
  decreases.
43
43
 
44
44
  Parameters
@@ -49,6 +49,9 @@ def get_chain_starts(array, add_exclusive_stop=False):
49
49
  If true, the exclusive stop of the input atom array, i.e.
50
50
  ``array.array_length()``, is added to the returned array of
51
51
  start indices as last element.
52
+ extra_categories : tuple of str, optional
53
+ Additional annotation categories that induce the start of a new chain,
54
+ when their value changes from one atom to the next.
52
55
 
53
56
  Returns
54
57
  -------
@@ -59,26 +62,16 @@ def get_chain_starts(array, add_exclusive_stop=False):
59
62
  -----
60
63
  This method is internally used by all other chain-related
61
64
  functions.
62
-
63
- See also
64
- --------
65
- get_residue_starts
66
65
  """
67
- diff = np.diff(array.res_id)
68
- res_id_decrement = diff < 0
69
- # This mask is 'true' at indices where the value changes
70
- chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
71
-
72
- # Convert mask to indices
73
- # Add 1, to shift the indices from the end of a chain
74
- # to the start of a new chain
75
- chain_starts = np.where(res_id_decrement | chain_id_changes)[0] + 1
76
-
77
- # The first chain is not included yet -> Insert '[0]'
78
- if add_exclusive_stop:
79
- return np.concatenate(([0], chain_starts, [array.array_length()]))
80
- else:
81
- return np.concatenate(([0], chain_starts))
66
+ categories = ["chain_id"] + list(extra_categories)
67
+ if "sym_id" in array.get_annotation_categories():
68
+ categories.append("sym_id")
69
+ return get_segment_starts(
70
+ array,
71
+ add_exclusive_stop,
72
+ continuous_categories=("res_id",),
73
+ equal_categories=categories,
74
+ )
82
75
 
83
76
 
84
77
  def apply_chain_wise(array, data, function, axis=None):
@@ -115,10 +108,6 @@ def apply_chain_wise(array, data, function, axis=None):
115
108
  Chain-wise evaluation of `data` by `function`. The size of the
116
109
  first dimension of this array is equal to the amount of
117
110
  chains.
118
-
119
- See also
120
- --------
121
- apply_residue_wise
122
111
  """
123
112
  starts = get_chain_starts(array, add_exclusive_stop=True)
124
113
  return apply_segment_wise(starts, data, function, axis)
@@ -148,10 +137,6 @@ def spread_chain_wise(array, input_data):
148
137
  output_data : ndarray
149
138
  Chain-wise spread `input_data`. Length is the same as
150
139
  `array_length()` of `array`.
151
-
152
- See also
153
- --------
154
- spread_residue_wise
155
140
  """
156
141
  starts = get_chain_starts(array, add_exclusive_stop=True)
157
142
  return spread_segment_wise(starts, input_data)
@@ -177,10 +162,6 @@ def get_chain_masks(array, indices):
177
162
  Multiple boolean masks, one for each given index in `indices`.
178
163
  Each array masks the atoms that belong to the same chain as
179
164
  the atom at the given index.
180
-
181
- See also
182
- --------
183
- get_residue_masks
184
165
  """
185
166
  starts = get_chain_starts(array, add_exclusive_stop=True)
186
167
  return get_segment_masks(starts, indices)
@@ -205,10 +186,6 @@ def get_chain_starts_for(array, indices):
205
186
  start_indices : ndarray, dtype=int, shape=(k,)
206
187
  The indices that point to the chain starts for the input
207
188
  `indices`.
208
-
209
- See also
210
- --------
211
- get_residue_starts_for
212
189
  """
213
190
  starts = get_chain_starts(array, add_exclusive_stop=True)
214
191
  return get_segment_starts_for(starts, indices)
@@ -235,10 +212,6 @@ def get_chain_positions(array, indices):
235
212
  -------
236
213
  start_indices : ndarray, dtype=int, shape=(k,)
237
214
  The indices that point to the position of the chains.
238
-
239
- See also
240
- --------
241
- get_residue_positions
242
215
  """
243
216
  starts = get_chain_starts(array, add_exclusive_stop=True)
244
217
  return get_segment_positions(starts, indices)
@@ -260,10 +233,6 @@ def get_chains(array):
260
233
  -------
261
234
  ids : ndarray, dtype=str
262
235
  List of chain IDs.
263
-
264
- See also
265
- --------
266
- get_residues
267
236
  """
268
237
  return array.chain_id[get_chain_starts(array)]
269
238
 
@@ -284,10 +253,6 @@ def get_chain_count(array):
284
253
  -------
285
254
  count : int
286
255
  Amount of chains.
287
-
288
- See also
289
- --------
290
- get_residue_count
291
256
  """
292
257
  return len(get_chain_starts(array))
293
258
 
@@ -305,10 +270,7 @@ def chain_iter(array):
305
270
  ------
306
271
  chain : AtomArray or AtomArrayStack
307
272
  A single chain of the input `array`.
308
-
309
- See also
310
- --------
311
- residue_iter
312
273
  """
313
274
  starts = get_chain_starts(array, add_exclusive_stop=True)
314
- return segment_iter(array, starts)
275
+ for chain in segment_iter(array, starts):
276
+ yield chain