biotite 1.2.0__cp313-cp313-win_amd64.whl → 1.4.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (62) hide show
  1. biotite/application/viennarna/rnaplot.py +7 -7
  2. biotite/interface/openmm/__init__.py +4 -0
  3. biotite/interface/pymol/__init__.py +3 -0
  4. biotite/interface/pymol/object.py +3 -1
  5. biotite/interface/rdkit/__init__.py +4 -0
  6. biotite/interface/rdkit/mol.py +5 -5
  7. biotite/interface/version.py +23 -0
  8. biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
  9. biotite/sequence/align/banded.pyx +1 -1
  10. biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
  11. biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
  13. biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
  14. biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
  15. biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
  16. biotite/sequence/align/multiple.pyx +1 -2
  17. biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
  18. biotite/sequence/align/pairwise.pyx +2 -4
  19. biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
  20. biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
  21. biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
  22. biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
  23. biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
  24. biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
  25. biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
  26. biotite/structure/basepairs.py +13 -14
  27. biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
  28. biotite/structure/bonds.pyx +67 -6
  29. biotite/structure/box.py +141 -3
  30. biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
  31. biotite/structure/celllist.pyx +0 -1
  32. biotite/structure/chains.py +15 -21
  33. biotite/structure/charges.cp313-win_amd64.pyd +0 -0
  34. biotite/structure/compare.py +2 -0
  35. biotite/structure/dotbracket.py +4 -4
  36. biotite/structure/graphics/rna.py +19 -16
  37. biotite/structure/hbond.py +1 -2
  38. biotite/structure/info/components.bcif +0 -0
  39. biotite/structure/io/pdb/convert.py +84 -2
  40. biotite/structure/io/pdb/file.py +94 -7
  41. biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
  42. biotite/structure/io/pdbx/bcif.py +6 -3
  43. biotite/structure/io/pdbx/cif.py +5 -2
  44. biotite/structure/io/pdbx/compress.py +71 -34
  45. biotite/structure/io/pdbx/convert.py +226 -58
  46. biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
  47. biotite/structure/io/pdbx/encoding.pyx +39 -23
  48. biotite/structure/pseudoknots.py +6 -6
  49. biotite/structure/residues.py +10 -27
  50. biotite/structure/rings.py +118 -2
  51. biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
  52. biotite/structure/sasa.pyx +28 -29
  53. biotite/structure/segments.py +55 -0
  54. biotite/structure/spacegroups.json +1567 -0
  55. biotite/structure/spacegroups.license +26 -0
  56. biotite/structure/superimpose.py +1 -191
  57. biotite/structure/transform.py +220 -1
  58. biotite/version.py +2 -2
  59. {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/METADATA +4 -34
  60. {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/RECORD +62 -60
  61. {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/WHEEL +1 -1
  62. {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
230
230
  # since the file content may be invalid/malicious.
231
231
  raise NotImplementedError()
232
232
 
233
+ def __str__(self):
234
+ # Restore original behavior, as `__str__()` implementation of `_Component`
235
+ # may require serialization, which is not possible for some encodings prior
236
+ # to the first encoding pass
237
+ return object.__str__(self)
238
+
233
239
 
234
240
  @dataclass
235
241
  class ByteArrayEncoding(Encoding):
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
325
331
  )
326
332
 
327
333
  # Round to avoid wrong values due to floating point inaccuracies
328
- return np.round(data * self.factor).astype(np.int32)
334
+ scaled_data = np.round(data * self.factor)
335
+ return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)
329
336
 
330
337
  def decode(self, data):
331
338
  return (data / self.factor).astype(
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
392
399
  self.min, self.max, self.num_steps, dtype=data.dtype
393
400
  )
394
401
  indices = np.searchsorted(steps, data, side="left")
395
- return indices.astype(np.int32, copy=False)
402
+ return _safe_cast(indices, np.int32)
396
403
 
397
404
  def decode(self, data):
398
405
  output = data * (self.max - self.min) / (self.num_steps - 1)
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
570
577
  if self.origin is None:
571
578
  self.origin = data[0]
572
579
 
580
+ # Differences (including `np.diff`) return an array with the same dtype as the
581
+ # input array
582
+ # As the input dtype may be unsigned, the output dtype could underflow,
583
+ # if the difference is negative
584
+ # -> cast to int64 to avoid this
585
+ data = data.astype(np.int64, copy=False)
573
586
  data = data - self.origin
574
- return np.diff(data, prepend=0).astype(np.int32, copy=False)
587
+ return _safe_cast(np.diff(data, prepend=0), np.int32)
575
588
 
576
589
  def decode(self, data):
577
590
  output = np.cumsum(data, dtype=self.src_type.to_dtype())
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
635
648
  # Only positive values -> use unsigned integers
636
649
  self.is_unsigned = data.min().item() >= 0
637
650
 
638
- data = data.astype(np.int32, copy=False)
651
+ data = _safe_cast(data, np.int32)
639
652
  return self._encode(
640
653
  data, np.empty(0, dtype=self._determine_packed_dtype())
641
654
  )
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
870
883
  else:
871
884
  check_present = True
872
885
 
873
- string_order = np.argsort(self.strings).astype(np.int32)
886
+ string_order = _safe_cast(np.argsort(self.strings), np.int32)
874
887
  sorted_strings = self.strings[string_order]
875
888
  sorted_indices = np.searchsorted(sorted_strings, data)
876
889
  indices = string_order[sorted_indices]
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
1010
1023
  return attribute_name[0].lower() + attribute_name[1:]
1011
1024
 
1012
1025
 
1013
- def _safe_cast(array, dtype):
1014
- dtype = np.dtype(dtype)
1015
- if dtype == array.dtype:
1026
+ def _safe_cast(array, dtype, allow_decimal_loss=False):
1027
+ source_dtype = array.dtype
1028
+ target_dtype = np.dtype(dtype)
1029
+
1030
+ if target_dtype == source_dtype:
1016
1031
  return array
1017
- if np.issubdtype(dtype, np.integer):
1018
- if not np.issubdtype(array.dtype, np.integer):
1019
- raise ValueError("Cannot cast floating point to integer")
1020
- dtype_info = np.iinfo(dtype)
1021
- if np.any(array < dtype_info.min) or np.any(array > dtype_info.max):
1022
- raise ValueError("Integer values do not fit into the given dtype")
1023
- return array.astype(dtype)
1024
-
1025
-
1026
- def _get_n_decimals(value, tolerance):
1027
- MAX_DECIMALS = 10
1028
- for n in range(MAX_DECIMALS):
1029
- if abs(value - round(value, n)) < tolerance:
1030
- return n
1031
- return MAX_DECIMALS
1032
+
1033
+ if np.issubdtype(target_dtype, np.integer):
1034
+ if np.issubdtype(source_dtype, np.floating):
1035
+ if not allow_decimal_loss:
1036
+ raise ValueError("Cannot cast floating point to integer")
1037
+ if not np.isfinite(array).all():
1038
+ raise ValueError("Data contains non-finite values")
1039
+ elif not np.issubdtype(source_dtype, np.integer):
1040
+ # Neither float, nor integer -> cannot cast
1041
+ raise ValueError(f"Cannot cast '{source_dtype}' to integer")
1042
+ dtype_info = np.iinfo(target_dtype)
1043
+ # Check if an integer underflow/overflow would occur during conversion
1044
+ if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
1045
+ raise ValueError("Values do not fit into the given dtype")
1046
+
1047
+ return array.astype(target_dtype)
@@ -148,7 +148,7 @@ class _Region:
148
148
  region_pairs : ndarray, dtype=int
149
149
  The indices of the base pairs in ``base_pairs`` that are part of
150
150
  the region.
151
- scores : ndarray, dtype=int, shape=(n,) (default: None)
151
+ scores : ndarray, dtype=int, shape=(n,)
152
152
  The score for each base pair.
153
153
  """
154
154
 
@@ -202,7 +202,7 @@ def _find_regions(base_pairs, scores):
202
202
  base_pairs : ndarray, dtype=int, shape=(n, 2)
203
203
  Each row is equivalent to one base pair and contains the first
204
204
  indices of the residues corresponding to each base.
205
- scores : ndarray, dtype=int, shape=(n,) (default: None)
205
+ scores : ndarray, dtype=int, shape=(n,)
206
206
  The score for each base pair.
207
207
 
208
208
  Returns
@@ -352,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
352
352
  return i
353
353
 
354
354
 
355
- def _get_region_array_for(regions, content=[], dtype=[]):
355
+ def _get_region_array_for(regions, content=(), dtype=()):
356
356
  """
357
357
  Get a :class:`ndarray` of region objects. Each object occurs twice,
358
358
  representing its start and end point. The regions positions in the
@@ -365,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
365
365
  ----------
366
366
  regions : set {_region, ...}
367
367
  The regions to be considered
368
- content : list [function, ...] (default: [])
368
+ content : list [function, ...]
369
369
  The functions to be considered for custom outputs. For a given
370
370
  region they must return a tuple of which the first value is
371
371
  placed at the start position and the second value at the end
372
372
  position of the region relative to the other regions.
373
- dtype : list [str, ...] (default: [])
373
+ dtype : list [str, ...]
374
374
  The data type of the output of the custom functions.
375
375
 
376
376
  Returns
@@ -554,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
554
554
  The maximum pseudoknot order to be found. If a base pair would
555
555
  be of a higher order, its order is specified as -1. If ``None``
556
556
  is given, all base pairs are evaluated.
557
- order : int (default: 0)
557
+ order : int
558
558
  The order that is currently evaluated.
559
559
 
560
560
  Returns
@@ -21,23 +21,23 @@ __all__ = [
21
21
  "residue_iter",
22
22
  ]
23
23
 
24
- import numpy as np
25
24
  from biotite.structure.segments import (
26
25
  apply_segment_wise,
27
26
  get_segment_masks,
28
27
  get_segment_positions,
28
+ get_segment_starts,
29
29
  get_segment_starts_for,
30
30
  segment_iter,
31
31
  spread_segment_wise,
32
32
  )
33
33
 
34
34
 
35
- def get_residue_starts(array, add_exclusive_stop=False):
35
+ def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
36
36
  """
37
37
  Get indices for an atom array, each indicating the beginning of
38
38
  a residue.
39
39
 
40
- A new residue starts, either when the chain ID, residue ID,
40
+ A new residue starts, either when the chain ID, sym ID, residue ID,
41
41
  insertion code or residue name changes from one to the next atom.
42
42
 
43
43
  Parameters
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
48
48
  If true, the exclusive stop of the input atom array, i.e.
49
49
  ``array.array_length()``, is added to the returned array of
50
50
  start indices as last element.
51
+ extra_categories : tuple of str, optional
52
+ Additional annotation categories that induce the start of a new residue,
53
+ when their value changes from one atom to the next.
51
54
 
52
55
  Returns
53
56
  -------
@@ -69,30 +72,10 @@ def get_residue_starts(array, add_exclusive_stop=False):
69
72
  [ 0 16 35 56 75 92 116 135 157 169 176 183 197 208 219 226 250 264
70
73
  278 292 304]
71
74
  """
72
- if array.array_length() == 0:
73
- return np.array([], dtype=int)
74
-
75
- # These mask are 'true' at indices where the value changes
76
- chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
77
- res_id_changes = array.res_id[1:] != array.res_id[:-1]
78
- ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
79
- res_name_changes = array.res_name[1:] != array.res_name[:-1]
80
-
81
- # If any of these annotation arrays change, a new residue starts
82
- residue_change_mask = (
83
- chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
84
- )
85
-
86
- # Convert mask to indices
87
- # Add 1, to shift the indices from the end of a residue
88
- # to the start of a new residue
89
- residue_starts = np.where(residue_change_mask)[0] + 1
90
-
91
- # The first residue is not included yet -> Insert '[0]'
92
- if add_exclusive_stop:
93
- return np.concatenate(([0], residue_starts, [array.array_length()]))
94
- else:
95
- return np.concatenate(([0], residue_starts))
75
+ categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
76
+ if "sym_id" in array.get_annotation_categories():
77
+ categories.append("sym_id")
78
+ return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
96
79
 
97
80
 
98
81
  def apply_residue_wise(array, data, function, axis=None):
@@ -8,7 +8,12 @@ This module provides functions related to aromatic rings.
8
8
 
9
9
  __name__ = "biotite.structure"
10
10
  __author__ = "Patrick Kunzmann"
11
- __all__ = ["find_aromatic_rings", "find_stacking_interactions", "PiStacking"]
11
+ __all__ = [
12
+ "find_aromatic_rings",
13
+ "find_stacking_interactions",
14
+ "find_pi_cation_interactions",
15
+ "PiStacking",
16
+ ]
12
17
 
13
18
 
14
19
  from enum import IntEnum
@@ -149,7 +154,7 @@ def find_stacking_interactions(
149
154
 
150
155
  The conditions for pi-stacking are :footcite:`Wojcikowski2015` :
151
156
 
152
- - The ring centroids must be within cutoff distance (default: 6.5 Å).
157
+ - The ring centroids must be within the `centroid_cutoff` distance.
153
158
  While :footcite:`Wojcikowski2015` uses a cutoff of 5.0 Å, 6.5 Å was
154
159
  adopted from :footcite:`Bouysset2021` to better identify perpendicular
155
160
  stacking interactions.
@@ -268,6 +273,117 @@ def find_stacking_interactions(
268
273
  ]
269
274
 
270
275
 
276
+ def find_pi_cation_interactions(
277
+ atoms,
278
+ distance_cutoff=5.0,
279
+ angle_tol=np.deg2rad(30.0),
280
+ ):
281
+ """
282
+ Find pi-cation interactions between aromatic rings and cations.
283
+
284
+ Parameters
285
+ ----------
286
+ atoms : AtomArray
287
+ The atoms to be searched for pi-cation interactions.
288
+ Requires an associated :class:`BondList` and ``charge`` annotation.
289
+ distance_cutoff : float, optional
290
+ The cutoff distance between ring centroid and cation.
291
+ angle_tol : float, optional
292
+ The tolerance for the angle between the ring plane normal
293
+ and the centroid-cation vector. Perfect pi-cation interaction
294
+ has 0° angle (perpendicular to ring plane).
295
+ Given in radians.
296
+
297
+ Returns
298
+ -------
299
+ interactions : list of tuple(ndarray, int)
300
+ The pi-cation interactions between aromatic rings and cations.
301
+ Each element in the list represents one pi-cation interaction.
302
+ The first element of each tuple represents atom indices of the
303
+ aromatic ring, the second element is the atom index of the cation.
304
+
305
+ See Also
306
+ --------
307
+ find_aromatic_rings : Used for finding the aromatic rings in this function.
308
+ find_stacking_interactions : Find pi-stacking interactions between rings.
309
+
310
+ Notes
311
+ -----
312
+ The conditions for pi-cation interactions are:
313
+ - The distance between ring centroid and cation must be within
314
+ `distance_cutoff`. :footcite:`Wojcikowski2015` uses 5.0 Å,
315
+ whereas :footcite:`Bouysset2021` uses 4.5 Å.
316
+ - The angle between the ring plane normal and the centroid-cation
317
+ vector must be within `angle_tol` of 0° (perpendicular to plane).
318
+
319
+ Examples
320
+ --------
321
+ >>> from os.path import join
322
+ >>> structure = load_structure(join(path_to_structures, "3wip.cif"), include_bonds=True, extra_fields=["charge"])
323
+ >>> interactions = find_pi_cation_interactions(structure)
324
+ >>> for ring_indices, cation_index in interactions:
325
+ ... print(
326
+ ... structure.res_name[ring_indices[0]],
327
+ ... structure.res_name[cation_index]
328
+ ... )
329
+ TYR ACH
330
+ TRP ACH
331
+ """
332
+ if atoms.bonds is None:
333
+ raise BadStructureError("Structure must have an associated BondList")
334
+
335
+ if atoms.charge is None:
336
+ raise BadStructureError(
337
+ "Structure must have a 'charge' annotation to identify cations."
338
+ )
339
+
340
+ rings = find_aromatic_rings(atoms)
341
+ if len(rings) == 0:
342
+ return []
343
+
344
+ cation_mask = atoms.charge > 0
345
+ cation_indices = np.where(cation_mask)[0]
346
+
347
+ if len(cation_indices) == 0:
348
+ return []
349
+
350
+ # Calculate ring centroids and normals
351
+ ring_centroids = np.array(
352
+ [atoms.coord[atom_indices].mean(axis=0) for atom_indices in rings]
353
+ )
354
+ ring_normals = np.array(
355
+ [_get_ring_normal(atoms.coord[atom_indices]) for atom_indices in rings]
356
+ )
357
+
358
+ cation_coords = atoms.coord[cation_indices]
359
+
360
+ # Create an index array that contains the Cartesian product of all rings and cations
361
+ indices = np.stack(
362
+ [
363
+ np.repeat(np.arange(len(rings)), len(cation_indices)),
364
+ np.tile(np.arange(len(cation_indices)), len(rings)),
365
+ ],
366
+ axis=-1,
367
+ )
368
+
369
+ ## Condition 1: Ring centroids and cations are close enough to each other
370
+ diff = displacement(ring_centroids[indices[:, 0]], cation_coords[indices[:, 1]])
371
+ # Use squared distance to avoid time consuming sqrt computation
372
+ sq_distance = vector_dot(diff, diff)
373
+ is_interacting = sq_distance < distance_cutoff**2
374
+ indices = indices[is_interacting]
375
+
376
+ ## Condition 2: Angle between ring normal and centroid-cation vector
377
+ diff = displacement(ring_centroids[indices[:, 0]], cation_coords[indices[:, 1]])
378
+ norm_vector(diff)
379
+ angles = _minimum_angle(ring_normals[indices[:, 0]], diff)
380
+ is_interacting = _is_within_tolerance(angles, 0, angle_tol)
381
+ indices = indices[is_interacting]
382
+
383
+ # Only return pairs where all conditions were fulfilled
384
+ return [(rings[ring_i], cation_indices[cation_j]) for ring_i, cation_j in indices]
385
+
386
+
271
387
  def _get_ring_normal(ring_coord):
272
388
  """
273
389
  Get the normal vector perpendicular to the ring plane.
Binary file
@@ -35,39 +35,38 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
35
35
  point_number=1000, point_distr="Fibonacci", vdw_radii="ProtOr")
36
36
 
37
37
  Calculate the Solvent Accessible Surface Area (SASA) of a protein.
38
-
38
+
39
39
  This function uses the Shrake-Rupley ("rolling probe")
40
40
  algorithm :footcite:`Shrake1973`:
41
41
  Every atom is occupied by an evenly distributed point mesh. The
42
42
  points that can be reached by the "rolling probe", are surface
43
43
  accessible.
44
-
44
+
45
45
  Parameters
46
46
  ----------
47
47
  array : AtomArray
48
48
  The protein model to calculate the SASA for.
49
49
  probe_radius : float, optional
50
- The VdW-radius of the solvent molecules (default: 1.4).
50
+ The VdW-radius of the solvent molecules.
51
51
  atom_filter : ndarray, dtype=bool, optional
52
52
  If this parameter is given, SASA is only calculated for the
53
53
  filtered atoms.
54
54
  ignore_ions : bool, optional
55
- If true, all monoatomic ions are removed before SASA calculation
56
- (default: True).
55
+ If true, all monoatomic ions are removed before SASA calculation.
57
56
  point_number : int, optional
58
57
  The number of points in the mesh occupying each atom for SASA
59
- calculation (default: 100). The SASA calculation time is
60
- proportional to the amount of sphere points.
58
+ calculation.
59
+ The SASA calculation time is proportional to the amount of sphere points.
61
60
  point_distr : str or function, optional
62
61
  If a function is given, the function is used to calculate the
63
62
  point distribution for the mesh (the function must take `float`
64
63
  *n* as parameter and return a *(n x 3)* :class:`ndarray`).
65
64
  Alternatively a string can be given to choose a built-in
66
65
  distribution:
67
-
66
+
68
67
  - **Fibonacci** - Distribute points using a golden section
69
68
  spiral.
70
-
69
+
71
70
  By default *Fibonacci* is used.
72
71
  vdw_radii : str or ndarray, dtype=float, optional
73
72
  Indicates the set of VdW radii to be used. If an `array`-length
@@ -76,7 +75,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
76
75
  SASA calculation (e.g. solvent atoms) can have arbitrary values
77
76
  (e.g. `NaN`). If instead a string is given, one of the
78
77
  built-in sets is used:
79
-
78
+
80
79
  - **ProtOr** - A set, which does not require hydrogen atoms
81
80
  in the model. Suitable for crystal structures.
82
81
  :footcite:`Tsai1999`
@@ -85,25 +84,25 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
85
84
  in the model (e.g. NMR elucidated structures).
86
85
  Values for main group elements are taken from :footcite:`Mantina2009`,
87
86
  and for relevant transition metals from the :footcite:`RDKit`.
88
-
87
+
89
88
  By default *ProtOr* is used.
90
-
91
-
89
+
90
+
92
91
  Returns
93
92
  -------
94
93
  sasa : ndarray, dtype=float, shape=(n,)
95
- Atom-wise SASA. `NaN` for atoms where SASA has not been
94
+ Atom-wise SASA. `NaN` for atoms where SASA has not been
96
95
  calculated
97
96
  (solvent atoms, hydrogen atoms (ProtOr), atoms not in `filter`).
98
-
97
+
99
98
  References
100
99
  ----------
101
-
100
+
102
101
  .. footbibliography::
103
-
102
+
104
103
  """
105
104
  cdef int i=0, j=0, k=0, adj_atom_i=0, rel_atom_i=0
106
-
105
+
107
106
  cdef np.ndarray sasa_filter
108
107
  cdef np.ndarray occl_filter
109
108
  if atom_filter is not None:
@@ -122,7 +121,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
122
121
  filter = ~filter_monoatomic_ions(array)
123
122
  sasa_filter = sasa_filter & filter
124
123
  occl_filter = occl_filter & filter
125
-
124
+
126
125
  cdef np.ndarray sphere_points
127
126
  if callable(point_distr):
128
127
  sphere_points = point_distr(point_number)
@@ -131,7 +130,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
131
130
  else:
132
131
  raise ValueError(f"'{point_distr}' is not a valid point distribution")
133
132
  sphere_points = sphere_points.astype(np.float32)
134
-
133
+
135
134
  cdef np.ndarray radii
136
135
  if isinstance(vdw_radii, np.ndarray):
137
136
  radii = vdw_radii.astype(np.float32)
@@ -159,17 +158,17 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
159
158
  raise KeyError(f"'{vdw_radii}' is not a valid radii set")
160
159
  # Increase atom radii by probe size ("rolling probe")
161
160
  radii += probe_radius
162
-
161
+
163
162
  # Memoryview for filter
164
163
  # Problem with creating boolean memoryviews
165
164
  # -> Type uint8 is used
166
165
  cdef np_bool[:] sasa_filter_view = np.frombuffer(sasa_filter,
167
166
  dtype=np.uint8)
168
-
167
+
169
168
  cdef np.ndarray occl_r = radii[occl_filter]
170
169
  # Atom array containing occluding atoms
171
170
  occl_array = array[occl_filter]
172
-
171
+
173
172
  # Memoryviews for coordinates of entire (main) array
174
173
  # and for coordinates of occluding atom array
175
174
  cdef float32[:,:] main_coord = array.coord.astype(np.float32,
@@ -191,10 +190,10 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
191
190
  cdef float32[:] occl_radii_sq = occl_r * occl_r
192
191
  # Memoryview for atomwise SASA
193
192
  cdef float32[:] sasa = np.full(len(array), np.nan, dtype=np.float32)
194
-
193
+
195
194
  # Area of a sphere point on a unit sphere
196
195
  cdef float32 area_per_point = 4.0 * np.pi / point_number
197
-
196
+
198
197
  # Define further statically typed variables
199
198
  # that are needed for SASA calculation
200
199
  cdef int n_accesible = 0
@@ -213,8 +212,8 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
213
212
  cdef float32 occl_y = 0
214
213
  cdef float32 occl_z = 0
215
214
  cdef float32[:,:] relevant_occl_coord = None
216
-
217
- # Cell size is as large as the maximum distance,
215
+
216
+ # Cell size is as large as the maximum distance,
218
217
  # where two atoms can intersect.
219
218
  # Therefore intersecting atoms are always in the same or adjacent cell.
220
219
  cell_list = CellList(occl_array, np.max(radii[occl_filter])*2)
@@ -227,7 +226,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
227
226
  cell_indices = cell_list.get_atoms_in_cells(array.coord)
228
227
  cell_indices_view = cell_indices
229
228
  max_adj_list_length = cell_indices.shape[0]
230
-
229
+
231
230
  # Later on, this array stores coordinates for actual
232
231
  # occluding atoms for a certain atom to calculate the
233
232
  # SASA for
@@ -237,7 +236,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
237
236
  # adjacent atoms
238
237
  relevant_occl_coord = np.zeros((max_adj_list_length, 4),
239
238
  dtype=np.float32)
240
-
239
+
241
240
  # Actual SASA calculation
242
241
  for i in range(array_length):
243
242
  # First level: The atoms to calculate SASA for
@@ -5,6 +5,7 @@
5
5
  __name__ = "biotite.structure"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = [
8
+ "get_segment_starts",
8
9
  "apply_segment_wise",
9
10
  "spread_segment_wise",
10
11
  "get_segment_masks",
@@ -16,6 +17,60 @@ __all__ = [
16
17
  import numpy as np
17
18
 
18
19
 
20
+ def get_segment_starts(
21
+ array, add_exclusive_stop, continuous_categories=(), equal_categories=()
22
+ ):
23
+ """
24
+ Generalized version of :func:`get_residue_starts()` for residues and chains.
25
+
26
+ The starts are determined from value changes in the given annotations.
27
+
28
+ Parameters
29
+ ----------
30
+ array : AtomArray or AtomArrayStack
31
+ The atom array (stack) to get the segment starts from.
32
+ add_exclusive_stop : bool, optional
33
+ If true, the exclusive stop of the input atom array,
34
+ i.e. ``array.array_length()``, is added to the returned array of start indices
35
+ as last element.
36
+ continuous_categories : tuple of str, optional
37
+ Annotation categories that are expected to be continuously increasing within a
38
+ segment.
39
+ This means if the value of such an annotation decreases from one atom to
40
+ another, a new segment is started.
41
+ equal_categories : tuple of str, optional
42
+ Annotation categories that are expected to be equal within a segment.
43
+ This means if the value of such an annotation changes from one atom to
44
+ another, a new segment is started.
45
+
46
+ Returns
47
+ -------
48
+ starts : ndarray, dtype=int
49
+ The start indices of segments in `array`.
50
+ """
51
+ if array.array_length() == 0:
52
+ return np.array([], dtype=int)
53
+
54
+ segment_start_mask = np.zeros(array.array_length() - 1, dtype=bool)
55
+ for annot_name in continuous_categories:
56
+ annotation = array.get_annotation(annot_name)
57
+ segment_start_mask |= np.diff(annotation) < 0
58
+ for annot_name in equal_categories:
59
+ annotation = array.get_annotation(annot_name)
60
+ segment_start_mask |= annotation[1:] != annotation[:-1]
61
+
62
+ # Convert mask to indices
63
+ # Add 1, to shift the indices from the end of a segment
64
+ # to the start of a new segment
65
+ chain_starts = np.where(segment_start_mask)[0] + 1
66
+
67
+ # The first segment is not included yet -> Insert '[0]'
68
+ if add_exclusive_stop:
69
+ return np.concatenate(([0], chain_starts, [array.array_length()]))
70
+ else:
71
+ return np.concatenate(([0], chain_starts))
72
+
73
+
19
74
  def apply_segment_wise(starts, data, function, axis=None):
20
75
  """
21
76
  Generalized version of :func:`apply_residue_wise()` for