biotite 1.1.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (160) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/localapp.py +2 -2
  6. biotite/application/msaapp.py +10 -10
  7. biotite/application/muscle/app3.py +3 -3
  8. biotite/application/muscle/app5.py +3 -3
  9. biotite/application/sra/app.py +0 -5
  10. biotite/application/util.py +21 -1
  11. biotite/application/viennarna/rnaalifold.py +8 -8
  12. biotite/application/viennarna/rnaplot.py +10 -8
  13. biotite/application/viennarna/util.py +1 -1
  14. biotite/application/webapp.py +1 -1
  15. biotite/database/afdb/__init__.py +12 -0
  16. biotite/database/afdb/download.py +191 -0
  17. biotite/database/entrez/dbnames.py +10 -0
  18. biotite/database/entrez/download.py +9 -10
  19. biotite/database/entrez/key.py +1 -1
  20. biotite/database/entrez/query.py +5 -4
  21. biotite/database/pubchem/download.py +6 -6
  22. biotite/database/pubchem/error.py +10 -0
  23. biotite/database/pubchem/query.py +12 -23
  24. biotite/database/rcsb/download.py +3 -2
  25. biotite/database/rcsb/query.py +2 -3
  26. biotite/database/uniprot/check.py +2 -2
  27. biotite/database/uniprot/download.py +2 -5
  28. biotite/database/uniprot/query.py +3 -4
  29. biotite/file.py +14 -2
  30. biotite/interface/__init__.py +19 -0
  31. biotite/interface/openmm/__init__.py +20 -0
  32. biotite/interface/openmm/state.py +93 -0
  33. biotite/interface/openmm/system.py +227 -0
  34. biotite/interface/pymol/__init__.py +201 -0
  35. biotite/interface/pymol/cgo.py +346 -0
  36. biotite/interface/pymol/convert.py +185 -0
  37. biotite/interface/pymol/display.py +267 -0
  38. biotite/interface/pymol/object.py +1226 -0
  39. biotite/interface/pymol/shapes.py +178 -0
  40. biotite/interface/pymol/startup.py +169 -0
  41. biotite/interface/rdkit/__init__.py +19 -0
  42. biotite/interface/rdkit/mol.py +490 -0
  43. biotite/interface/version.py +94 -0
  44. biotite/interface/warning.py +19 -0
  45. biotite/sequence/align/__init__.py +0 -4
  46. biotite/sequence/align/alignment.py +33 -11
  47. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  48. biotite/sequence/align/banded.pyx +22 -22
  49. biotite/sequence/align/cigar.py +2 -2
  50. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  51. biotite/sequence/align/kmeralphabet.pyx +2 -2
  52. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  53. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  54. biotite/sequence/align/kmertable.pyx +6 -6
  55. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  56. biotite/sequence/align/localgapped.pyx +47 -47
  57. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  58. biotite/sequence/align/localungapped.pyx +10 -10
  59. biotite/sequence/align/matrix.py +12 -3
  60. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  61. biotite/sequence/align/multiple.pyx +1 -2
  62. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  63. biotite/sequence/align/pairwise.pyx +37 -39
  64. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  65. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  66. biotite/sequence/align/selector.pyx +2 -2
  67. biotite/sequence/align/statistics.py +1 -1
  68. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  69. biotite/sequence/alphabet.py +2 -2
  70. biotite/sequence/annotation.py +19 -13
  71. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  72. biotite/sequence/codon.py +1 -2
  73. biotite/sequence/graphics/alignment.py +25 -39
  74. biotite/sequence/graphics/dendrogram.py +4 -2
  75. biotite/sequence/graphics/features.py +2 -2
  76. biotite/sequence/graphics/logo.py +10 -12
  77. biotite/sequence/io/fasta/convert.py +1 -2
  78. biotite/sequence/io/fasta/file.py +1 -1
  79. biotite/sequence/io/fastq/file.py +3 -3
  80. biotite/sequence/io/genbank/file.py +3 -3
  81. biotite/sequence/io/genbank/sequence.py +2 -0
  82. biotite/sequence/io/gff/convert.py +1 -1
  83. biotite/sequence/io/gff/file.py +1 -2
  84. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  85. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  86. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  87. biotite/sequence/profile.py +19 -25
  88. biotite/sequence/search.py +0 -1
  89. biotite/sequence/seqtypes.py +12 -5
  90. biotite/sequence/sequence.py +1 -2
  91. biotite/structure/__init__.py +2 -0
  92. biotite/structure/alphabet/i3d.py +1 -2
  93. biotite/structure/alphabet/pb.py +1 -2
  94. biotite/structure/alphabet/unkerasify.py +8 -2
  95. biotite/structure/atoms.py +35 -27
  96. biotite/structure/basepairs.py +39 -40
  97. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  98. biotite/structure/bonds.pyx +8 -5
  99. biotite/structure/box.py +159 -23
  100. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  101. biotite/structure/celllist.pyx +83 -68
  102. biotite/structure/chains.py +17 -55
  103. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  104. biotite/structure/compare.py +420 -13
  105. biotite/structure/density.py +1 -1
  106. biotite/structure/dotbracket.py +31 -32
  107. biotite/structure/filter.py +8 -8
  108. biotite/structure/geometry.py +15 -15
  109. biotite/structure/graphics/rna.py +19 -16
  110. biotite/structure/hbond.py +18 -21
  111. biotite/structure/info/atoms.py +11 -2
  112. biotite/structure/info/ccd.py +0 -2
  113. biotite/structure/info/components.bcif +0 -0
  114. biotite/structure/info/groups.py +0 -3
  115. biotite/structure/info/misc.py +0 -1
  116. biotite/structure/info/radii.py +92 -22
  117. biotite/structure/info/standardize.py +1 -2
  118. biotite/structure/integrity.py +4 -6
  119. biotite/structure/io/general.py +2 -2
  120. biotite/structure/io/gro/file.py +8 -9
  121. biotite/structure/io/mol/convert.py +1 -1
  122. biotite/structure/io/mol/ctab.py +33 -28
  123. biotite/structure/io/mol/mol.py +1 -1
  124. biotite/structure/io/mol/sdf.py +39 -13
  125. biotite/structure/io/pdb/convert.py +86 -5
  126. biotite/structure/io/pdb/file.py +90 -24
  127. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  128. biotite/structure/io/pdbqt/file.py +4 -4
  129. biotite/structure/io/pdbx/bcif.py +22 -7
  130. biotite/structure/io/pdbx/cif.py +20 -7
  131. biotite/structure/io/pdbx/component.py +6 -0
  132. biotite/structure/io/pdbx/compress.py +71 -34
  133. biotite/structure/io/pdbx/convert.py +429 -77
  134. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  135. biotite/structure/io/pdbx/encoding.pyx +39 -23
  136. biotite/structure/io/trajfile.py +9 -6
  137. biotite/structure/io/util.py +38 -0
  138. biotite/structure/mechanics.py +0 -1
  139. biotite/structure/molecules.py +0 -15
  140. biotite/structure/pseudoknots.py +13 -19
  141. biotite/structure/repair.py +2 -4
  142. biotite/structure/residues.py +20 -48
  143. biotite/structure/rings.py +335 -0
  144. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  145. biotite/structure/sasa.pyx +30 -30
  146. biotite/structure/segments.py +123 -9
  147. biotite/structure/sequence.py +0 -1
  148. biotite/structure/spacegroups.json +1567 -0
  149. biotite/structure/spacegroups.license +26 -0
  150. biotite/structure/sse.py +0 -2
  151. biotite/structure/superimpose.py +75 -253
  152. biotite/structure/tm.py +581 -0
  153. biotite/structure/transform.py +232 -26
  154. biotite/structure/util.py +3 -3
  155. biotite/version.py +9 -4
  156. biotite/visualize.py +111 -1
  157. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
  158. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
  159. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
  160. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -9,11 +9,16 @@ comparing multiple structures with each other.
9
9
 
10
10
  __name__ = "biotite.structure"
11
11
  __author__ = "Patrick Kunzmann"
12
- __all__ = ["rmsd", "rmspd", "rmsf", "average"]
12
+ __all__ = ["rmsd", "rmspd", "rmsf", "average", "lddt"]
13
13
 
14
+ import collections.abc
15
+ import warnings
14
16
  import numpy as np
15
- from biotite.structure.atoms import AtomArrayStack, coord
17
+ from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
18
+ from biotite.structure.celllist import CellList
19
+ from biotite.structure.chains import get_chain_count, get_chain_positions
16
20
  from biotite.structure.geometry import index_distance
21
+ from biotite.structure.residues import get_residue_count, get_residue_positions
17
22
  from biotite.structure.util import vector_dot
18
23
 
19
24
 
@@ -21,7 +26,7 @@ def rmsd(reference, subject):
21
26
  r"""
22
27
  Calculate the RMSD between two structures.
23
28
 
24
- The *root-mean-square-deviation* (RMSD) indicates the overall
29
+ The *root mean square deviation* (RMSD) indicates the overall
25
30
  deviation of each model of a structure to a reference structure.
26
31
  It is defined as:
27
32
 
@@ -48,7 +53,7 @@ def rmsd(reference, subject):
48
53
 
49
54
  See Also
50
55
  --------
51
- rmsf
56
+ rmsf : The *root mean square fluctuation*.
52
57
 
53
58
  Notes
54
59
  -----
@@ -121,11 +126,9 @@ def rmspd(reference, subject, periodic=False, box=None):
121
126
  to ensure correct results.
122
127
  (e.g. with :func:`remove_pbc()`).
123
128
 
124
- See also
129
+ See Also
125
130
  --------
126
- index_distance
127
- remove_pbc
128
- rmsd
131
+ rmsd : The *root mean square deviation*.
129
132
  """
130
133
  # Compute index pairs in reference structure -> pair_ij for j < i
131
134
  reflen = reference.array_length()
@@ -173,7 +176,8 @@ def rmsf(reference, subject):
173
176
 
174
177
  See Also
175
178
  --------
176
- rmsd
179
+ rmsd : The *root mean square deviation*.
180
+ average : Average the structure over the models to be used as reference in this function.
177
181
 
178
182
  Notes
179
183
  -----
@@ -218,10 +222,6 @@ def average(atoms):
218
222
  If `atoms` is a :class:`ndarray` and :class:`ndarray` is also
219
223
  returned.
220
224
 
221
- See Also
222
- --------
223
- rmsd, rmsf
224
-
225
225
  Notes
226
226
  -----
227
227
  The calculated average structure is not suitable for visualization
@@ -242,6 +242,244 @@ def average(atoms):
242
242
  return mean_coords
243
243
 
244
244
 
245
+ def lddt(
246
+ reference,
247
+ subject,
248
+ aggregation="all",
249
+ atom_mask=None,
250
+ partner_mask=None,
251
+ inclusion_radius=15,
252
+ distance_bins=(0.5, 1.0, 2.0, 4.0),
253
+ exclude_same_residue=True,
254
+ exclude_same_chain=False,
255
+ filter_function=None,
256
+ symmetric=False,
257
+ ):
258
+ """
259
+ Calculate the *local Distance Difference Test* (lDDT) score of a structure with
260
+ respect to its reference.
261
+ :footcite:`Mariani2013`
262
+
263
+ Parameters
264
+ ----------
265
+ reference : AtomArray
266
+ The reference structure.
267
+ subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
268
+ The structure(s) to evaluate with respect to `reference`.
269
+ The number of atoms must be the same as in `reference`.
270
+ Alternatively, coordinates can be provided directly as
271
+ :class:`ndarray`.
272
+ aggregation : {'all', 'chain', 'residue', 'atom'} or ndarray, shape=(n,), dtype=int, optional
273
+ Defines on which scale the lDDT score is calculated.
274
+
275
+ - `'all'`: The score is computed over all contacts.
276
+ - `'chain'`: The score is calculated for each chain separately.
277
+ - `'residue'`: The score is calculated for each residue separately.
278
+ - `'atom'`: The score is calculated for each atom separately.
279
+
280
+ Alternatively, an array of aggregation bins can be provided, i.e. each contact
281
+ is assigned to the corresponding bin.
282
+ atom_mask : ndarray, shape=(n,), dtype=bool, optional
283
+ If given, the contacts are only computed for the masked atoms.
284
+ Atoms excluded by the mask do not have any contacts and their *lDDT* would
285
+ be NaN in case of ``aggregation="atom"``.
286
+ Providing this mask can significantly speed up the computation, if
287
+ only for certain chains/residues/atoms the *lDDT* is of interest.
288
+ partner_mask : ndarray, shape=(n,), dtype=bool, optional
289
+ If given, only contacts **to** the masked atoms are considered.
290
+ While `atom_mask` does not alter the *lDDT* for the masked atoms,
291
+ `partner_mask` does, as for each atom only the masked atoms are considered
292
+ as potential contact partners.
293
+ inclusion_radius : float, optional
294
+ Pairwise atom distances are considered within this radius in `reference`.
295
+ distance_bins : list of float, optional
296
+ The distance bins for the score calculation, i.e., if a distance deviation is
297
+ within the first bin, the score is 1, if it is outside all bins, the score is 0.
298
+ exclude_same_residue : bool, optional
299
+ If true, only atom distances between different residues are considered.
300
+ Otherwise, also atom distances within the same residue are included.
301
+ exclude_same_chain : bool, optional
302
+ If true, only atom distances between different chains are considered.
303
+ Otherwise, also atom distances within the same chain are included.
304
+ filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
305
+ Used for custom contact filtering, if the other parameters are not sufficient.
306
+ A function that takes an array of contact atom indices and returns a mask that
307
+ is ``True`` for all contacts that should be retained.
308
+ All other contacts are not considered for lDDT computation.
309
+ symmetric : bool, optional
310
+ If set to true, the *lDDT* score is computed symmetrically.
311
+ This means both contacts found in the `reference` and `subject` structure are
312
+ considered.
313
+ Hence the score is independent of which structure is given as `reference` and
314
+ `subject`.
315
+ Note that in this case `subject` must be an :class:`AtomArray` as well.
316
+ By default, only contacts in the `reference` are considered.
317
+
318
+ Returns
319
+ -------
320
+ lddt : float or ndarray, dtype=float
321
+ The lDDT score for each model and aggregation bin.
322
+ The shape depends on `subject` and `aggregation`:
323
+ If `subject` is an :class:`AtomArrayStack` (or equivalent coordinate
324
+ :class:`ndarray`), a dimension depicting each model is added.
325
+ If `aggregation` is not ``'all'``, a second dimension with the length equal to
326
+ the number of aggregation bins is added (i.e. number of chains, residues, etc.).
327
+ If both, an :class:`AtomArray` as `subject` and ``aggregation='all'`` is passed,
328
+ a float is returned.
329
+
330
+ Notes
331
+ -----
332
+ The lDDT score measures how well the pairwise atom distances in a model match the
333
+ corresponding distances in a reference.
334
+ Hence, like :func:`rmspd()` it works superimposition-free, but instead of capturing
335
+ the global deviation, only the local environment within the `inclusion_radius` is
336
+ considered.
337
+
338
+ Note that by default, also hydrogen atoms are considered in the distance
339
+ calculation.
340
+ If this is undesired, the hydrogen atoms can be removed prior to the calculation.
341
+
342
+ References
343
+ ----------
344
+
345
+ .. footbibliography::
346
+
347
+ Examples
348
+ --------
349
+
350
+ Calculate the global lDDT of all models to the first model:
351
+
352
+ >>> reference = atom_array_stack[0]
353
+ >>> subject = atom_array_stack[1:]
354
+ >>> print(lddt(reference, subject))
355
+ [0.799 0.769 0.792 0.836 0.799 0.752 0.860 0.769 0.825 0.777 0.760 0.787
356
+ 0.790 0.783 0.804 0.842 0.769 0.797 0.757 0.852 0.811 0.786 0.805 0.755
357
+ 0.734 0.794 0.771 0.778 0.842 0.772 0.815 0.789 0.828 0.750 0.826 0.739
358
+ 0.760]
359
+
360
+ Calculate the residue-wise lDDT for a single model:
361
+
362
+ >>> subject = atom_array_stack[1]
363
+ >>> print(lddt(reference, subject, aggregation="residue"))
364
+ [0.599 0.692 0.870 0.780 0.830 0.881 0.872 0.658 0.782 0.901 0.888 0.885
365
+ 0.856 0.795 0.847 0.603 0.895 0.878 0.871 0.789]
366
+
367
+ As example for custom aggregation, calculate the lDDT for each chemical element:
368
+
369
+ >>> unique_elements = np.unique(reference.element)
370
+ >>> element_bins = np.array(
371
+ ... [np.where(unique_elements == element)[0][0] for element in reference.element]
372
+ ... )
373
+ >>> element_lddt = lddt(reference, subject, aggregation=element_bins)
374
+ >>> for element, lddt_for_element in zip(unique_elements, element_lddt):
375
+ ... print(f"{element}: {lddt_for_element:.3f}")
376
+ C: 0.837
377
+ H: 0.770
378
+ N: 0.811
379
+ O: 0.808
380
+
381
+ If the reference structure has more atoms resolved than the subject structure,
382
+ the missing atoms can be indicated with *NaN* values:
383
+
384
+ >>> reference = atom_array_stack[0]
385
+ >>> subject = atom_array_stack[1].copy()
386
+ >>> # Simulate the situation where the first residue is missing in the subject
387
+ >>> subject.coord[subject.res_id == 1] = np.nan
388
+ >>> global_lddt = lddt(reference, subject)
389
+ >>> print(f"{global_lddt:.3f}")
390
+ 0.751
391
+ """
392
+ reference_coord = coord(reference)
393
+ subject_coord = coord(subject)
394
+ if subject_coord.shape[-2] != reference_coord.shape[-2]:
395
+ raise IndexError(
396
+ f"The given reference has {reference_coord.shape[-2]} atoms, but the "
397
+ f"subject has {subject_coord.shape[-2]} atoms"
398
+ )
399
+
400
+ contacts = _find_contacts(
401
+ reference,
402
+ atom_mask,
403
+ partner_mask,
404
+ inclusion_radius,
405
+ exclude_same_residue,
406
+ exclude_same_chain,
407
+ filter_function,
408
+ )
409
+ if symmetric:
410
+ if not isinstance(subject, AtomArray):
411
+ raise TypeError(
412
+ "Expected 'AtomArray' as subject, as symmetric lDDT is enabled, "
413
+ f"but got '{type(subject).__name__}'"
414
+ )
415
+ subject_contacts = _find_contacts(
416
+ subject,
417
+ atom_mask,
418
+ partner_mask,
419
+ inclusion_radius,
420
+ exclude_same_residue,
421
+ exclude_same_chain,
422
+ filter_function,
423
+ )
424
+ contacts = np.concatenate((contacts, subject_contacts), axis=0)
425
+ # Adding additional contacts may introduce duplicates between the existing and
426
+ # new ones -> filter them out
427
+ contacts = np.unique(contacts, axis=0)
428
+ if (
429
+ isinstance(aggregation, str)
430
+ and aggregation == "all"
431
+ and atom_mask is None
432
+ and partner_mask is None
433
+ ):
434
+ # Remove duplicate pairs as each pair appears twice
435
+ # (if i is in threshold distance to j, j is also in threshold distance to i)
436
+ # keep only the pair where i < j
437
+ # This improves performance, as fewer distances need to be computed
438
+ # The assumption also only works when no atoms are masked
439
+ contacts = contacts[contacts[:, 0] < contacts[:, 1]]
440
+
441
+ reference_distances = index_distance(reference_coord, contacts)
442
+ subject_distances = index_distance(subject_coord, contacts)
443
+ deviations = np.abs(subject_distances - reference_distances)
444
+ distance_bins = np.asarray(distance_bins)
445
+ fraction_preserved_bins = np.count_nonzero(
446
+ deviations[..., np.newaxis] <= distance_bins[np.newaxis, :], axis=-1
447
+ ) / len(distance_bins)
448
+
449
+ # Aggregate the fractions over the desired level
450
+ if isinstance(aggregation, str) and aggregation == "all":
451
+ # Average over all contacts
452
+ return np.mean(fraction_preserved_bins, axis=-1)
453
+ else:
454
+ # A string is also a 'Sequence'
455
+ # -> distinguish between string and array, list, etc.
456
+ if isinstance(
457
+ aggregation, (np.ndarray, collections.abc.Sequence)
458
+ ) and not isinstance(aggregation, str):
459
+ return _average_over_indices(
460
+ fraction_preserved_bins,
461
+ bins=np.asarray(aggregation)[contacts[:, 0]],
462
+ )
463
+ elif aggregation == "chain":
464
+ return _average_over_indices(
465
+ fraction_preserved_bins,
466
+ bins=get_chain_positions(reference, contacts[:, 0]),
467
+ n_bins=get_chain_count(reference),
468
+ )
469
+ elif aggregation == "residue":
470
+ return _average_over_indices(
471
+ fraction_preserved_bins,
472
+ bins=get_residue_positions(reference, contacts[:, 0]),
473
+ n_bins=get_residue_count(reference),
474
+ )
475
+ elif aggregation == "atom":
476
+ return _average_over_indices(
477
+ fraction_preserved_bins, contacts[:, 0], reference.array_length()
478
+ )
479
+ else:
480
+ raise ValueError(f"Invalid aggregation level '{aggregation}'")
481
+
482
+
245
483
  def _sq_euclidian(reference, subject):
246
484
  """
247
485
  Calculate squared euclidian distance between atoms in two
@@ -272,3 +510,172 @@ def _sq_euclidian(reference, subject):
272
510
  )
273
511
  dif = subject_coord - reference_coord
274
512
  return vector_dot(dif, dif)
513
+
514
+
515
+ def _to_sparse_indices(all_contacts):
516
+ """
517
+ Create tuples of contact indices from the :meth:`CellList.get_atoms()` return value.
518
+
519
+ In other words, they would mark the non-zero elements in a dense contact matrix.
520
+
521
+ Parameters
522
+ ----------
523
+ all_contacts : ndarray, dtype=int, shape=(m,n)
524
+ The contact indices as returned by :meth:`CellList.get_atoms()`.
525
+ Padded with -1, in the second dimension.
526
+ Dimension *m* marks the query atoms, dimension *n* marks the contact atoms.
527
+
528
+ Returns
529
+ -------
530
+ combined_indices : ndarray, dtype=int, shape=(l,2)
531
+ The contact indices.
532
+ Each row contains the query atom index and the contact atom index of one contact.
533
+ """
534
+ # Find rows where a query atom has at least one contact
535
+ non_empty_indices = np.where(np.any(all_contacts != -1, axis=1))[0]
536
+ # Take those rows and flatten them
537
+ contact_indices = all_contacts[non_empty_indices].flatten()
538
+ # For each row the corresponding query atom is the same
539
+ # Hence in the flattened form the query atom index is simply repeated
540
+ query_indices = np.repeat(non_empty_indices, all_contacts.shape[1])
541
+ combined_indices = np.stack([query_indices, contact_indices], axis=1)
542
+ # Remove the padding values
543
+ return combined_indices[contact_indices != -1]
544
+
545
+
546
+ def _find_contacts(
547
+ atoms=None,
548
+ atom_mask=None,
549
+ partner_mask=None,
550
+ inclusion_radius=15,
551
+ exclude_same_residue=False,
552
+ exclude_same_chain=True,
553
+ filter_function=None,
554
+ ):
555
+ """
556
+ Find contacts between the atoms in the given structure.
557
+
558
+ Parameters
559
+ ----------
560
+ atoms : AtomArray
561
+ The structure to find the contacts for.
562
+ atom_mask : ndarray, shape=(n,), dtype=bool, optional
563
+ If given, the contacts are only computed for the masked atoms.
564
+ Atoms excluded by the mask do not have any contacts and their *lDDT* would
565
+ be NaN in case of ``aggregation="atom"``.
566
+ Providing this mask can significantly speed up the computation, if
567
+ only for certain chains/residues/atoms the *lDDT* is of interest.
568
+ partner_mask : ndarray, shape=(n,), dtype=bool, optional
569
+ If given, only contacts **to** the masked atoms are considered.
570
+ While `atom_mask` does not alter the *lDDT* for the masked atoms,
571
+ `partner_mask` does, as for each atom only the masked atoms are considered
572
+ as potential contact partners.
573
+ inclusion_radius : float, optional
574
+ Pairwise atom distances are considered within this radius.
575
+ exclude_same_residue : bool, optional
576
+ If true, only atom distances between different residues are considered.
577
+ Otherwise, also atom distances within the same residue are included.
578
+ exclude_same_chain : bool, optional
579
+ If true, only atom distances between different chains are considered.
580
+ Otherwise, also atom distances within the same chain are included.
581
+ filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
582
+ Used for custom contact filtering, if the other parameters are not sufficient.
583
+ A function that takes an array of contact atom indices and returns a mask that
584
+ is ``True`` for all contacts that should be retained.
585
+ All other contacts are not considered for lDDT computation.
586
+
587
+ Returns
588
+ -------
589
+ contacts : ndarray, shape=(n,2), dtype=int
590
+ The array of contacts.
591
+ Each element represents a pair of atom indices that are in contact.
592
+ """
593
+ coords = coord(atoms)
594
+ selection = ~np.isnan(coords).any(axis=-1)
595
+ if partner_mask is not None:
596
+ selection &= partner_mask
597
+ # Use a cell list to find atoms within inclusion radius in O(n) time complexity
598
+ cell_list = CellList(coords, inclusion_radius, selection=selection)
599
+ # Pairs of indices for atoms within the inclusion radius
600
+ if atom_mask is None:
601
+ all_contacts = cell_list.get_atoms(coords, inclusion_radius)
602
+ else:
603
+ filtered_contacts = cell_list.get_atoms(coords[atom_mask], inclusion_radius)
604
+ # Map the contacts for the masked atoms to the original coordinates
605
+ # Rows that were filtered out by the mask are fully padded with -1
606
+ # consistent with the padding of `get_atoms()`
607
+ all_contacts = np.full(
608
+ (coords.shape[0], filtered_contacts.shape[-1]),
609
+ -1,
610
+ dtype=filtered_contacts.dtype,
611
+ )
612
+ all_contacts[atom_mask] = filtered_contacts
613
+ # Convert into pairs of indices
614
+ contacts = _to_sparse_indices(all_contacts)
615
+
616
+ if exclude_same_chain:
617
+ # Find the index of the chain for each atom and keep only inter-chain contacts
618
+ chain_indices = get_chain_positions(atoms, contacts.flatten()).reshape(
619
+ contacts.shape
620
+ )
621
+ contacts = contacts[chain_indices[:, 0] != chain_indices[:, 1]]
622
+ elif exclude_same_residue:
623
+ # Find the index of the residue for each atom
624
+ residue_indices = get_residue_positions(atoms, contacts.flatten()).reshape(
625
+ contacts.shape
626
+ )
627
+ # Remove contacts between atoms of the same residue
628
+ contacts = contacts[residue_indices[:, 0] != residue_indices[:, 1]]
629
+ else:
630
+ # In any case self-contacts should not be considered
631
+ contacts = contacts[contacts[:, 0] != contacts[:, 1]]
632
+ if filter_function is not None:
633
+ mask = filter_function(contacts)
634
+ if mask.shape != (contacts.shape[0],):
635
+ raise IndexError(
636
+ f"Mask returned from filter function has shape {mask.shape}, "
637
+ f"but expected ({contacts.shape[0]},)"
638
+ )
639
+ contacts = contacts[mask, :]
640
+ return contacts
641
+
642
+
643
+ def _average_over_indices(values, bins, n_bins=None):
644
+ """
645
+ For each unique index in `bins`, average the corresponding values in `values`.
646
+
647
+ Based on
648
+ https://stackoverflow.com/questions/79140661/how-to-sum-values-based-on-a-second-index-array-in-a-vectorized-manner
649
+
650
+ Parameters
651
+ ----------
652
+ values : ndarray, shape=(..., n)
653
+ The values to average.
654
+ bins : ndarray, shape=(n,) dtype=int
655
+ Associates each value from `values` with a bin.
656
+ n_bins : int
657
+ The total number of bins.
658
+ This is necessary as some bins in `bins` may be empty.
659
+ By default the number of bins is determined from `bins`.
660
+
661
+ Returns
662
+ -------
663
+ averaged : ndarray, shape=(..., k)
664
+ The averaged values.
665
+ *k* is `n_bins` if given, otherwise the maximum value in `bins` + 1.
666
+ """
667
+ if n_bins is None:
668
+ n_elements_per_bin = np.bincount(bins)
669
+ n_bins = len(n_elements_per_bin)
670
+ else:
671
+ n_elements_per_bin = np.bincount(bins, minlength=n_bins)
672
+ # The last dimension is replaced by the number of bins
673
+ # Broadcasting in 'np.add.at()' requires the replaced dimension to be the first
674
+ aggregated = np.zeros((n_bins, *values.shape[:-1]), dtype=values.dtype)
675
+ np.add.at(aggregated, bins, np.swapaxes(values, 0, -1))
676
+ # If an atom has no contacts, the corresponding value is NaN
677
+ # This result is expected, hence the warning is ignored
678
+ with warnings.catch_warnings():
679
+ warnings.simplefilter("ignore")
680
+ # Bring the bin dimension into the last dimension again
681
+ return np.swapaxes(aggregated, 0, -1) / n_elements_per_bin
@@ -49,7 +49,7 @@ def density(atoms, selection=None, delta=1.0, bins=None, density=False, weights=
49
49
  If False, the number of samples in each bin is returned.
50
50
  Otherwise, returns the probability density function of each bin.
51
51
  See :func:`numpy.histogramdd()` for further details.
52
- weights: ndarray, shape=(n,) or shape=(m,n), optional
52
+ weights : ndarray, shape=(n,) or shape=(m,n), optional
53
53
  An array of values to weight the contribution of *n* atoms in
54
54
  *m* models.
55
55
  If the shape is *(n,)*, the weights will be interpreted as
@@ -31,12 +31,12 @@ def dot_bracket_from_structure(
31
31
 
32
32
  Parameters
33
33
  ----------
34
- atom_array : AtomArray
34
+ nucleic_acid_strand : AtomArray
35
35
  The nucleic acid strand to be represented in DBL-notation.
36
- scores : ndarray, dtype=int, shape=(n,) (default: None)
36
+ scores : ndarray, dtype=int, shape=(n,)
37
37
  The score for each base pair, which is passed on to
38
38
  :func:`pseudoknots()`.
39
- max_pseudoknot_order : int (default: None)
39
+ max_pseudoknot_order : int
40
40
  The maximum pseudoknot order to be found. If a base pair would
41
41
  be of a higher order, it is represented as unpaired. If ``None``
42
42
  is given, all base pairs are evaluated.
@@ -48,8 +48,9 @@ def dot_bracket_from_structure(
48
48
 
49
49
  See Also
50
50
  --------
51
- base_pairs
52
- pseudoknots
51
+ base_pairs : Compute the base pairs from a structure as passed to this function.
52
+ dot_bracket : Compute the dot bracket notation directly from base pairs.
53
+ pseudoknots : Get the pseudoknot order for each base pair.
53
54
 
54
55
  References
55
56
  ----------
@@ -81,10 +82,9 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
81
82
  strand.
82
83
  length : int
83
84
  The number of bases in the strand.
84
- scores : ndarray, dtype=int, shape=(n,) (default: None)
85
- The score for each base pair, which is passed on to
86
- :func:`pseudoknots()`
87
- max_pseudoknot_order : int (default: None)
85
+ scores : ndarray, dtype=int, shape=(n,)
86
+ The score for each base pair, which is passed on to :func:`pseudoknots()`.
87
+ max_pseudoknot_order : int
88
88
  The maximum pseudoknot order to be found. If a base pair would
89
89
  be of a higher order, it is represented as unpaired. If ``None``
90
90
  is given, all pseudoknot orders are evaluated.
@@ -94,6 +94,18 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
94
94
  notations : list [str, ...]
95
95
  The DBL-notation for each solution from :func:`pseudoknots()`.
96
96
 
97
+ See Also
98
+ --------
99
+ base_pairs_from_dot_bracket : The reverse operation.
100
+ dot_bracket_from_structure : Compute the dot bracket notation from a structure.
101
+ base_pairs : Compute the base pairs from a structure as passed to this function.
102
+ pseudoknots : Get the pseudoknot order for each base pair.
103
+
104
+ References
105
+ ----------
106
+
107
+ .. footbibliography::
108
+
97
109
  Examples
98
110
  --------
99
111
  The sequence ``ACGTC`` has a length of 5. If there was to be a
@@ -107,18 +119,6 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
107
119
 
108
120
  >>> dot_bracket(basepairs, 5)[0]
109
121
  '(..).'
110
-
111
-
112
- See Also
113
- --------
114
- dot_bracket_from_structure
115
- base_pairs
116
- pseudoknots
117
-
118
- References
119
- ----------
120
-
121
- .. footbibliography::
122
122
  """
123
123
  # Make sure the lower residue is on the left for each row
124
124
  basepairs = np.sort(basepairs, axis=1)
@@ -159,6 +159,15 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
159
159
  Each row corresponds to the positions of the bases in the
160
160
  sequence.
161
161
 
162
+ See Also
163
+ --------
164
+ dot_bracket : The reverse operation.
165
+
166
+ References
167
+ ----------
168
+
169
+ .. footbibliography::
170
+
162
171
  Examples
163
172
  --------
164
173
  The notation string ``'(..).'`` contains a base pair between the
@@ -167,15 +176,6 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
167
176
 
168
177
  >>> base_pairs_from_dot_bracket('(..).')
169
178
  array([[0, 3]])
170
-
171
- See Also
172
- --------
173
- dot_bracket
174
-
175
- References
176
- ----------
177
-
178
- .. footbibliography::
179
179
  """
180
180
  basepairs = []
181
181
  opened_brackets = [[] for _ in range(len(_OPENING_BRACKETS))]
@@ -203,8 +203,7 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
203
203
  for not_closed in opened_brackets:
204
204
  if not_closed != []:
205
205
  raise ValueError(
206
- "Invalid DBL-notation, not all opening brackets have a "
207
- "closing bracket"
206
+ "Invalid DBL-notation, not all opening brackets have a closing bracket"
208
207
  )
209
208
 
210
209
  # Sort the base pair indices in ascending order
@@ -294,7 +294,9 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
294
294
  lies within the provided boundaries.
295
295
 
296
296
  The result will depend on the atoms' order.
297
- For instance, consider a molecule::
297
+ For instance, consider a molecule:
298
+
299
+ .. code-block:: none
298
300
 
299
301
  C3
300
302
  |
@@ -306,12 +308,12 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
306
308
 
307
309
  Parameters
308
310
  ----------
309
- array: AtomArray
311
+ array : AtomArray
310
312
  The array to filter.
311
- min_len: float
312
- Minmum bond length
313
- max_len: float
314
- Maximum bond length
313
+ min_len : float
314
+ Minimum bond length.
315
+ max_len : float
316
+ Maximum bond length.
315
317
 
316
318
  Returns
317
319
  -------
@@ -364,7 +366,6 @@ def filter_polymer(array, min_size=2, pol_type="peptide"):
364
366
  filter : ndarray, dtype=bool
365
367
  This array is `True` for all indices in `array`, where atoms belong to
366
368
  consecutive polymer entity having at least `min_size` monomers.
367
-
368
369
  """
369
370
  # Import `check_res_id_continuity` here to avoid circular imports
370
371
  from biotite.structure.integrity import check_res_id_continuity
@@ -412,7 +413,6 @@ def filter_intersection(array, intersect):
412
413
  >>> array1 = array1[filter_intersection(array1, array2)]
413
414
  >>> print(array1.chain_id)
414
415
  ['B' 'C' 'D']
415
-
416
416
  """
417
417
  filter = np.full(array.array_length(), True, dtype=bool)
418
418
  intersect_categories = intersect.get_annotation_categories()