biotite 1.1.0__cp312-cp312-win_amd64.whl → 1.3.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/localapp.py +2 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +3 -3
- biotite/application/muscle/app5.py +3 -3
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +21 -1
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +10 -8
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +2 -3
- biotite/database/uniprot/check.py +2 -2
- biotite/database/uniprot/download.py +2 -5
- biotite/database/uniprot/query.py +3 -4
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +33 -11
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +22 -22
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +2 -2
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +6 -6
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +12 -3
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -2
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +37 -39
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +2 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +19 -25
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +12 -5
- biotite/sequence/sequence.py +1 -2
- biotite/structure/__init__.py +2 -0
- biotite/structure/alphabet/i3d.py +1 -2
- biotite/structure/alphabet/pb.py +1 -2
- biotite/structure/alphabet/unkerasify.py +8 -2
- biotite/structure/atoms.py +35 -27
- biotite/structure/basepairs.py +39 -40
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +8 -5
- biotite/structure/box.py +159 -23
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -68
- biotite/structure/chains.py +17 -55
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +31 -32
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +15 -15
- biotite/structure/graphics/rna.py +19 -16
- biotite/structure/hbond.py +18 -21
- biotite/structure/info/atoms.py +11 -2
- biotite/structure/info/ccd.py +0 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +0 -3
- biotite/structure/info/misc.py +0 -1
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +1 -2
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +39 -13
- biotite/structure/io/pdb/convert.py +86 -5
- biotite/structure/io/pdb/file.py +90 -24
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +4 -4
- biotite/structure/io/pdbx/bcif.py +22 -7
- biotite/structure/io/pdbx/cif.py +20 -7
- biotite/structure/io/pdbx/component.py +6 -0
- biotite/structure/io/pdbx/compress.py +71 -34
- biotite/structure/io/pdbx/convert.py +429 -77
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -23
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +0 -15
- biotite/structure/pseudoknots.py +13 -19
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +20 -48
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +30 -30
- biotite/structure/segments.py +123 -9
- biotite/structure/sequence.py +0 -1
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +75 -253
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +232 -26
- biotite/structure/util.py +3 -3
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
- {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/compare.py
CHANGED
|
@@ -9,11 +9,16 @@ comparing multiple structures with each other.
|
|
|
9
9
|
|
|
10
10
|
__name__ = "biotite.structure"
|
|
11
11
|
__author__ = "Patrick Kunzmann"
|
|
12
|
-
__all__ = ["rmsd", "rmspd", "rmsf", "average"]
|
|
12
|
+
__all__ = ["rmsd", "rmspd", "rmsf", "average", "lddt"]
|
|
13
13
|
|
|
14
|
+
import collections.abc
|
|
15
|
+
import warnings
|
|
14
16
|
import numpy as np
|
|
15
|
-
from biotite.structure.atoms import AtomArrayStack, coord
|
|
17
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
|
|
18
|
+
from biotite.structure.celllist import CellList
|
|
19
|
+
from biotite.structure.chains import get_chain_count, get_chain_positions
|
|
16
20
|
from biotite.structure.geometry import index_distance
|
|
21
|
+
from biotite.structure.residues import get_residue_count, get_residue_positions
|
|
17
22
|
from biotite.structure.util import vector_dot
|
|
18
23
|
|
|
19
24
|
|
|
@@ -21,7 +26,7 @@ def rmsd(reference, subject):
|
|
|
21
26
|
r"""
|
|
22
27
|
Calculate the RMSD between two structures.
|
|
23
28
|
|
|
24
|
-
The *root
|
|
29
|
+
The *root mean square deviation* (RMSD) indicates the overall
|
|
25
30
|
deviation of each model of a structure to a reference structure.
|
|
26
31
|
It is defined as:
|
|
27
32
|
|
|
@@ -48,7 +53,7 @@ def rmsd(reference, subject):
|
|
|
48
53
|
|
|
49
54
|
See Also
|
|
50
55
|
--------
|
|
51
|
-
rmsf
|
|
56
|
+
rmsf : The *root mean square fluctuation*.
|
|
52
57
|
|
|
53
58
|
Notes
|
|
54
59
|
-----
|
|
@@ -121,11 +126,9 @@ def rmspd(reference, subject, periodic=False, box=None):
|
|
|
121
126
|
to ensure correct results.
|
|
122
127
|
(e.g. with :func:`remove_pbc()`).
|
|
123
128
|
|
|
124
|
-
See
|
|
129
|
+
See Also
|
|
125
130
|
--------
|
|
126
|
-
|
|
127
|
-
remove_pbc
|
|
128
|
-
rmsd
|
|
131
|
+
rmsd : The *root mean square deviation*.
|
|
129
132
|
"""
|
|
130
133
|
# Compute index pairs in reference structure -> pair_ij for j < i
|
|
131
134
|
reflen = reference.array_length()
|
|
@@ -173,7 +176,8 @@ def rmsf(reference, subject):
|
|
|
173
176
|
|
|
174
177
|
See Also
|
|
175
178
|
--------
|
|
176
|
-
rmsd
|
|
179
|
+
rmsd : The *root mean square deviation*.
|
|
180
|
+
average : Average the structure over the models to be used as reference in this function.
|
|
177
181
|
|
|
178
182
|
Notes
|
|
179
183
|
-----
|
|
@@ -218,10 +222,6 @@ def average(atoms):
|
|
|
218
222
|
If `atoms` is a :class:`ndarray`, a :class:`ndarray` is also
|
|
219
223
|
returned.
|
|
220
224
|
|
|
221
|
-
See Also
|
|
222
|
-
--------
|
|
223
|
-
rmsd, rmsf
|
|
224
|
-
|
|
225
225
|
Notes
|
|
226
226
|
-----
|
|
227
227
|
The calculated average structure is not suitable for visualization
|
|
@@ -242,6 +242,244 @@ def average(atoms):
|
|
|
242
242
|
return mean_coords
|
|
243
243
|
|
|
244
244
|
|
|
245
|
+
def lddt(
|
|
246
|
+
reference,
|
|
247
|
+
subject,
|
|
248
|
+
aggregation="all",
|
|
249
|
+
atom_mask=None,
|
|
250
|
+
partner_mask=None,
|
|
251
|
+
inclusion_radius=15,
|
|
252
|
+
distance_bins=(0.5, 1.0, 2.0, 4.0),
|
|
253
|
+
exclude_same_residue=True,
|
|
254
|
+
exclude_same_chain=False,
|
|
255
|
+
filter_function=None,
|
|
256
|
+
symmetric=False,
|
|
257
|
+
):
|
|
258
|
+
"""
|
|
259
|
+
Calculate the *local Distance Difference Test* (lDDT) score of a structure with
|
|
260
|
+
respect to its reference.
|
|
261
|
+
:footcite:`Mariani2013`
|
|
262
|
+
|
|
263
|
+
Parameters
|
|
264
|
+
----------
|
|
265
|
+
reference : AtomArray
|
|
266
|
+
The reference structure.
|
|
267
|
+
subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
|
|
268
|
+
The structure(s) to evaluate with respect to `reference`.
|
|
269
|
+
The number of atoms must be the same as in `reference`.
|
|
270
|
+
Alternatively, coordinates can be provided directly as
|
|
271
|
+
:class:`ndarray`.
|
|
272
|
+
aggregation : {'all', 'chain', 'residue', 'atom'} or ndarray, shape=(n,), dtype=int, optional
|
|
273
|
+
Defines on which scale the lDDT score is calculated.
|
|
274
|
+
|
|
275
|
+
- `'all'`: The score is computed over all contacts.
|
|
276
|
+
- `'chain'`: The score is calculated for each chain separately.
|
|
277
|
+
- `'residue'`: The score is calculated for each residue separately.
|
|
278
|
+
- `'atom'`: The score is calculated for each atom separately.
|
|
279
|
+
|
|
280
|
+
Alternatively, an array of aggregation bins can be provided, i.e. each contact
|
|
281
|
+
is assigned to the corresponding bin.
|
|
282
|
+
atom_mask : ndarray, shape=(n,), dtype=bool, optional
|
|
283
|
+
If given, the contacts are only computed for the masked atoms.
|
|
284
|
+
Atoms excluded by the mask do not have any contacts and their *lDDT* would
|
|
285
|
+
be NaN in case of ``aggregation="atom"``.
|
|
286
|
+
Providing this mask can significantly speed up the computation, if
|
|
287
|
+
only for certain chains/residues/atoms the *lDDT* is of interest.
|
|
288
|
+
partner_mask : ndarray, shape=(n,), dtype=bool, optional
|
|
289
|
+
If given, only contacts **to** the masked atoms are considered.
|
|
290
|
+
While `atom_mask` does not alter the *lDDT* for the masked atoms,
|
|
291
|
+
`partner_mask` does, as for each atom only the masked atoms are considered
|
|
292
|
+
as potential contact partners.
|
|
293
|
+
inclusion_radius : float, optional
|
|
294
|
+
Pairwise atom distances are considered within this radius in `reference`.
|
|
295
|
+
distance_bins : list of float, optional
|
|
296
|
+
The distance bins for the score calculation, i.e. if a distance deviation is
|
|
297
|
+
within the first bin, the score is 1, if it is outside all bins, the score is 0.
|
|
298
|
+
exclude_same_residue : bool, optional
|
|
299
|
+
If true, only atom distances between different residues are considered.
|
|
300
|
+
Otherwise, also atom distances within the same residue are included.
|
|
301
|
+
exclude_same_chain : bool, optional
|
|
302
|
+
If true, only atom distances between different chains are considered.
|
|
303
|
+
Otherwise, also atom distances within the same chain are included.
|
|
304
|
+
filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
|
|
305
|
+
Used for custom contact filtering, if the other parameters are not sufficient.
|
|
306
|
+
A function that takes an array of contact atom indices and returns a mask that
|
|
307
|
+
is ``True`` for all contacts that should be retained.
|
|
308
|
+
All other contacts are not considered for lDDT computation.
|
|
309
|
+
symmetric : bool, optional
|
|
310
|
+
If set to true, the *lDDT* score is computed symmetrically.
|
|
311
|
+
This means both contacts found in the `reference` and `subject` structure are
|
|
312
|
+
considered.
|
|
313
|
+
Hence the score is independent of which structure is given as `reference` and
|
|
314
|
+
`subject`.
|
|
315
|
+
Note that in this case `subject` must be an :class:`AtomArray` as well.
|
|
316
|
+
By default, only contacts in the `reference` are considered.
|
|
317
|
+
|
|
318
|
+
Returns
|
|
319
|
+
-------
|
|
320
|
+
lddt : float or ndarray, dtype=float
|
|
321
|
+
The lDDT score for each model and aggregation bin.
|
|
322
|
+
The shape depends on `subject` and `aggregation`:
|
|
323
|
+
If `subject` is an :class:`AtomArrayStack` (or equivalent coordinate
|
|
324
|
+
:class:`ndarray`), a dimension depicting each model is added.
|
|
325
|
+
If `aggregation` is not ``'all'``, a second dimension with the length equal to
|
|
326
|
+
the number of aggregation bins is added (i.e. number of chains, residues, etc.).
|
|
327
|
+
If both an :class:`AtomArray` as `subject` and ``aggregation='all'`` are passed,
|
|
328
|
+
a float is returned.
|
|
329
|
+
|
|
330
|
+
Notes
|
|
331
|
+
-----
|
|
332
|
+
The lDDT score measures how well the pairwise atom distances in a model match the
|
|
333
|
+
corresponding distances in a reference.
|
|
334
|
+
Hence, like :func:`rmspd()` it works superimposition-free, but instead of capturing
|
|
335
|
+
the global deviation, only the local environment within the `inclusion_radius` is
|
|
336
|
+
considered.
|
|
337
|
+
|
|
338
|
+
Note that by default, also hydrogen atoms are considered in the distance
|
|
339
|
+
calculation.
|
|
340
|
+
If this is undesired, the hydrogen atoms can be removed prior to the calculation.
|
|
341
|
+
|
|
342
|
+
References
|
|
343
|
+
----------
|
|
344
|
+
|
|
345
|
+
.. footbibliography::
|
|
346
|
+
|
|
347
|
+
Examples
|
|
348
|
+
--------
|
|
349
|
+
|
|
350
|
+
Calculate the global lDDT of all models to the first model:
|
|
351
|
+
|
|
352
|
+
>>> reference = atom_array_stack[0]
|
|
353
|
+
>>> subject = atom_array_stack[1:]
|
|
354
|
+
>>> print(lddt(reference, subject))
|
|
355
|
+
[0.799 0.769 0.792 0.836 0.799 0.752 0.860 0.769 0.825 0.777 0.760 0.787
|
|
356
|
+
0.790 0.783 0.804 0.842 0.769 0.797 0.757 0.852 0.811 0.786 0.805 0.755
|
|
357
|
+
0.734 0.794 0.771 0.778 0.842 0.772 0.815 0.789 0.828 0.750 0.826 0.739
|
|
358
|
+
0.760]
|
|
359
|
+
|
|
360
|
+
Calculate the residue-wise lDDT for a single model:
|
|
361
|
+
|
|
362
|
+
>>> subject = atom_array_stack[1]
|
|
363
|
+
>>> print(lddt(reference, subject, aggregation="residue"))
|
|
364
|
+
[0.599 0.692 0.870 0.780 0.830 0.881 0.872 0.658 0.782 0.901 0.888 0.885
|
|
365
|
+
0.856 0.795 0.847 0.603 0.895 0.878 0.871 0.789]
|
|
366
|
+
|
|
367
|
+
As an example of custom aggregation, calculate the lDDT for each chemical element:
|
|
368
|
+
|
|
369
|
+
>>> unique_elements = np.unique(reference.element)
|
|
370
|
+
>>> element_bins = np.array(
|
|
371
|
+
... [np.where(unique_elements == element)[0][0] for element in reference.element]
|
|
372
|
+
... )
|
|
373
|
+
>>> element_lddt = lddt(reference, subject, aggregation=element_bins)
|
|
374
|
+
>>> for element, lddt_for_element in zip(unique_elements, element_lddt):
|
|
375
|
+
... print(f"{element}: {lddt_for_element:.3f}")
|
|
376
|
+
C: 0.837
|
|
377
|
+
H: 0.770
|
|
378
|
+
N: 0.811
|
|
379
|
+
O: 0.808
|
|
380
|
+
|
|
381
|
+
If the reference structure has more atoms resolved than the subject structure,
|
|
382
|
+
the missing atoms can be indicated with *NaN* values:
|
|
383
|
+
|
|
384
|
+
>>> reference = atom_array_stack[0]
|
|
385
|
+
>>> subject = atom_array_stack[1].copy()
|
|
386
|
+
>>> # Simulate the situation where the first residue is missing in the subject
|
|
387
|
+
>>> subject.coord[subject.res_id == 1] = np.nan
|
|
388
|
+
>>> global_lddt = lddt(reference, subject)
|
|
389
|
+
>>> print(f"{global_lddt:.3f}")
|
|
390
|
+
0.751
|
|
391
|
+
"""
|
|
392
|
+
reference_coord = coord(reference)
|
|
393
|
+
subject_coord = coord(subject)
|
|
394
|
+
if subject_coord.shape[-2] != reference_coord.shape[-2]:
|
|
395
|
+
raise IndexError(
|
|
396
|
+
f"The given reference has {reference_coord.shape[-2]} atoms, but the "
|
|
397
|
+
f"subject has {subject_coord.shape[-2]} atoms"
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
contacts = _find_contacts(
|
|
401
|
+
reference,
|
|
402
|
+
atom_mask,
|
|
403
|
+
partner_mask,
|
|
404
|
+
inclusion_radius,
|
|
405
|
+
exclude_same_residue,
|
|
406
|
+
exclude_same_chain,
|
|
407
|
+
filter_function,
|
|
408
|
+
)
|
|
409
|
+
if symmetric:
|
|
410
|
+
if not isinstance(subject, AtomArray):
|
|
411
|
+
raise TypeError(
|
|
412
|
+
"Expected 'AtomArray' as subject, as symmetric lDDT is enabled, "
|
|
413
|
+
f"but got '{type(subject).__name__}'"
|
|
414
|
+
)
|
|
415
|
+
subject_contacts = _find_contacts(
|
|
416
|
+
subject,
|
|
417
|
+
atom_mask,
|
|
418
|
+
partner_mask,
|
|
419
|
+
inclusion_radius,
|
|
420
|
+
exclude_same_residue,
|
|
421
|
+
exclude_same_chain,
|
|
422
|
+
filter_function,
|
|
423
|
+
)
|
|
424
|
+
contacts = np.concatenate((contacts, subject_contacts), axis=0)
|
|
425
|
+
# Adding additional contacts may introduce duplicates between the existing and
|
|
426
|
+
# new ones -> filter them out
|
|
427
|
+
contacts = np.unique(contacts, axis=0)
|
|
428
|
+
if (
|
|
429
|
+
isinstance(aggregation, str)
|
|
430
|
+
and aggregation == "all"
|
|
431
|
+
and atom_mask is None
|
|
432
|
+
and partner_mask is None
|
|
433
|
+
):
|
|
434
|
+
# Remove duplicate pairs as each pair appears twice
|
|
435
|
+
# (if i is in threshold distance to j, j is also in threshold distance to i)
|
|
436
|
+
# keep only the pair where i < j
|
|
437
|
+
# This improves performance, as fewer distances need to be computed
|
|
438
|
+
# The assumption also only works when no atoms are masked
|
|
439
|
+
contacts = contacts[contacts[:, 0] < contacts[:, 1]]
|
|
440
|
+
|
|
441
|
+
reference_distances = index_distance(reference_coord, contacts)
|
|
442
|
+
subject_distances = index_distance(subject_coord, contacts)
|
|
443
|
+
deviations = np.abs(subject_distances - reference_distances)
|
|
444
|
+
distance_bins = np.asarray(distance_bins)
|
|
445
|
+
fraction_preserved_bins = np.count_nonzero(
|
|
446
|
+
deviations[..., np.newaxis] <= distance_bins[np.newaxis, :], axis=-1
|
|
447
|
+
) / len(distance_bins)
|
|
448
|
+
|
|
449
|
+
# Aggregate the fractions over the desired level
|
|
450
|
+
if isinstance(aggregation, str) and aggregation == "all":
|
|
451
|
+
# Average over all contacts
|
|
452
|
+
return np.mean(fraction_preserved_bins, axis=-1)
|
|
453
|
+
else:
|
|
454
|
+
# A string is also a 'Sequence'
|
|
455
|
+
# -> distinguish between string and array, list, etc.
|
|
456
|
+
if isinstance(
|
|
457
|
+
aggregation, (np.ndarray, collections.abc.Sequence)
|
|
458
|
+
) and not isinstance(aggregation, str):
|
|
459
|
+
return _average_over_indices(
|
|
460
|
+
fraction_preserved_bins,
|
|
461
|
+
bins=np.asarray(aggregation)[contacts[:, 0]],
|
|
462
|
+
)
|
|
463
|
+
elif aggregation == "chain":
|
|
464
|
+
return _average_over_indices(
|
|
465
|
+
fraction_preserved_bins,
|
|
466
|
+
bins=get_chain_positions(reference, contacts[:, 0]),
|
|
467
|
+
n_bins=get_chain_count(reference),
|
|
468
|
+
)
|
|
469
|
+
elif aggregation == "residue":
|
|
470
|
+
return _average_over_indices(
|
|
471
|
+
fraction_preserved_bins,
|
|
472
|
+
bins=get_residue_positions(reference, contacts[:, 0]),
|
|
473
|
+
n_bins=get_residue_count(reference),
|
|
474
|
+
)
|
|
475
|
+
elif aggregation == "atom":
|
|
476
|
+
return _average_over_indices(
|
|
477
|
+
fraction_preserved_bins, contacts[:, 0], reference.array_length()
|
|
478
|
+
)
|
|
479
|
+
else:
|
|
480
|
+
raise ValueError(f"Invalid aggregation level '{aggregation}'")
|
|
481
|
+
|
|
482
|
+
|
|
245
483
|
def _sq_euclidian(reference, subject):
|
|
246
484
|
"""
|
|
247
485
|
Calculate squared euclidian distance between atoms in two
|
|
@@ -272,3 +510,172 @@ def _sq_euclidian(reference, subject):
|
|
|
272
510
|
)
|
|
273
511
|
dif = subject_coord - reference_coord
|
|
274
512
|
return vector_dot(dif, dif)
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def _to_sparse_indices(all_contacts):
|
|
516
|
+
"""
|
|
517
|
+
Create tuples of contact indices from the :meth:`CellList.get_atoms()` return value.
|
|
518
|
+
|
|
519
|
+
In other words, they would mark the non-zero elements in a dense contact matrix.
|
|
520
|
+
|
|
521
|
+
Parameters
|
|
522
|
+
----------
|
|
523
|
+
all_contacts : ndarray, dtype=int, shape=(m,n)
|
|
524
|
+
The contact indices as returned by :meth:`CellList.get_atoms()`.
|
|
525
|
+
Padded with -1, in the second dimension.
|
|
526
|
+
Dimension *m* marks the query atoms, dimension *n* marks the contact atoms.
|
|
527
|
+
|
|
528
|
+
Returns
|
|
529
|
+
-------
|
|
530
|
+
combined_indices : ndarray, dtype=int, shape=(l,2)
|
|
531
|
+
The contact indices.
|
|
532
|
+
Each row contains a query atom index and the index of one of its contact atoms.
|
|
533
|
+
"""
|
|
534
|
+
# Find rows where a query atom has at least one contact
|
|
535
|
+
non_empty_indices = np.where(np.any(all_contacts != -1, axis=1))[0]
|
|
536
|
+
# Take those rows and flatten them
|
|
537
|
+
contact_indices = all_contacts[non_empty_indices].flatten()
|
|
538
|
+
# For each row the corresponding query atom is the same
|
|
539
|
+
# Hence in the flattened form the query atom index is simply repeated
|
|
540
|
+
query_indices = np.repeat(non_empty_indices, all_contacts.shape[1])
|
|
541
|
+
combined_indices = np.stack([query_indices, contact_indices], axis=1)
|
|
542
|
+
# Remove the padding values
|
|
543
|
+
return combined_indices[contact_indices != -1]
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _find_contacts(
|
|
547
|
+
atoms=None,
|
|
548
|
+
atom_mask=None,
|
|
549
|
+
partner_mask=None,
|
|
550
|
+
inclusion_radius=15,
|
|
551
|
+
exclude_same_residue=False,
|
|
552
|
+
exclude_same_chain=True,
|
|
553
|
+
filter_function=None,
|
|
554
|
+
):
|
|
555
|
+
"""
|
|
556
|
+
Find contacts between the atoms in the given structure.
|
|
557
|
+
|
|
558
|
+
Parameters
|
|
559
|
+
----------
|
|
560
|
+
atoms : AtomArray
|
|
561
|
+
The structure to find the contacts for.
|
|
562
|
+
atom_mask : ndarray, shape=(n,), dtype=bool, optional
|
|
563
|
+
If given, the contacts are only computed for the masked atoms.
|
|
564
|
+
Atoms excluded by the mask do not have any contacts and their *lDDT* would
|
|
565
|
+
be NaN in case of ``aggregation="atom"``.
|
|
566
|
+
Providing this mask can significantly speed up the computation, if
|
|
567
|
+
only for certain chains/residues/atoms the *lDDT* is of interest.
|
|
568
|
+
partner_mask : ndarray, shape=(n,), dtype=bool, optional
|
|
569
|
+
If given, only contacts **to** the masked atoms are considered.
|
|
570
|
+
While `atom_mask` does not alter the *lDDT* for the masked atoms,
|
|
571
|
+
`partner_mask` does, as for each atom only the masked atoms are considered
|
|
572
|
+
as potential contact partners.
|
|
573
|
+
inclusion_radius : float, optional
|
|
574
|
+
Pairwise atom distances are considered within this radius.
|
|
575
|
+
exclude_same_residue : bool, optional
|
|
576
|
+
If true, only atom distances between different residues are considered.
|
|
577
|
+
Otherwise, also atom distances within the same residue are included.
|
|
578
|
+
exclude_same_chain : bool, optional
|
|
579
|
+
If true, only atom distances between different chains are considered.
|
|
580
|
+
Otherwise, also atom distances within the same chain are included.
|
|
581
|
+
filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
|
|
582
|
+
Used for custom contact filtering, if the other parameters are not sufficient.
|
|
583
|
+
A function that takes an array of contact atom indices and returns a mask that
|
|
584
|
+
is ``True`` for all contacts that should be retained.
|
|
585
|
+
All other contacts are not considered for lDDT computation.
|
|
586
|
+
|
|
587
|
+
Returns
|
|
588
|
+
-------
|
|
589
|
+
contacts : ndarray, shape=(n,2), dtype=int
|
|
590
|
+
The array of contacts.
|
|
591
|
+
Each element represents a pair of atom indices that are in contact.
|
|
592
|
+
"""
|
|
593
|
+
coords = coord(atoms)
|
|
594
|
+
selection = ~np.isnan(coords).any(axis=-1)
|
|
595
|
+
if partner_mask is not None:
|
|
596
|
+
selection &= partner_mask
|
|
597
|
+
# Use a cell list to find atoms within inclusion radius in O(n) time complexity
|
|
598
|
+
cell_list = CellList(coords, inclusion_radius, selection=selection)
|
|
599
|
+
# Pairs of indices for atoms within the inclusion radius
|
|
600
|
+
if atom_mask is None:
|
|
601
|
+
all_contacts = cell_list.get_atoms(coords, inclusion_radius)
|
|
602
|
+
else:
|
|
603
|
+
filtered_contacts = cell_list.get_atoms(coords[atom_mask], inclusion_radius)
|
|
604
|
+
# Map the contacts for the masked atoms to the original coordinates
|
|
605
|
+
# Rows that were filtered out by the mask are fully padded with -1
|
|
606
|
+
# consistent with the padding of `get_atoms()`
|
|
607
|
+
all_contacts = np.full(
|
|
608
|
+
(coords.shape[0], filtered_contacts.shape[-1]),
|
|
609
|
+
-1,
|
|
610
|
+
dtype=filtered_contacts.dtype,
|
|
611
|
+
)
|
|
612
|
+
all_contacts[atom_mask] = filtered_contacts
|
|
613
|
+
# Convert into pairs of indices
|
|
614
|
+
contacts = _to_sparse_indices(all_contacts)
|
|
615
|
+
|
|
616
|
+
if exclude_same_chain:
|
|
617
|
+
# Find the index of the chain for each atom and keep only inter-chain contacts
|
|
618
|
+
chain_indices = get_chain_positions(atoms, contacts.flatten()).reshape(
|
|
619
|
+
contacts.shape
|
|
620
|
+
)
|
|
621
|
+
contacts = contacts[chain_indices[:, 0] != chain_indices[:, 1]]
|
|
622
|
+
elif exclude_same_residue:
|
|
623
|
+
# Find the index of the residue for each atom
|
|
624
|
+
residue_indices = get_residue_positions(atoms, contacts.flatten()).reshape(
|
|
625
|
+
contacts.shape
|
|
626
|
+
)
|
|
627
|
+
# Remove contacts between atoms of the same residue
|
|
628
|
+
contacts = contacts[residue_indices[:, 0] != residue_indices[:, 1]]
|
|
629
|
+
else:
|
|
630
|
+
# In any case self-contacts should not be considered
|
|
631
|
+
contacts = contacts[contacts[:, 0] != contacts[:, 1]]
|
|
632
|
+
if filter_function is not None:
|
|
633
|
+
mask = filter_function(contacts)
|
|
634
|
+
if mask.shape != (contacts.shape[0],):
|
|
635
|
+
raise IndexError(
|
|
636
|
+
f"Mask returned from filter function has shape {mask.shape}, "
|
|
637
|
+
f"but expected ({contacts.shape[0]},)"
|
|
638
|
+
)
|
|
639
|
+
contacts = contacts[mask, :]
|
|
640
|
+
return contacts
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def _average_over_indices(values, bins, n_bins=None):
|
|
644
|
+
"""
|
|
645
|
+
For each unique index in `bins`, average the corresponding values in `values`.
|
|
646
|
+
|
|
647
|
+
Based on
|
|
648
|
+
https://stackoverflow.com/questions/79140661/how-to-sum-values-based-on-a-second-index-array-in-a-vectorized-manner
|
|
649
|
+
|
|
650
|
+
Parameters
|
|
651
|
+
----------
|
|
652
|
+
values : ndarray, shape=(..., n)
|
|
653
|
+
The values to average.
|
|
654
|
+
bins : ndarray, shape=(n,) dtype=int
|
|
655
|
+
Associates each value from `values` with a bin.
|
|
656
|
+
n_bins : int, optional
|
|
657
|
+
The total number of bins.
|
|
658
|
+
This is necessary as some bins in `bins` may be empty.
|
|
659
|
+
By default the number of bins is determined from `bins`.
|
|
660
|
+
|
|
661
|
+
Returns
|
|
662
|
+
-------
|
|
663
|
+
averaged : ndarray, shape=(..., k)
|
|
664
|
+
The averaged values.
|
|
665
|
+
*k* is `n_bins` if given, otherwise the maximum value in `bins` + 1.
|
|
666
|
+
"""
|
|
667
|
+
if n_bins is None:
|
|
668
|
+
n_elements_per_bin = np.bincount(bins)
|
|
669
|
+
n_bins = len(n_elements_per_bin)
|
|
670
|
+
else:
|
|
671
|
+
n_elements_per_bin = np.bincount(bins, minlength=n_bins)
|
|
672
|
+
# The last dimension is replaced by the number of bins
|
|
673
|
+
# Broadcasting in 'np.add.at()' requires the replaced dimension to be the first
|
|
674
|
+
aggregated = np.zeros((n_bins, *values.shape[:-1]), dtype=values.dtype)
|
|
675
|
+
np.add.at(aggregated, bins, np.swapaxes(values, 0, -1))
|
|
676
|
+
# If an atom has no contacts, the corresponding value is NaN
|
|
677
|
+
# This result is expected, hence the warning is ignored
|
|
678
|
+
with warnings.catch_warnings():
|
|
679
|
+
warnings.simplefilter("ignore")
|
|
680
|
+
# Bring the bin dimension into the last dimension again
|
|
681
|
+
return np.swapaxes(aggregated, 0, -1) / n_elements_per_bin
|
biotite/structure/density.py
CHANGED
|
@@ -49,7 +49,7 @@ def density(atoms, selection=None, delta=1.0, bins=None, density=False, weights=
|
|
|
49
49
|
If False, the number of samples in each bin is returned.
|
|
50
50
|
Otherwise, returns the probability density function of each bin.
|
|
51
51
|
See :func:`numpy.histogramdd()` for further details.
|
|
52
|
-
weights: ndarray, shape=(n,) or shape=(m,n), optional
|
|
52
|
+
weights : ndarray, shape=(n,) or shape=(m,n), optional
|
|
53
53
|
An array of values to weight the contribution of *n* atoms in
|
|
54
54
|
*m* models.
|
|
55
55
|
If the shape is *(n,)*, the weights will be interpreted as
|
biotite/structure/dotbracket.py
CHANGED
|
@@ -31,12 +31,12 @@ def dot_bracket_from_structure(
|
|
|
31
31
|
|
|
32
32
|
Parameters
|
|
33
33
|
----------
|
|
34
|
-
|
|
34
|
+
nucleic_acid_strand : AtomArray
|
|
35
35
|
The nucleic acid strand to be represented in DBL-notation.
|
|
36
|
-
scores : ndarray, dtype=int, shape=(n,)
|
|
36
|
+
scores : ndarray, dtype=int, shape=(n,)
|
|
37
37
|
The score for each base pair, which is passed on to
|
|
38
38
|
:func:`pseudoknots()`.
|
|
39
|
-
max_pseudoknot_order : int
|
|
39
|
+
max_pseudoknot_order : int
|
|
40
40
|
The maximum pseudoknot order to be found. If a base pair would
|
|
41
41
|
be of a higher order, it is represented as unpaired. If ``None``
|
|
42
42
|
is given, all base pairs are evaluated.
|
|
@@ -48,8 +48,9 @@ def dot_bracket_from_structure(
|
|
|
48
48
|
|
|
49
49
|
See Also
|
|
50
50
|
--------
|
|
51
|
-
base_pairs
|
|
52
|
-
|
|
51
|
+
base_pairs : Compute the base pairs from a structure as passed to this function.
|
|
52
|
+
dot_bracket : Compute the dot bracket notation directly from base pairs.
|
|
53
|
+
pseudoknots : Get the pseudoknot order for each base pair.
|
|
53
54
|
|
|
54
55
|
References
|
|
55
56
|
----------
|
|
@@ -81,10 +82,9 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
|
|
|
81
82
|
strand.
|
|
82
83
|
length : int
|
|
83
84
|
The number of bases in the strand.
|
|
84
|
-
scores : ndarray, dtype=int, shape=(n,)
|
|
85
|
-
The score for each base pair, which is passed on to
|
|
86
|
-
|
|
87
|
-
max_pseudoknot_order : int (default: None)
|
|
85
|
+
scores : ndarray, dtype=int, shape=(n,)
|
|
86
|
+
The score for each base pair, which is passed on to :func:`pseudoknots()`.
|
|
87
|
+
max_pseudoknot_order : int
|
|
88
88
|
The maximum pseudoknot order to be found. If a base pair would
|
|
89
89
|
be of a higher order, it is represented as unpaired. If ``None``
|
|
90
90
|
is given, all pseudoknot orders are evaluated.
|
|
@@ -94,6 +94,18 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
|
|
|
94
94
|
notations : list [str, ...]
|
|
95
95
|
The DBL-notation for each solution from :func:`pseudoknots()`.
|
|
96
96
|
|
|
97
|
+
See Also
|
|
98
|
+
--------
|
|
99
|
+
base_pairs_from_dot_bracket : The reverse operation.
|
|
100
|
+
dot_bracket_from_structure : Compute the dot bracket notation from a structure.
|
|
101
|
+
base_pairs : Compute the base pairs from a structure as passed to this function.
|
|
102
|
+
pseudoknots : Get the pseudoknot order for each base pair.
|
|
103
|
+
|
|
104
|
+
References
|
|
105
|
+
----------
|
|
106
|
+
|
|
107
|
+
.. footbibliography::
|
|
108
|
+
|
|
97
109
|
Examples
|
|
98
110
|
--------
|
|
99
111
|
The sequence ``ACGTC`` has a length of 5. If there was to be a
|
|
@@ -107,18 +119,6 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
|
|
|
107
119
|
|
|
108
120
|
>>> dot_bracket(basepairs, 5)[0]
|
|
109
121
|
'(..).'
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
See Also
|
|
113
|
-
--------
|
|
114
|
-
dot_bracket_from_structure
|
|
115
|
-
base_pairs
|
|
116
|
-
pseudoknots
|
|
117
|
-
|
|
118
|
-
References
|
|
119
|
-
----------
|
|
120
|
-
|
|
121
|
-
.. footbibliography::
|
|
122
122
|
"""
|
|
123
123
|
# Make sure the lower residue is on the left for each row
|
|
124
124
|
basepairs = np.sort(basepairs, axis=1)
|
|
@@ -159,6 +159,15 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
159
159
|
Each row corresponds to the positions of the bases in the
|
|
160
160
|
sequence.
|
|
161
161
|
|
|
162
|
+
See Also
|
|
163
|
+
--------
|
|
164
|
+
dot_bracket : The reverse operation.
|
|
165
|
+
|
|
166
|
+
References
|
|
167
|
+
----------
|
|
168
|
+
|
|
169
|
+
.. footbibliography::
|
|
170
|
+
|
|
162
171
|
Examples
|
|
163
172
|
--------
|
|
164
173
|
The notation string ``'(..).'`` contains a base pair between the
|
|
@@ -167,15 +176,6 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
167
176
|
|
|
168
177
|
>>> base_pairs_from_dot_bracket('(..).')
|
|
169
178
|
array([[0, 3]])
|
|
170
|
-
|
|
171
|
-
See Also
|
|
172
|
-
--------
|
|
173
|
-
dot_bracket
|
|
174
|
-
|
|
175
|
-
References
|
|
176
|
-
----------
|
|
177
|
-
|
|
178
|
-
.. footbibliography::
|
|
179
179
|
"""
|
|
180
180
|
basepairs = []
|
|
181
181
|
opened_brackets = [[] for _ in range(len(_OPENING_BRACKETS))]
|
|
@@ -203,8 +203,7 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
203
203
|
for not_closed in opened_brackets:
|
|
204
204
|
if not_closed != []:
|
|
205
205
|
raise ValueError(
|
|
206
|
-
"Invalid DBL-notation, not all opening brackets have a "
|
|
207
|
-
"closing bracket"
|
|
206
|
+
"Invalid DBL-notation, not all opening brackets have a closing bracket"
|
|
208
207
|
)
|
|
209
208
|
|
|
210
209
|
# Sort the base pair indices in ascending order
|
biotite/structure/filter.py
CHANGED
|
@@ -294,7 +294,9 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
294
294
|
lies within the provided boundaries.
|
|
295
295
|
|
|
296
296
|
The result will depend on the atoms' order.
|
|
297
|
-
For instance, consider a molecule
|
|
297
|
+
For instance, consider a molecule:
|
|
298
|
+
|
|
299
|
+
.. code-block:: none
|
|
298
300
|
|
|
299
301
|
C3
|
|
300
302
|
|
|
|
@@ -306,12 +308,12 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
306
308
|
|
|
307
309
|
Parameters
|
|
308
310
|
----------
|
|
309
|
-
array: AtomArray
|
|
311
|
+
array : AtomArray
|
|
310
312
|
The array to filter.
|
|
311
|
-
min_len: float
|
|
312
|
-
Minmum bond length
|
|
313
|
-
max_len: float
|
|
314
|
-
Maximum bond length
|
|
313
|
+
min_len : float
|
|
314
|
+
Minimum bond length.
|
|
315
|
+
max_len : float
|
|
316
|
+
Maximum bond length.
|
|
315
317
|
|
|
316
318
|
Returns
|
|
317
319
|
-------
|
|
@@ -364,7 +366,6 @@ def filter_polymer(array, min_size=2, pol_type="peptide"):
|
|
|
364
366
|
filter : ndarray, dtype=bool
|
|
365
367
|
This array is `True` for all indices in `array`, where atoms belong to
|
|
366
368
|
consecutive polymer entity having at least `min_size` monomers.
|
|
367
|
-
|
|
368
369
|
"""
|
|
369
370
|
# Import `check_res_id_continuity` here to avoid circular imports
|
|
370
371
|
from biotite.structure.integrity import check_res_id_continuity
|
|
@@ -412,7 +413,6 @@ def filter_intersection(array, intersect):
|
|
|
412
413
|
>>> array1 = array1[filter_intersection(array1, array2)]
|
|
413
414
|
>>> print(array1.chain_id)
|
|
414
415
|
['B' 'C' 'D']
|
|
415
|
-
|
|
416
416
|
"""
|
|
417
417
|
filter = np.full(array.array_length(), True, dtype=bool)
|
|
418
418
|
intersect_categories = intersect.get_annotation_categories()
|