biotite 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +34 -0
- biotite/application/muscle/app3.py +2 -15
- biotite/application/muscle/app5.py +2 -2
- biotite/application/util.py +1 -1
- biotite/application/viennarna/rnaplot.py +6 -2
- biotite/database/rcsb/query.py +6 -6
- biotite/database/uniprot/check.py +20 -15
- biotite/database/uniprot/download.py +1 -1
- biotite/database/uniprot/query.py +1 -1
- biotite/sequence/align/alignment.py +16 -3
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +5 -5
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +17 -0
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +52 -42
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/matrix.py +273 -55
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/alphabet.py +3 -0
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/sequence/profile.py +86 -4
- biotite/sequence/seqtypes.py +124 -3
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +4 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +110 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +171 -0
- biotite/structure/alphabet/unkerasify.py +122 -0
- biotite/structure/atoms.py +156 -43
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/bonds.pyx +72 -21
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/filter.py +1 -1
- biotite/structure/geometry.py +60 -113
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +13 -13
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -32
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +63 -17
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -21
- biotite/structure/info/standardize.py +3 -2
- biotite/structure/io/mol/sdf.py +41 -40
- biotite/structure/io/pdb/convert.py +2 -0
- biotite/structure/io/pdb/file.py +74 -3
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +32 -8
- biotite/structure/io/pdbx/cif.py +148 -107
- biotite/structure/io/pdbx/component.py +9 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +227 -68
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +16 -16
- biotite/structure/molecules.py +141 -141
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/segments.py +1 -2
- biotite/structure/util.py +73 -1
- biotite/version.py +2 -2
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/bonds.pyx
CHANGED
|
@@ -17,6 +17,7 @@ cimport cython
|
|
|
17
17
|
cimport numpy as np
|
|
18
18
|
from libc.stdlib cimport free, realloc
|
|
19
19
|
|
|
20
|
+
from collections.abc import Sequence
|
|
20
21
|
import itertools
|
|
21
22
|
import numbers
|
|
22
23
|
from enum import IntEnum
|
|
@@ -59,6 +60,7 @@ class BondType(IntEnum):
|
|
|
59
60
|
- `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
|
|
60
61
|
- `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
|
|
61
62
|
- `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
|
|
63
|
+
- `COORDINATION` - Coordination complex involving a metal atom
|
|
62
64
|
"""
|
|
63
65
|
ANY = 0
|
|
64
66
|
SINGLE = 1
|
|
@@ -68,6 +70,7 @@ class BondType(IntEnum):
|
|
|
68
70
|
AROMATIC_SINGLE = 5
|
|
69
71
|
AROMATIC_DOUBLE = 6
|
|
70
72
|
AROMATIC_TRIPLE = 7
|
|
73
|
+
COORDINATION = 8
|
|
71
74
|
|
|
72
75
|
|
|
73
76
|
def without_aromaticity(self):
|
|
@@ -88,10 +91,12 @@ class BondType(IntEnum):
|
|
|
88
91
|
>>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
|
|
89
92
|
DOUBLE
|
|
90
93
|
"""
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
return BondType
|
|
94
|
+
if self == BondType.AROMATIC_SINGLE:
|
|
95
|
+
return BondType.SINGLE
|
|
96
|
+
elif self == BondType.AROMATIC_DOUBLE:
|
|
97
|
+
return BondType.DOUBLE
|
|
98
|
+
elif self == BondType.AROMATIC_TRIPLE:
|
|
99
|
+
return BondType.TRIPLE
|
|
95
100
|
else:
|
|
96
101
|
return self
|
|
97
102
|
|
|
@@ -305,6 +310,61 @@ class BondList(Copyable):
|
|
|
305
310
|
self._bonds = np.zeros((0, 3), dtype=np.uint32)
|
|
306
311
|
self._max_bonds_per_atom = 0
|
|
307
312
|
|
|
313
|
+
@staticmethod
|
|
314
|
+
def concatenate(bonds_lists):
|
|
315
|
+
"""
|
|
316
|
+
Concatenate multiple :class:`BondList` objects into a single
|
|
317
|
+
:class:`BondList`.
|
|
318
|
+
|
|
319
|
+
Parameters
|
|
320
|
+
----------
|
|
321
|
+
bonds_lists : iterable object of BondList
|
|
322
|
+
The bond lists to be concatenated.
|
|
323
|
+
|
|
324
|
+
Returns
|
|
325
|
+
-------
|
|
326
|
+
concatenated_bonds : BondList
|
|
327
|
+
The concatenated bond lists.
|
|
328
|
+
|
|
329
|
+
Examples
|
|
330
|
+
--------
|
|
331
|
+
|
|
332
|
+
>>> bonds1 = BondList(2, np.array([(0, 1)]))
|
|
333
|
+
>>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
|
|
334
|
+
>>> merged_bonds = BondList.concatenate([bonds1, bonds2])
|
|
335
|
+
>>> print(merged_bonds.get_atom_count())
|
|
336
|
+
5
|
|
337
|
+
>>> print(merged_bonds.as_array()[:, :2])
|
|
338
|
+
[[0 1]
|
|
339
|
+
[2 3]
|
|
340
|
+
[2 4]]
|
|
341
|
+
"""
|
|
342
|
+
# Ensure that the bonds_lists can be iterated over multiple times
|
|
343
|
+
if not isinstance(bonds_lists, Sequence):
|
|
344
|
+
bonds_lists = list(bonds_lists)
|
|
345
|
+
|
|
346
|
+
cdef np.ndarray merged_bonds = np.concatenate(
|
|
347
|
+
[bond_list._bonds for bond_list in bonds_lists]
|
|
348
|
+
)
|
|
349
|
+
# Offset the indices of appended bonds list
|
|
350
|
+
# (consistent with addition of AtomArray)
|
|
351
|
+
cdef int start = 0, stop = 0
|
|
352
|
+
cdef int cum_atom_count = 0
|
|
353
|
+
for bond_list in bonds_lists:
|
|
354
|
+
stop = start + bond_list._bonds.shape[0]
|
|
355
|
+
merged_bonds[start : stop, :2] += cum_atom_count
|
|
356
|
+
cum_atom_count += bond_list._atom_count
|
|
357
|
+
start = stop
|
|
358
|
+
|
|
359
|
+
cdef merged_bond_list = BondList(cum_atom_count)
|
|
360
|
+
# Array is not used in constructor to prevent unnecessary
|
|
361
|
+
# maximum and redundant bond calculation
|
|
362
|
+
merged_bond_list._bonds = merged_bonds
|
|
363
|
+
merged_bond_list._max_bonds_per_atom = max(
|
|
364
|
+
[bond_list._max_bonds_per_atom for bond_list in bonds_lists]
|
|
365
|
+
)
|
|
366
|
+
return merged_bond_list
|
|
367
|
+
|
|
308
368
|
def __copy_create__(self):
|
|
309
369
|
# Create empty bond list to prevent
|
|
310
370
|
# unnecessary removal of redundant atoms
|
|
@@ -453,9 +513,13 @@ class BondList(Copyable):
|
|
|
453
513
|
0 1 SINGLE
|
|
454
514
|
1 2 DOUBLE
|
|
455
515
|
"""
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
516
|
+
bond_types = self._bonds[:,2]
|
|
517
|
+
for aromatic_type, non_aromatic_type in [
|
|
518
|
+
(BondType.AROMATIC_SINGLE, BondType.SINGLE),
|
|
519
|
+
(BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
|
|
520
|
+
(BondType.AROMATIC_TRIPLE, BondType.TRIPLE)
|
|
521
|
+
]:
|
|
522
|
+
bond_types[bond_types == aromatic_type] = non_aromatic_type
|
|
459
523
|
|
|
460
524
|
def remove_bond_order(self):
|
|
461
525
|
"""
|
|
@@ -994,20 +1058,7 @@ class BondList(Copyable):
|
|
|
994
1058
|
)
|
|
995
1059
|
|
|
996
1060
|
def __add__(self, bond_list):
|
|
997
|
-
|
|
998
|
-
= np.concatenate([self._bonds, bond_list._bonds])
|
|
999
|
-
# Offset the indices of appended bonds list
|
|
1000
|
-
# (consistent with addition of AtomArray)
|
|
1001
|
-
merged_bonds[len(self._bonds):, :2] += self._atom_count
|
|
1002
|
-
cdef uint32 merged_count = self._atom_count + bond_list._atom_count
|
|
1003
|
-
cdef merged_bond_list = BondList(merged_count)
|
|
1004
|
-
# Array is not used in constructor to prevent unnecessary
|
|
1005
|
-
# maximum and redundant bond calculation
|
|
1006
|
-
merged_bond_list._bonds = merged_bonds
|
|
1007
|
-
merged_bond_list._max_bonds_per_atom = max(
|
|
1008
|
-
self._max_bonds_per_atom, bond_list._max_bonds_per_atom
|
|
1009
|
-
)
|
|
1010
|
-
return merged_bond_list
|
|
1061
|
+
return BondList.concatenate([self, bond_list])
|
|
1011
1062
|
|
|
1012
1063
|
def __getitem__(self, index):
|
|
1013
1064
|
## Variables for both, integer and boolean index arrays
|
|
Binary file
|
|
Binary file
|
biotite/structure/filter.py
CHANGED
|
@@ -577,7 +577,7 @@ def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
|
|
|
577
577
|
if len(letter_altloc_ids) > 0:
|
|
578
578
|
highest = -1.0
|
|
579
579
|
highest_id = None
|
|
580
|
-
for id in set(letter_altloc_ids):
|
|
580
|
+
for id in sorted(set(letter_altloc_ids)):
|
|
581
581
|
occupancy_sum = np.sum(occupancies_in_res[altloc_ids_in_res == id])
|
|
582
582
|
if occupancy_sum > highest:
|
|
583
583
|
highest = occupancy_sum
|
biotite/structure/geometry.py
CHANGED
|
@@ -25,10 +25,12 @@ __all__ = [
|
|
|
25
25
|
import numpy as np
|
|
26
26
|
from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
|
|
27
27
|
from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
|
|
28
|
-
from biotite.structure.
|
|
29
|
-
from biotite.structure.
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
from biotite.structure.filter import filter_amino_acids
|
|
29
|
+
from biotite.structure.util import (
|
|
30
|
+
coord_for_atom_name_per_residue,
|
|
31
|
+
norm_vector,
|
|
32
|
+
vector_dot,
|
|
33
|
+
)
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
def displacement(atoms1, atoms2, box=None):
|
|
@@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs):
|
|
|
480
482
|
|
|
481
483
|
def dihedral_backbone(atom_array):
|
|
482
484
|
"""
|
|
483
|
-
Measure the characteristic backbone dihedral angles of a
|
|
484
|
-
structure.
|
|
485
|
+
Measure the characteristic backbone dihedral angles of a chain.
|
|
485
486
|
|
|
486
487
|
Parameters
|
|
487
488
|
----------
|
|
488
|
-
|
|
489
|
-
The protein structure
|
|
490
|
-
|
|
491
|
-
Chain transitions are allowed, the angles at the transition are
|
|
492
|
-
`NaN`.
|
|
493
|
-
The order of the backbone atoms for each residue must be
|
|
494
|
-
(N, CA, C).
|
|
489
|
+
atom_array : AtomArray or AtomArrayStack
|
|
490
|
+
The protein structure to measure the dihedral angles for.
|
|
491
|
+
For missing backbone atoms the corresponding angles are `NaN`.
|
|
495
492
|
|
|
496
493
|
Returns
|
|
497
494
|
-------
|
|
498
495
|
phi, psi, omega : ndarray
|
|
499
|
-
An array containing the 3 backbone dihedral angles for every
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
have *NaN* values.
|
|
503
|
-
output angles are 2-dimensional,
|
|
504
|
-
to the model number.
|
|
505
|
-
|
|
506
|
-
Raises
|
|
507
|
-
------
|
|
508
|
-
BadStructureError
|
|
509
|
-
If the amount of backbone atoms is not equal to amount of
|
|
510
|
-
residues times 3 (for N, CA and C).
|
|
511
|
-
|
|
512
|
-
See Also
|
|
513
|
-
--------
|
|
514
|
-
dihedral
|
|
515
|
-
|
|
516
|
-
Examples
|
|
517
|
-
--------
|
|
518
|
-
|
|
519
|
-
>>> phi, psi, omega = dihedral_backbone(atom_array)
|
|
520
|
-
>>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T)
|
|
521
|
-
[[ nan -56.145]
|
|
522
|
-
[ -43.980 -51.309]
|
|
523
|
-
[ -66.466 -30.898]
|
|
524
|
-
[ -65.219 -45.945]
|
|
525
|
-
[ -64.747 -30.346]
|
|
526
|
-
[ -73.136 -43.425]
|
|
527
|
-
[ -64.882 -43.255]
|
|
528
|
-
[ -59.509 -25.698]
|
|
529
|
-
[ -77.989 -8.823]
|
|
530
|
-
[ 110.784 8.079]
|
|
531
|
-
[ 55.244 -124.371]
|
|
532
|
-
[ -57.983 -28.766]
|
|
533
|
-
[ -81.834 19.125]
|
|
534
|
-
[-124.057 13.401]
|
|
535
|
-
[ 67.931 25.218]
|
|
536
|
-
[-143.952 131.297]
|
|
537
|
-
[ -70.100 160.068]
|
|
538
|
-
[ -69.484 145.669]
|
|
539
|
-
[ -77.264 124.223]
|
|
540
|
-
[ -78.100 nan]]
|
|
496
|
+
An array containing the 3 backbone dihedral angles for every CA atom.
|
|
497
|
+
`phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
|
|
498
|
+
C-terminus.
|
|
499
|
+
In these places the arrays have *NaN* values.
|
|
500
|
+
If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional,
|
|
501
|
+
the first dimension corresponds to the model number.
|
|
541
502
|
"""
|
|
542
|
-
|
|
543
|
-
backbone = atom_array[..., bb_filter]
|
|
544
|
-
|
|
545
|
-
if (
|
|
546
|
-
backbone.array_length() % 3 != 0
|
|
547
|
-
or (backbone.atom_name[0::3] != "N").any()
|
|
548
|
-
or (backbone.atom_name[1::3] != "CA").any()
|
|
549
|
-
or (backbone.atom_name[2::3] != "C").any()
|
|
550
|
-
):
|
|
551
|
-
raise BadStructureError(
|
|
552
|
-
"The backbone is invalid, must be repeats of (N, CA, C), "
|
|
553
|
-
"maybe a backbone atom is missing"
|
|
554
|
-
)
|
|
555
|
-
phis = []
|
|
556
|
-
psis = []
|
|
557
|
-
omegas = []
|
|
558
|
-
for chain_bb in chain_iter(backbone):
|
|
559
|
-
phi, psi, omega = _dihedral_backbone(chain_bb)
|
|
560
|
-
phis.append(phi)
|
|
561
|
-
psis.append(psi)
|
|
562
|
-
omegas.append(omega)
|
|
563
|
-
return (
|
|
564
|
-
np.concatenate(phis, axis=-1),
|
|
565
|
-
np.concatenate(psis, axis=-1),
|
|
566
|
-
np.concatenate(omegas, axis=-1),
|
|
567
|
-
)
|
|
503
|
+
amino_acid_mask = filter_amino_acids(atom_array)
|
|
568
504
|
|
|
505
|
+
# Coordinates for dihedral angle calculation
|
|
506
|
+
coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue(
|
|
507
|
+
atom_array,
|
|
508
|
+
("N", "CA", "C"),
|
|
509
|
+
amino_acid_mask,
|
|
510
|
+
)
|
|
511
|
+
n_residues = coord_n.shape[-2]
|
|
569
512
|
|
|
570
|
-
def _dihedral_backbone(chain_bb):
|
|
571
|
-
bb_coord = chain_bb.coord
|
|
572
513
|
# Coordinates for dihedral angle calculation
|
|
573
514
|
# Dim 0: Model index (only for atom array stacks)
|
|
574
515
|
# Dim 1: Angle index
|
|
575
516
|
# Dim 2: X, Y, Z coordinates
|
|
576
517
|
# Dim 3: Atoms involved in dihedral angle
|
|
577
|
-
if isinstance(
|
|
578
|
-
angle_coord_shape = (
|
|
579
|
-
elif isinstance(
|
|
580
|
-
angle_coord_shape = (
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
# Indices for coordinates of CA atoms
|
|
586
|
-
ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1
|
|
518
|
+
if isinstance(atom_array, AtomArray):
|
|
519
|
+
angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4)
|
|
520
|
+
elif isinstance(atom_array, AtomArrayStack):
|
|
521
|
+
angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4)
|
|
522
|
+
coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
|
|
523
|
+
coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
|
|
524
|
+
coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32)
|
|
525
|
+
|
|
587
526
|
# fmt: off
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
527
|
+
coord_for_phi[..., 1:, :, 0] = coord_c[..., 0:-1, :]
|
|
528
|
+
coord_for_phi[..., 1:, :, 1] = coord_n[..., 1:, :]
|
|
529
|
+
coord_for_phi[..., 1:, :, 2] = coord_ca[..., 1:, :]
|
|
530
|
+
coord_for_phi[..., 1:, :, 3] = coord_c[..., 1:, :]
|
|
531
|
+
|
|
532
|
+
coord_for_psi[..., 0:-1, :, 0] = coord_n[..., 0:-1, :]
|
|
533
|
+
coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :]
|
|
534
|
+
coord_for_psi[..., 0:-1, :, 2] = coord_c[..., 0:-1, :]
|
|
535
|
+
coord_for_psi[..., 0:-1, :, 3] = coord_n[..., 1:, :]
|
|
536
|
+
|
|
537
|
+
coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :]
|
|
538
|
+
coord_for_omg[..., 0:-1, :, 1] = coord_c[..., 0:-1, :]
|
|
539
|
+
coord_for_omg[..., 0:-1, :, 2] = coord_n[..., 1:, :]
|
|
540
|
+
coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:, :]
|
|
600
541
|
# fmt: on
|
|
601
542
|
|
|
602
543
|
phi = dihedral(
|
|
603
|
-
|
|
544
|
+
coord_for_phi[..., 0],
|
|
545
|
+
coord_for_phi[..., 1],
|
|
546
|
+
coord_for_phi[..., 2],
|
|
547
|
+
coord_for_phi[..., 3],
|
|
604
548
|
)
|
|
605
549
|
psi = dihedral(
|
|
606
|
-
|
|
550
|
+
coord_for_psi[..., 0],
|
|
551
|
+
coord_for_psi[..., 1],
|
|
552
|
+
coord_for_psi[..., 2],
|
|
553
|
+
coord_for_psi[..., 3],
|
|
607
554
|
)
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
555
|
+
omg = dihedral(
|
|
556
|
+
coord_for_omg[..., 0],
|
|
557
|
+
coord_for_omg[..., 1],
|
|
558
|
+
coord_for_omg[..., 2],
|
|
559
|
+
coord_for_omg[..., 3],
|
|
613
560
|
)
|
|
614
561
|
|
|
615
|
-
return phi, psi,
|
|
562
|
+
return phi, psi, omg
|
|
616
563
|
|
|
617
564
|
|
|
618
565
|
def centroid(atoms):
|
biotite/structure/info/atoms.py
CHANGED
|
@@ -42,19 +42,19 @@ def residue(res_name):
|
|
|
42
42
|
>>> alanine = residue("ALA")
|
|
43
43
|
>>> # Atoms and geometry
|
|
44
44
|
>>> print(alanine)
|
|
45
|
-
0 ALA N N -0.
|
|
46
|
-
0 ALA CA C 0.
|
|
47
|
-
0 ALA C C -0.
|
|
48
|
-
0 ALA O O -1.
|
|
49
|
-
0 ALA CB C 1.
|
|
50
|
-
0 ALA OXT O 0.
|
|
51
|
-
0 ALA H H -1.
|
|
52
|
-
0 ALA H2 H -0.
|
|
53
|
-
0 ALA HA H 0.
|
|
54
|
-
0 ALA HB1 H 1.
|
|
55
|
-
0 ALA HB2 H 0.
|
|
56
|
-
0 ALA HB3 H 2.
|
|
57
|
-
0 ALA HXT H 0.
|
|
45
|
+
0 ALA N N -0.966 0.493 1.500
|
|
46
|
+
0 ALA CA C 0.257 0.418 0.692
|
|
47
|
+
0 ALA C C -0.094 0.017 -0.716
|
|
48
|
+
0 ALA O O -1.056 -0.682 -0.923
|
|
49
|
+
0 ALA CB C 1.204 -0.620 1.296
|
|
50
|
+
0 ALA OXT O 0.661 0.439 -1.742
|
|
51
|
+
0 ALA H H -1.383 -0.425 1.482
|
|
52
|
+
0 ALA H2 H -0.676 0.661 2.452
|
|
53
|
+
0 ALA HA H 0.746 1.392 0.682
|
|
54
|
+
0 ALA HB1 H 1.459 -0.330 2.316
|
|
55
|
+
0 ALA HB2 H 0.715 -1.594 1.307
|
|
56
|
+
0 ALA HB3 H 2.113 -0.676 0.697
|
|
57
|
+
0 ALA HXT H 0.435 0.182 -2.647
|
|
58
58
|
>>> # Bonds
|
|
59
59
|
>>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
|
|
60
60
|
[['N' 'CA']
|
biotite/structure/info/bonds.py
CHANGED
|
@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["bond_type", "bonds_in_residue"]
|
|
8
8
|
|
|
9
|
+
import functools
|
|
9
10
|
from biotite.structure.bonds import BondType
|
|
10
11
|
from biotite.structure.info.ccd import get_from_ccd
|
|
11
12
|
|
|
@@ -69,6 +70,7 @@ def bond_type(res_name, atom_name1, atom_name2):
|
|
|
69
70
|
return None
|
|
70
71
|
|
|
71
72
|
|
|
73
|
+
@functools.cache
|
|
72
74
|
def bonds_in_residue(res_name):
|
|
73
75
|
"""
|
|
74
76
|
Get a dictionary containing all atoms inside a given residue
|
|
@@ -94,6 +96,10 @@ def bonds_in_residue(res_name):
|
|
|
94
96
|
In other functionalities throughout *Biotite* that uses this
|
|
95
97
|
function.
|
|
96
98
|
|
|
99
|
+
Notes
|
|
100
|
+
-----
|
|
101
|
+
The returned values are cached for faster access in subsequent calls.
|
|
102
|
+
|
|
97
103
|
Examples
|
|
98
104
|
--------
|
|
99
105
|
>>> bonds = bonds_in_residue("PHE")
|
|
@@ -126,16 +132,16 @@ def bonds_in_residue(res_name):
|
|
|
126
132
|
"""
|
|
127
133
|
global _intra_bonds
|
|
128
134
|
if res_name not in _intra_bonds:
|
|
129
|
-
|
|
130
|
-
if
|
|
135
|
+
chem_comp_bond = get_from_ccd("chem_comp_bond", res_name)
|
|
136
|
+
if chem_comp_bond is None:
|
|
131
137
|
_intra_bonds[res_name] = {}
|
|
132
138
|
else:
|
|
133
139
|
bonds_for_residue = {}
|
|
134
140
|
for atom1, atom2, order, aromatic_flag in zip(
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
141
|
+
chem_comp_bond["atom_id_1"].as_array(),
|
|
142
|
+
chem_comp_bond["atom_id_2"].as_array(),
|
|
143
|
+
chem_comp_bond["value_order"].as_array(),
|
|
144
|
+
chem_comp_bond["pdbx_aromatic_flag"].as_array(),
|
|
139
145
|
):
|
|
140
146
|
bond_type = BOND_TYPES[order, aromatic_flag]
|
|
141
147
|
bonds_for_residue[atom1.item(), atom2.item()] = bond_type
|
biotite/structure/info/ccd.py
CHANGED
|
@@ -4,23 +4,23 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure.info"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["get_ccd", "get_from_ccd"]
|
|
7
|
+
__all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
|
|
8
8
|
|
|
9
|
+
import functools
|
|
10
|
+
import importlib
|
|
11
|
+
import inspect
|
|
12
|
+
import pkgutil
|
|
9
13
|
from pathlib import Path
|
|
10
14
|
import numpy as np
|
|
11
15
|
|
|
12
|
-
|
|
13
|
-
|
|
16
|
+
_CCD_FILE = Path(__file__).parent / "components.bcif"
|
|
17
|
+
_SPECIAL_ID_COLUMN_NAMES = {
|
|
14
18
|
"chem_comp": "id",
|
|
15
|
-
"chem_comp_atom": "comp_id",
|
|
16
|
-
"chem_comp_bond": "comp_id",
|
|
17
19
|
}
|
|
18
|
-
|
|
19
|
-
_ccd_block = None
|
|
20
|
-
# For each category this index gives the start and stop for each residue
|
|
21
|
-
_residue_index = {}
|
|
20
|
+
_DEFAULT_ID_COLUMN_NAME = "comp_id"
|
|
22
21
|
|
|
23
22
|
|
|
23
|
+
@functools.cache
|
|
24
24
|
def get_ccd():
|
|
25
25
|
"""
|
|
26
26
|
Get the internal subset of the PDB
|
|
@@ -29,8 +29,16 @@ def get_ccd():
|
|
|
29
29
|
|
|
30
30
|
Returns
|
|
31
31
|
-------
|
|
32
|
-
ccd :
|
|
32
|
+
ccd : BinaryCIFBlock
|
|
33
33
|
The CCD.
|
|
34
|
+
It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
|
|
35
|
+
|
|
36
|
+
Warnings
|
|
37
|
+
--------
|
|
38
|
+
|
|
39
|
+
Consider the return value as read-only.
|
|
40
|
+
As other functions cache data from it, changing data may lead to undefined
|
|
41
|
+
behavior.
|
|
34
42
|
|
|
35
43
|
References
|
|
36
44
|
----------
|
|
@@ -41,13 +49,49 @@ def get_ccd():
|
|
|
41
49
|
# Avoid circular import
|
|
42
50
|
from biotite.structure.io.pdbx.bcif import BinaryCIFFile
|
|
43
51
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
52
|
+
try:
|
|
53
|
+
return BinaryCIFFile.read(_CCD_FILE).block
|
|
54
|
+
except FileNotFoundError:
|
|
55
|
+
raise RuntimeError(
|
|
56
|
+
"Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def set_ccd_path(ccd_path):
|
|
61
|
+
"""
|
|
62
|
+
Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
|
|
49
63
|
|
|
64
|
+
This function also clears the cache of functions depending on the CCD to ensure
|
|
65
|
+
that the new CCD is used.
|
|
50
66
|
|
|
67
|
+
Parameters
|
|
68
|
+
----------
|
|
69
|
+
ccd_path : path-like
|
|
70
|
+
The path to the custom CCD in BinaryCIF format, prepared with the
|
|
71
|
+
``setup_ccd.py`` module.
|
|
72
|
+
|
|
73
|
+
Notes
|
|
74
|
+
-----
|
|
75
|
+
This function is intended for advanced users who need to add information for
|
|
76
|
+
compounds, which are not part of the internal CCD.
|
|
77
|
+
The reason might be that an updated version already exists upstream or that
|
|
78
|
+
the user wants to add custom compounds to the CCD.
|
|
79
|
+
"""
|
|
80
|
+
global _CCD_FILE
|
|
81
|
+
_CCD_FILE = Path(ccd_path)
|
|
82
|
+
|
|
83
|
+
# Clear caches in all functions in biotite.structure.info
|
|
84
|
+
info_modules = [
|
|
85
|
+
importlib.import_module(f"biotite.structure.info.{mod_name}")
|
|
86
|
+
for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
|
|
87
|
+
]
|
|
88
|
+
for module in info_modules:
|
|
89
|
+
for _, function in inspect.getmembers(module, callable):
|
|
90
|
+
if hasattr(function, "cache_clear"):
|
|
91
|
+
function.cache_clear()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@functools.cache
|
|
51
95
|
def get_from_ccd(category_name, comp_id, column_name=None):
|
|
52
96
|
"""
|
|
53
97
|
Get the rows for the given residue in the given category from the
|
|
@@ -67,9 +111,13 @@ def get_from_ccd(category_name, comp_id, column_name=None):
|
|
|
67
111
|
|
|
68
112
|
Returns
|
|
69
113
|
-------
|
|
70
|
-
|
|
71
|
-
The
|
|
72
|
-
|
|
114
|
+
slice : BinaryCIFCategory or BinaryCIFColumn
|
|
115
|
+
The category or column (if `column_name` is provided) containing only the rows
|
|
116
|
+
for the given residue.
|
|
117
|
+
|
|
118
|
+
Notes
|
|
119
|
+
-----
|
|
120
|
+
The returned values are cached for faster access in subsequent calls.
|
|
73
121
|
|
|
74
122
|
References
|
|
75
123
|
----------
|
|
@@ -77,28 +125,41 @@ def get_from_ccd(category_name, comp_id, column_name=None):
|
|
|
77
125
|
.. footbibliography::
|
|
78
126
|
|
|
79
127
|
"""
|
|
80
|
-
global _residue_index
|
|
81
|
-
ccd = get_ccd()
|
|
82
|
-
category = ccd[category_name]
|
|
83
|
-
if category_name not in _residue_index:
|
|
84
|
-
_residue_index[category_name] = _index_residues(
|
|
85
|
-
category[INDEX_COLUMN_NAME[category_name]].as_array()
|
|
86
|
-
)
|
|
87
128
|
try:
|
|
88
|
-
start, stop = _residue_index
|
|
129
|
+
start, stop = _residue_index(category_name)[comp_id]
|
|
89
130
|
except KeyError:
|
|
90
131
|
return None
|
|
91
132
|
|
|
133
|
+
category = get_ccd()[category_name]
|
|
92
134
|
if column_name is None:
|
|
93
|
-
return
|
|
94
|
-
col_name: category[col_name].as_array()[start:stop]
|
|
95
|
-
for col_name in category.keys()
|
|
96
|
-
}
|
|
135
|
+
return _filter_category(category, slice(start, stop))
|
|
97
136
|
else:
|
|
98
|
-
return category[column_name]
|
|
137
|
+
return _filter_column(category[column_name], slice(start, stop))
|
|
138
|
+
|
|
99
139
|
|
|
140
|
+
@functools.cache
|
|
141
|
+
def _residue_index(category_name):
|
|
142
|
+
"""
|
|
143
|
+
Get the start and stop index for each component name in the given
|
|
144
|
+
CCD category.
|
|
145
|
+
|
|
146
|
+
Parameters
|
|
147
|
+
----------
|
|
148
|
+
category_name : str
|
|
149
|
+
The category to determine start and stop indices for each component in.
|
|
150
|
+
|
|
151
|
+
Returns
|
|
152
|
+
-------
|
|
153
|
+
index : dict (str -> (int, int))
|
|
154
|
+
The index maps each present component name to the corresponding
|
|
155
|
+
start and exclusive stop index in `id_column`.
|
|
156
|
+
"""
|
|
157
|
+
category = get_ccd()[category_name]
|
|
158
|
+
id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
|
|
159
|
+
category_name, _DEFAULT_ID_COLUMN_NAME
|
|
160
|
+
)
|
|
161
|
+
id_column = category[id_column_name].as_array()
|
|
100
162
|
|
|
101
|
-
def _index_residues(id_column):
|
|
102
163
|
residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
|
|
103
164
|
# The final start is the exclusive stop of last residue
|
|
104
165
|
residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
|
|
@@ -107,3 +168,35 @@ def _index_residues(id_column):
|
|
|
107
168
|
comp_id = id_column[residue_starts[i]].item()
|
|
108
169
|
index[comp_id] = (residue_starts[i], residue_starts[i + 1])
|
|
109
170
|
return index
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _filter_category(category, index):
|
|
174
|
+
"""
|
|
175
|
+
Reduce the category to the values for the given index.
|
|
176
|
+
"""
|
|
177
|
+
# Avoid circular import
|
|
178
|
+
from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
|
|
179
|
+
|
|
180
|
+
return BinaryCIFCategory(
|
|
181
|
+
{key: _filter_column(column, index) for key, column in category.items()}
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _filter_column(column, index):
|
|
186
|
+
"""
|
|
187
|
+
Reduce the column to the values for the given index.
|
|
188
|
+
"""
|
|
189
|
+
# Avoid circular import
|
|
190
|
+
from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
|
|
191
|
+
from biotite.structure.io.pdbx.component import MaskValue
|
|
192
|
+
|
|
193
|
+
data_array = column.data.array[index]
|
|
194
|
+
mask_array = column.mask.array[index] if column.mask is not None else None
|
|
195
|
+
return BinaryCIFColumn(
|
|
196
|
+
BinaryCIFData(data_array),
|
|
197
|
+
(
|
|
198
|
+
BinaryCIFData(mask_array)
|
|
199
|
+
if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
|
|
200
|
+
else None
|
|
201
|
+
),
|
|
202
|
+
)
|
|
Binary file
|