biotite 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/application/dssp/app.py +13 -3
  2. biotite/application/localapp.py +34 -0
  3. biotite/application/muscle/app3.py +2 -15
  4. biotite/application/muscle/app5.py +2 -2
  5. biotite/application/util.py +1 -1
  6. biotite/application/viennarna/rnaplot.py +6 -2
  7. biotite/database/rcsb/query.py +6 -6
  8. biotite/database/uniprot/check.py +20 -15
  9. biotite/database/uniprot/download.py +1 -1
  10. biotite/database/uniprot/query.py +1 -1
  11. biotite/sequence/align/alignment.py +16 -3
  12. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  13. biotite/sequence/align/banded.pyx +5 -5
  14. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  15. biotite/sequence/align/kmeralphabet.pyx +17 -0
  16. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  17. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  18. biotite/sequence/align/kmertable.pyx +52 -42
  19. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  20. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  21. biotite/sequence/align/matrix.py +273 -55
  22. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  23. biotite/sequence/align/matrix_data/PB.license +21 -0
  24. biotite/sequence/align/matrix_data/PB.mat +18 -0
  25. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  26. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  27. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  28. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  29. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  30. biotite/sequence/alphabet.py +3 -0
  31. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  32. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  33. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  34. biotite/sequence/graphics/colorschemes.py +44 -11
  35. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  36. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  37. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  38. biotite/sequence/profile.py +86 -4
  39. biotite/sequence/seqtypes.py +124 -3
  40. biotite/setup_ccd.py +197 -0
  41. biotite/structure/__init__.py +4 -3
  42. biotite/structure/alphabet/__init__.py +25 -0
  43. biotite/structure/alphabet/encoder.py +332 -0
  44. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  45. biotite/structure/alphabet/i3d.py +110 -0
  46. biotite/structure/alphabet/layers.py +86 -0
  47. biotite/structure/alphabet/pb.license +21 -0
  48. biotite/structure/alphabet/pb.py +171 -0
  49. biotite/structure/alphabet/unkerasify.py +122 -0
  50. biotite/structure/atoms.py +129 -40
  51. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  52. biotite/structure/bonds.pyx +72 -21
  53. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  54. biotite/structure/charges.cpython-312-darwin.so +0 -0
  55. biotite/structure/geometry.py +60 -113
  56. biotite/structure/info/__init__.py +1 -0
  57. biotite/structure/info/atoms.py +13 -13
  58. biotite/structure/info/bonds.py +12 -6
  59. biotite/structure/info/ccd.py +125 -32
  60. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  61. biotite/structure/info/groups.py +63 -17
  62. biotite/structure/info/masses.py +9 -6
  63. biotite/structure/info/misc.py +15 -21
  64. biotite/structure/info/standardize.py +3 -2
  65. biotite/structure/io/mol/sdf.py +41 -40
  66. biotite/structure/io/pdb/convert.py +2 -0
  67. biotite/structure/io/pdb/file.py +74 -3
  68. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  69. biotite/structure/io/pdbqt/file.py +32 -32
  70. biotite/structure/io/pdbx/__init__.py +1 -0
  71. biotite/structure/io/pdbx/bcif.py +32 -8
  72. biotite/structure/io/pdbx/cif.py +72 -59
  73. biotite/structure/io/pdbx/component.py +9 -4
  74. biotite/structure/io/pdbx/compress.py +321 -0
  75. biotite/structure/io/pdbx/convert.py +194 -48
  76. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  77. biotite/structure/io/pdbx/encoding.pyx +98 -17
  78. biotite/structure/molecules.py +141 -141
  79. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  80. biotite/structure/segments.py +1 -2
  81. biotite/structure/util.py +73 -1
  82. biotite/version.py +2 -2
  83. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
  84. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
  85. biotite/structure/info/ccd/README.rst +0 -8
  86. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  87. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  88. biotite/structure/info/ccd/nucleotides.txt +0 -798
  89. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
  90. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -25,10 +25,12 @@ __all__ = [
25
25
  import numpy as np
26
26
  from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
27
27
  from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
28
- from biotite.structure.chains import chain_iter
29
- from biotite.structure.error import BadStructureError
30
- from biotite.structure.filter import filter_peptide_backbone
31
- from biotite.structure.util import norm_vector, vector_dot
28
+ from biotite.structure.filter import filter_amino_acids
29
+ from biotite.structure.util import (
30
+ coord_for_atom_name_per_residue,
31
+ norm_vector,
32
+ vector_dot,
33
+ )
32
34
 
33
35
 
34
36
  def displacement(atoms1, atoms2, box=None):
@@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs):
480
482
 
481
483
  def dihedral_backbone(atom_array):
482
484
  """
483
- Measure the characteristic backbone dihedral angles of a protein
484
- structure.
485
+ Measure the characteristic backbone dihedral angles of a chain.
485
486
 
486
487
  Parameters
487
488
  ----------
488
- atom_array: AtomArray or AtomArrayStack
489
- The protein structure. A complete backbone, without gaps,
490
- is required here.
491
- Chain transitions are allowed, the angles at the transition are
492
- `NaN`.
493
- The order of the backbone atoms for each residue must be
494
- (N, CA, C).
489
+ atom_array : AtomArray or AtomArrayStack
490
+ The protein structure to measure the dihedral angles for.
491
+ For missing backbone atoms the corresponding angles are `NaN`.
495
492
 
496
493
  Returns
497
494
  -------
498
495
  phi, psi, omega : ndarray
499
- An array containing the 3 backbone dihedral angles for every
500
- CA. 'phi' is not defined at the N-terminus, 'psi' and 'omega'
501
- are not defined at the C-terminus. In these places the arrays
502
- have *NaN* values. If an :class:`AtomArrayStack` is given, the
503
- output angles are 2-dimensional, the first dimension corresponds
504
- to the model number.
505
-
506
- Raises
507
- ------
508
- BadStructureError
509
- If the amount of backbone atoms is not equal to amount of
510
- residues times 3 (for N, CA and C).
511
-
512
- See Also
513
- --------
514
- dihedral
515
-
516
- Examples
517
- --------
518
-
519
- >>> phi, psi, omega = dihedral_backbone(atom_array)
520
- >>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T)
521
- [[ nan -56.145]
522
- [ -43.980 -51.309]
523
- [ -66.466 -30.898]
524
- [ -65.219 -45.945]
525
- [ -64.747 -30.346]
526
- [ -73.136 -43.425]
527
- [ -64.882 -43.255]
528
- [ -59.509 -25.698]
529
- [ -77.989 -8.823]
530
- [ 110.784 8.079]
531
- [ 55.244 -124.371]
532
- [ -57.983 -28.766]
533
- [ -81.834 19.125]
534
- [-124.057 13.401]
535
- [ 67.931 25.218]
536
- [-143.952 131.297]
537
- [ -70.100 160.068]
538
- [ -69.484 145.669]
539
- [ -77.264 124.223]
540
- [ -78.100 nan]]
496
+ An array containing the 3 backbone dihedral angles for every CA atom.
497
+ `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
498
+ C-terminus.
499
+ In these places the arrays have *NaN* values.
500
+ If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional,
501
+ the first dimension corresponds to the model number.
541
502
  """
542
- bb_filter = filter_peptide_backbone(atom_array)
543
- backbone = atom_array[..., bb_filter]
544
-
545
- if (
546
- backbone.array_length() % 3 != 0
547
- or (backbone.atom_name[0::3] != "N").any()
548
- or (backbone.atom_name[1::3] != "CA").any()
549
- or (backbone.atom_name[2::3] != "C").any()
550
- ):
551
- raise BadStructureError(
552
- "The backbone is invalid, must be repeats of (N, CA, C), "
553
- "maybe a backbone atom is missing"
554
- )
555
- phis = []
556
- psis = []
557
- omegas = []
558
- for chain_bb in chain_iter(backbone):
559
- phi, psi, omega = _dihedral_backbone(chain_bb)
560
- phis.append(phi)
561
- psis.append(psi)
562
- omegas.append(omega)
563
- return (
564
- np.concatenate(phis, axis=-1),
565
- np.concatenate(psis, axis=-1),
566
- np.concatenate(omegas, axis=-1),
567
- )
503
+ amino_acid_mask = filter_amino_acids(atom_array)
568
504
 
505
+ # Coordinates for dihedral angle calculation
506
+ coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue(
507
+ atom_array,
508
+ ("N", "CA", "C"),
509
+ amino_acid_mask,
510
+ )
511
+ n_residues = coord_n.shape[-2]
569
512
 
570
- def _dihedral_backbone(chain_bb):
571
- bb_coord = chain_bb.coord
572
513
  # Coordinates for dihedral angle calculation
573
514
  # Dim 0: Model index (only for atom array stacks)
574
515
  # Dim 1: Angle index
575
516
  # Dim 2: X, Y, Z coordinates
576
517
  # Dim 3: Atoms involved in dihedral angle
577
- if isinstance(chain_bb, AtomArray):
578
- angle_coord_shape = (len(bb_coord) // 3, 3, 4)
579
- elif isinstance(chain_bb, AtomArrayStack):
580
- angle_coord_shape = (bb_coord.shape[0], bb_coord.shape[1] // 3, 3, 4)
581
- phi_coord = np.full(angle_coord_shape, np.nan)
582
- psi_coord = np.full(angle_coord_shape, np.nan)
583
- omega_coord = np.full(angle_coord_shape, np.nan)
584
-
585
- # Indices for coordinates of CA atoms
586
- ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1
518
+ if isinstance(atom_array, AtomArray):
519
+ angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4)
520
+ elif isinstance(atom_array, AtomArrayStack):
521
+ angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4)
522
+ coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
523
+ coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
524
+ coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32)
525
+
587
526
  # fmt: off
588
- phi_coord [..., 1:, :, 0] = bb_coord[..., ca_i[1: ]-2, :]
589
- phi_coord [..., 1:, :, 1] = bb_coord[..., ca_i[1: ]-1, :]
590
- phi_coord [..., 1:, :, 2] = bb_coord[..., ca_i[1: ], :]
591
- phi_coord [..., 1:, :, 3] = bb_coord[..., ca_i[1: ]+1, :]
592
- psi_coord [..., :-1, :, 0] = bb_coord[..., ca_i[:-1]-1, :]
593
- psi_coord [..., :-1, :, 1] = bb_coord[..., ca_i[:-1], :]
594
- psi_coord [..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+1, :]
595
- psi_coord [..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+2, :]
596
- omega_coord[..., :-1, :, 0] = bb_coord[..., ca_i[:-1], :]
597
- omega_coord[..., :-1, :, 1] = bb_coord[..., ca_i[:-1]+1, :]
598
- omega_coord[..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+2, :]
599
- omega_coord[..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+3, :]
527
+ coord_for_phi[..., 1:, :, 0] = coord_c[..., 0:-1, :]
528
+ coord_for_phi[..., 1:, :, 1] = coord_n[..., 1:, :]
529
+ coord_for_phi[..., 1:, :, 2] = coord_ca[..., 1:, :]
530
+ coord_for_phi[..., 1:, :, 3] = coord_c[..., 1:, :]
531
+
532
+ coord_for_psi[..., 0:-1, :, 0] = coord_n[..., 0:-1, :]
533
+ coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :]
534
+ coord_for_psi[..., 0:-1, :, 2] = coord_c[..., 0:-1, :]
535
+ coord_for_psi[..., 0:-1, :, 3] = coord_n[..., 1:, :]
536
+
537
+ coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :]
538
+ coord_for_omg[..., 0:-1, :, 1] = coord_c[..., 0:-1, :]
539
+ coord_for_omg[..., 0:-1, :, 2] = coord_n[..., 1:, :]
540
+ coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:, :]
600
541
  # fmt: on
601
542
 
602
543
  phi = dihedral(
603
- phi_coord[..., 0], phi_coord[..., 1], phi_coord[..., 2], phi_coord[..., 3]
544
+ coord_for_phi[..., 0],
545
+ coord_for_phi[..., 1],
546
+ coord_for_phi[..., 2],
547
+ coord_for_phi[..., 3],
604
548
  )
605
549
  psi = dihedral(
606
- psi_coord[..., 0], psi_coord[..., 1], psi_coord[..., 2], psi_coord[..., 3]
550
+ coord_for_psi[..., 0],
551
+ coord_for_psi[..., 1],
552
+ coord_for_psi[..., 2],
553
+ coord_for_psi[..., 3],
607
554
  )
608
- omega = dihedral(
609
- omega_coord[..., 0],
610
- omega_coord[..., 1],
611
- omega_coord[..., 2],
612
- omega_coord[..., 3],
555
+ omg = dihedral(
556
+ coord_for_omg[..., 0],
557
+ coord_for_omg[..., 1],
558
+ coord_for_omg[..., 2],
559
+ coord_for_omg[..., 3],
613
560
  )
614
561
 
615
- return phi, psi, omega
562
+ return phi, psi, omg
616
563
 
617
564
 
618
565
  def centroid(atoms):
@@ -16,6 +16,7 @@ __author__ = "Patrick Kunzmann, Tom David Müller"
16
16
 
17
17
  from .atoms import *
18
18
  from .bonds import *
19
+ from .ccd import *
19
20
  from .groups import *
20
21
  from .masses import *
21
22
  from .misc import *
@@ -42,19 +42,19 @@ def residue(res_name):
42
42
  >>> alanine = residue("ALA")
43
43
  >>> # Atoms and geometry
44
44
  >>> print(alanine)
45
- 0 ALA N N -0.970 0.490 1.500
46
- 0 ALA CA C 0.260 0.420 0.690
47
- 0 ALA C C -0.090 0.020 -0.720
48
- 0 ALA O O -1.060 -0.680 -0.920
49
- 0 ALA CB C 1.200 -0.620 1.300
50
- 0 ALA OXT O 0.660 0.440 -1.740
51
- 0 ALA H H -1.380 -0.420 1.480
52
- 0 ALA H2 H -0.680 0.660 2.450
53
- 0 ALA HA H 0.750 1.390 0.680
54
- 0 ALA HB1 H 1.460 -0.330 2.320
55
- 0 ALA HB2 H 0.720 -1.590 1.310
56
- 0 ALA HB3 H 2.110 -0.680 0.700
57
- 0 ALA HXT H 0.440 0.180 -2.650
45
+ 0 ALA N N -0.966 0.493 1.500
46
+ 0 ALA CA C 0.257 0.418 0.692
47
+ 0 ALA C C -0.094 0.017 -0.716
48
+ 0 ALA O O -1.056 -0.682 -0.923
49
+ 0 ALA CB C 1.204 -0.620 1.296
50
+ 0 ALA OXT O 0.661 0.439 -1.742
51
+ 0 ALA H H -1.383 -0.425 1.482
52
+ 0 ALA H2 H -0.676 0.661 2.452
53
+ 0 ALA HA H 0.746 1.392 0.682
54
+ 0 ALA HB1 H 1.459 -0.330 2.316
55
+ 0 ALA HB2 H 0.715 -1.594 1.307
56
+ 0 ALA HB3 H 2.113 -0.676 0.697
57
+ 0 ALA HXT H 0.435 0.182 -2.647
58
58
  >>> # Bonds
59
59
  >>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
60
60
  [['N' 'CA']
@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["bond_type", "bonds_in_residue"]
8
8
 
9
+ import functools
9
10
  from biotite.structure.bonds import BondType
10
11
  from biotite.structure.info.ccd import get_from_ccd
11
12
 
@@ -69,6 +70,7 @@ def bond_type(res_name, atom_name1, atom_name2):
69
70
  return None
70
71
 
71
72
 
73
+ @functools.cache
72
74
  def bonds_in_residue(res_name):
73
75
  """
74
76
  Get a dictionary containing all atoms inside a given residue
@@ -94,6 +96,10 @@ def bonds_in_residue(res_name):
94
96
  In other functionalities throughout *Biotite* that uses this
95
97
  function.
96
98
 
99
+ Notes
100
+ -----
101
+ The returned values are cached for faster access in subsequent calls.
102
+
97
103
  Examples
98
104
  --------
99
105
  >>> bonds = bonds_in_residue("PHE")
@@ -126,16 +132,16 @@ def bonds_in_residue(res_name):
126
132
  """
127
133
  global _intra_bonds
128
134
  if res_name not in _intra_bonds:
129
- chem_comp_bond_dict = get_from_ccd("chem_comp_bond", res_name)
130
- if chem_comp_bond_dict is None:
135
+ chem_comp_bond = get_from_ccd("chem_comp_bond", res_name)
136
+ if chem_comp_bond is None:
131
137
  _intra_bonds[res_name] = {}
132
138
  else:
133
139
  bonds_for_residue = {}
134
140
  for atom1, atom2, order, aromatic_flag in zip(
135
- chem_comp_bond_dict["atom_id_1"],
136
- chem_comp_bond_dict["atom_id_2"],
137
- chem_comp_bond_dict["value_order"],
138
- chem_comp_bond_dict["pdbx_aromatic_flag"],
141
+ chem_comp_bond["atom_id_1"].as_array(),
142
+ chem_comp_bond["atom_id_2"].as_array(),
143
+ chem_comp_bond["value_order"].as_array(),
144
+ chem_comp_bond["pdbx_aromatic_flag"].as_array(),
139
145
  ):
140
146
  bond_type = BOND_TYPES[order, aromatic_flag]
141
147
  bonds_for_residue[atom1.item(), atom2.item()] = bond_type
@@ -4,23 +4,23 @@
4
4
 
5
5
  __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
- __all__ = ["get_ccd", "get_from_ccd"]
7
+ __all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
8
8
 
9
+ import functools
10
+ import importlib
11
+ import inspect
12
+ import pkgutil
9
13
  from pathlib import Path
10
14
  import numpy as np
11
15
 
12
- CCD_DIR = Path(__file__).parent / "ccd"
13
- INDEX_COLUMN_NAME = {
16
+ _CCD_FILE = Path(__file__).parent / "components.bcif"
17
+ _SPECIAL_ID_COLUMN_NAMES = {
14
18
  "chem_comp": "id",
15
- "chem_comp_atom": "comp_id",
16
- "chem_comp_bond": "comp_id",
17
19
  }
18
-
19
- _ccd_block = None
20
- # For each category this index gives the start and stop for each residue
21
- _residue_index = {}
20
+ _DEFAULT_ID_COLUMN_NAME = "comp_id"
22
21
 
23
22
 
23
+ @functools.cache
24
24
  def get_ccd():
25
25
  """
26
26
  Get the internal subset of the PDB
@@ -29,8 +29,16 @@ def get_ccd():
29
29
 
30
30
  Returns
31
31
  -------
32
- ccd : BinaryCIFFile
32
+ ccd : BinaryCIFBlock
33
33
  The CCD.
34
+ It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
35
+
36
+ Warnings
37
+ --------
38
+
39
+ Consider the return value as read-only.
40
+ As other functions cache data from it, changing data may lead to undefined
41
+ behavior.
34
42
 
35
43
  References
36
44
  ----------
@@ -41,13 +49,49 @@ def get_ccd():
41
49
  # Avoid circular import
42
50
  from biotite.structure.io.pdbx.bcif import BinaryCIFFile
43
51
 
44
- global _ccd_block
45
- if _ccd_block is None:
46
- # Load CCD once and cache it for subsequent calls
47
- _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
48
- return _ccd_block
52
+ try:
53
+ return BinaryCIFFile.read(_CCD_FILE).block
54
+ except FileNotFoundError:
55
+ raise RuntimeError(
56
+ "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
57
+ )
58
+
59
+
60
+ def set_ccd_path(ccd_path):
61
+ """
62
+ Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
49
63
 
64
+ This function also clears the cache of functions depending on the CCD to ensure
65
+ that the new CCD is used.
50
66
 
67
+ Parameters
68
+ ----------
69
+ ccd_path : path-like
70
+ The path to the custom CCD in BinaryCIF format, prepared with the
71
+ ``setup_ccd.py`` module.
72
+
73
+ Notes
74
+ -----
75
+ This function is intended for advanced users who need to add information for
76
+ compounds that are not part of the internal CCD.
77
+ The reason might be that an updated version already exists upstream or that
78
+ the user wants to add custom compounds to the CCD.
79
+ """
80
+ global _CCD_FILE
81
+ _CCD_FILE = Path(ccd_path)
82
+
83
+ # Clear caches in all functions in biotite.structure.info
84
+ info_modules = [
85
+ importlib.import_module(f"biotite.structure.info.{mod_name}")
86
+ for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
87
+ ]
88
+ for module in info_modules:
89
+ for _, function in inspect.getmembers(module, callable):
90
+ if hasattr(function, "cache_clear"):
91
+ function.cache_clear()
92
+
93
+
94
+ @functools.cache
51
95
  def get_from_ccd(category_name, comp_id, column_name=None):
52
96
  """
53
97
  Get the rows for the given residue in the given category from the
@@ -67,9 +111,13 @@ def get_from_ccd(category_name, comp_id, column_name=None):
67
111
 
68
112
  Returns
69
113
  -------
70
- value : ndarray or dict or None
71
- The array of the given column or all columns as dictionary.
72
- ``None`` if the `comp_id` is not found in the category.
114
+ slice : BinaryCIFCategory or BinaryCIFColumn
115
+ The category or column (if `column_name` is provided) containing only the rows
116
+ for the given residue.
117
+
118
+ Notes
119
+ -----
120
+ The returned values are cached for faster access in subsequent calls.
73
121
 
74
122
  References
75
123
  ----------
@@ -77,28 +125,41 @@ def get_from_ccd(category_name, comp_id, column_name=None):
77
125
  .. footbibliography::
78
126
 
79
127
  """
80
- global _residue_index
81
- ccd = get_ccd()
82
- category = ccd[category_name]
83
- if category_name not in _residue_index:
84
- _residue_index[category_name] = _index_residues(
85
- category[INDEX_COLUMN_NAME[category_name]].as_array()
86
- )
87
128
  try:
88
- start, stop = _residue_index[category_name][comp_id]
129
+ start, stop = _residue_index(category_name)[comp_id]
89
130
  except KeyError:
90
131
  return None
91
132
 
133
+ category = get_ccd()[category_name]
92
134
  if column_name is None:
93
- return {
94
- col_name: category[col_name].as_array()[start:stop]
95
- for col_name in category.keys()
96
- }
135
+ return _filter_category(category, slice(start, stop))
97
136
  else:
98
- return category[column_name].as_array()[start:stop]
137
+ return _filter_column(category[column_name], slice(start, stop))
138
+
99
139
 
140
+ @functools.cache
141
+ def _residue_index(category_name):
142
+ """
143
+ Get the start and stop index for each component name in the given
144
+ CCD category.
145
+
146
+ Parameters
147
+ ----------
148
+ category_name : str
149
+ The category to determine start and stop indices for each component in.
150
+
151
+ Returns
152
+ -------
153
+ index : dict (str -> (int, int))
154
+ The index maps each present component name to the corresponding
155
+ start and exclusive stop index in `id_column`.
156
+ """
157
+ category = get_ccd()[category_name]
158
+ id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
159
+ category_name, _DEFAULT_ID_COLUMN_NAME
160
+ )
161
+ id_column = category[id_column_name].as_array()
100
162
 
101
- def _index_residues(id_column):
102
163
  residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
103
164
  # The final start is the exclusive stop of last residue
104
165
  residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
@@ -107,3 +168,35 @@ def _index_residues(id_column):
107
168
  comp_id = id_column[residue_starts[i]].item()
108
169
  index[comp_id] = (residue_starts[i], residue_starts[i + 1])
109
170
  return index
171
+
172
+
173
+ def _filter_category(category, index):
174
+ """
175
+ Reduce the category to the values for the given index.
176
+ """
177
+ # Avoid circular import
178
+ from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
179
+
180
+ return BinaryCIFCategory(
181
+ {key: _filter_column(column, index) for key, column in category.items()}
182
+ )
183
+
184
+
185
+ def _filter_column(column, index):
186
+ """
187
+ Reduce the column to the values for the given index.
188
+ """
189
+ # Avoid circular import
190
+ from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
191
+ from biotite.structure.io.pdbx.component import MaskValue
192
+
193
+ data_array = column.data.array[index]
194
+ mask_array = column.mask.array[index] if column.mask is not None else None
195
+ return BinaryCIFColumn(
196
+ BinaryCIFData(data_array),
197
+ (
198
+ BinaryCIFData(mask_array)
199
+ if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
200
+ else None
201
+ ),
202
+ )
@@ -6,14 +6,45 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Tom David Müller, Patrick Kunzmann"
7
7
  __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
8
8
 
9
- from pathlib import Path
10
-
11
- CCD_DIR = Path(__file__).parent / "ccd"
12
-
13
-
14
- group_lists = {}
15
-
16
-
9
+ import functools
10
+ import numpy as np
11
+ from biotite.structure.info.ccd import get_ccd
12
+
13
+ _AMINO_ACID_TYPES = [
14
+ "D-beta-peptide, C-gamma linking",
15
+ "D-gamma-peptide, C-delta linking",
16
+ "D-peptide COOH carboxy terminus",
17
+ "D-peptide NH3 amino terminus",
18
+ "D-peptide linking",
19
+ "L-beta-peptide, C-gamma linking",
20
+ "L-gamma-peptide, C-delta linking",
21
+ "L-peptide COOH carboxy terminus",
22
+ "L-peptide NH3 amino terminus",
23
+ "L-peptide linking",
24
+ "peptide linking",
25
+ ]
26
+ _NUCLEOTIDE_TYPES = [
27
+ "DNA OH 3 prime terminus",
28
+ "DNA OH 5 prime terminus",
29
+ "DNA linking",
30
+ "L-DNA linking",
31
+ "L-RNA linking",
32
+ "RNA OH 3 prime terminus",
33
+ "RNA OH 5 prime terminus",
34
+ "RNA linking",
35
+ ]
36
+ _CARBOHYDRATE_TYPES = [
37
+ "D-saccharide",
38
+ "D-saccharide, alpha linking",
39
+ "D-saccharide, beta linking",
40
+ "L-saccharide",
41
+ "L-saccharide, alpha linking",
42
+ "L-saccharide, beta linking",
43
+ "saccharide",
44
+ ]
45
+
46
+
47
+ @functools.cache
17
48
  def amino_acid_names():
18
49
  """
19
50
  Get a tuple of amino acid three-letter codes according to the
@@ -32,9 +63,10 @@ def amino_acid_names():
32
63
  .. footbibliography::
33
64
 
34
65
  """
35
- return _get_group_members("amino_acids")
66
+ return _get_group_members(_AMINO_ACID_TYPES)
36
67
 
37
68
 
69
+ @functools.cache
38
70
  def nucleotide_names():
39
71
  """
40
72
  Get a tuple of nucleotide three-letter codes according to the
@@ -53,9 +85,10 @@ def nucleotide_names():
53
85
  .. footbibliography::
54
86
 
55
87
  """
56
- return _get_group_members("nucleotides")
88
+ return _get_group_members(_NUCLEOTIDE_TYPES)
57
89
 
58
90
 
91
+ @functools.cache
59
92
  def carbohydrate_names():
60
93
  """
61
94
  Get a tuple of carbohydrate three-letter codes according to the
@@ -74,12 +107,25 @@ def carbohydrate_names():
74
107
  .. footbibliography::
75
108
 
76
109
  """
77
- return _get_group_members("carbohydrates")
110
+ return _get_group_members(_CARBOHYDRATE_TYPES)
111
+
112
+
113
+ def _get_group_members(match_types):
114
+ """
115
+ Identify component IDs that match a given component *type* from the CCD.
78
116
 
117
+ Parameters
118
+ ----------
119
+ match_types : list of str
120
+ The component types to extract.
79
121
 
80
- def _get_group_members(group_name):
81
- global group_lists
82
- if group_name not in group_lists:
83
- with open(CCD_DIR / f"{group_name}.txt", "r") as file:
84
- group_lists[group_name] = tuple(file.read().split())
85
- return group_lists[group_name]
122
+ Returns
123
+ -------
124
+ comp_ids : list of str
125
+ The extracted component IDs.
126
+ """
127
+ category = get_ccd()["chem_comp"]
128
+ comp_ids = category["id"].as_array()
129
+ types = category["type"].as_array()
130
+ # Ignore case
131
+ return comp_ids[np.isin(np.char.lower(types), np.char.lower(match_types))].tolist()
@@ -95,15 +95,11 @@ def mass(item, is_residue=None):
95
95
  if is_residue is None:
96
96
  result_mass = _atom_masses.get(item.upper())
97
97
  if result_mass is None:
98
- result_mass = get_from_ccd(
99
- "chem_comp", item.upper(), "formula_weight"
100
- ).item()
98
+ result_mass = _mass_for_residue(item)
101
99
  elif not is_residue:
102
100
  result_mass = _atom_masses.get(item.upper())
103
101
  else:
104
- result_mass = get_from_ccd(
105
- "chem_comp", item.upper(), "formula_weight"
106
- ).item()
102
+ result_mass = _mass_for_residue(item)
107
103
 
108
104
  elif isinstance(item, Atom):
109
105
  result_mass = mass(item.element, is_residue=False)
@@ -116,3 +112,10 @@ def mass(item, is_residue=None):
116
112
  if result_mass is None:
117
113
  raise KeyError(f"{item} is not known")
118
114
  return result_mass
115
+
116
+
117
+ def _mass_for_residue(res_name):
118
+ column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
119
+ if column is None:
120
+ raise KeyError(f"Residue '{res_name}' is not known")
121
+ return column.as_item()