biotite 0.39.0__cp312-cp312-macosx_11_0_arm64.whl → 0.41.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (121) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/pubchem/download.py +23 -23
  4. biotite/database/pubchem/query.py +7 -7
  5. biotite/database/rcsb/download.py +19 -14
  6. biotite/file.py +17 -9
  7. biotite/sequence/align/banded.c +256 -235
  8. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  9. biotite/sequence/align/cigar.py +60 -15
  10. biotite/sequence/align/kmeralphabet.c +241 -220
  11. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  12. biotite/sequence/align/kmersimilarity.c +213 -194
  13. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  14. biotite/sequence/align/kmertable.cpp +231 -203
  15. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  16. biotite/sequence/align/localgapped.c +256 -235
  17. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  18. biotite/sequence/align/localungapped.c +233 -212
  19. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  20. biotite/sequence/align/multiple.c +253 -232
  21. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  22. biotite/sequence/align/pairwise.c +272 -251
  23. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  24. biotite/sequence/align/permutation.c +213 -194
  25. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  26. biotite/sequence/align/selector.c +215 -195
  27. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  28. biotite/sequence/align/tracetable.c +213 -193
  29. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  30. biotite/sequence/annotation.py +2 -2
  31. biotite/sequence/codec.c +233 -212
  32. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  33. biotite/sequence/io/fasta/convert.py +27 -24
  34. biotite/sequence/phylo/nj.c +213 -194
  35. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  36. biotite/sequence/phylo/tree.c +225 -200
  37. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  38. biotite/sequence/phylo/upgma.c +213 -194
  39. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  40. biotite/structure/__init__.py +2 -0
  41. biotite/structure/basepairs.py +7 -12
  42. biotite/structure/bonds.c +1435 -1277
  43. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  44. biotite/structure/celllist.c +215 -195
  45. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  46. biotite/structure/charges.c +1050 -1099
  47. biotite/structure/charges.cpython-312-darwin.so +0 -0
  48. biotite/structure/dotbracket.py +2 -0
  49. biotite/structure/filter.py +30 -37
  50. biotite/structure/info/__init__.py +5 -8
  51. biotite/structure/info/atoms.py +31 -68
  52. biotite/structure/info/bonds.py +47 -101
  53. biotite/structure/info/ccd/README.rst +8 -0
  54. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  55. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  56. biotite/structure/info/ccd/components.bcif +0 -0
  57. biotite/structure/info/ccd/nucleotides.txt +798 -0
  58. biotite/structure/info/ccd.py +95 -0
  59. biotite/structure/info/groups.py +90 -0
  60. biotite/structure/info/masses.py +21 -20
  61. biotite/structure/info/misc.py +78 -25
  62. biotite/structure/info/standardize.py +17 -12
  63. biotite/structure/integrity.py +19 -70
  64. biotite/structure/io/__init__.py +2 -4
  65. biotite/structure/io/ctab.py +12 -106
  66. biotite/structure/io/general.py +167 -181
  67. biotite/structure/io/gro/file.py +16 -16
  68. biotite/structure/io/mmtf/__init__.py +3 -0
  69. biotite/structure/io/mmtf/convertarray.c +217 -196
  70. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  71. biotite/structure/io/mmtf/convertfile.c +215 -195
  72. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  73. biotite/structure/io/mmtf/decode.c +223 -202
  74. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  75. biotite/structure/io/mmtf/encode.c +213 -194
  76. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  77. biotite/structure/io/mmtf/file.py +34 -26
  78. biotite/structure/io/mol/__init__.py +4 -2
  79. biotite/structure/io/mol/convert.py +71 -7
  80. biotite/structure/io/mol/ctab.py +414 -0
  81. biotite/structure/io/mol/header.py +116 -0
  82. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  83. biotite/structure/io/mol/sdf.py +909 -0
  84. biotite/structure/io/npz/__init__.py +3 -0
  85. biotite/structure/io/npz/file.py +21 -18
  86. biotite/structure/io/pdb/__init__.py +3 -3
  87. biotite/structure/io/pdb/file.py +89 -34
  88. biotite/structure/io/pdb/hybrid36.c +63 -43
  89. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  90. biotite/structure/io/pdbqt/file.py +32 -32
  91. biotite/structure/io/pdbx/__init__.py +12 -6
  92. biotite/structure/io/pdbx/bcif.py +648 -0
  93. biotite/structure/io/pdbx/cif.py +1032 -0
  94. biotite/structure/io/pdbx/component.py +246 -0
  95. biotite/structure/io/pdbx/convert.py +858 -386
  96. biotite/structure/io/pdbx/encoding.c +112803 -0
  97. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  98. biotite/structure/io/pdbx/legacy.py +267 -0
  99. biotite/structure/molecules.py +151 -151
  100. biotite/structure/repair.py +253 -0
  101. biotite/structure/sasa.c +213 -194
  102. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  103. biotite/structure/sequence.py +112 -0
  104. biotite/structure/superimpose.py +618 -116
  105. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
  106. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
  107. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
  108. biotite/structure/info/amino_acids.json +0 -1556
  109. biotite/structure/info/amino_acids.py +0 -42
  110. biotite/structure/info/carbohydrates.json +0 -1122
  111. biotite/structure/info/carbohydrates.py +0 -39
  112. biotite/structure/info/intra_bonds.msgpack +0 -0
  113. biotite/structure/info/link_types.msgpack +0 -1
  114. biotite/structure/info/nucleotides.json +0 -772
  115. biotite/structure/info/nucleotides.py +0 -39
  116. biotite/structure/info/residue_masses.msgpack +0 -0
  117. biotite/structure/info/residue_names.msgpack +0 -3
  118. biotite/structure/info/residues.msgpack +0 -0
  119. biotite/structure/io/pdbx/file.py +0 -652
  120. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  121. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -57,6 +57,8 @@ def dot_bracket_from_structure(
57
57
  .. footbibliography::
58
58
  """
59
59
  basepairs = base_pairs(nucleic_acid_strand)
60
+ if len(basepairs) == 0:
61
+ return ['']
60
62
  basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
61
63
  length = get_residue_count(nucleic_acid_strand)
62
64
  return dot_bracket(basepairs, length, scores=scores,
@@ -10,9 +10,9 @@ arrays and atom array stacks.
10
10
  __name__ = "biotite.structure"
11
11
  __author__ = "Patrick Kunzmann, Tom David Müller"
12
12
  __all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
13
- "filter_canonical_nucleotides", "filter_amino_acids",
14
- "filter_canonical_amino_acids", "filter_carbohydrates",
15
- "filter_backbone", "filter_intersection", "filter_first_altloc",
13
+ "filter_canonical_nucleotides", "filter_amino_acids",
14
+ "filter_canonical_amino_acids", "filter_carbohydrates",
15
+ "filter_backbone", "filter_intersection", "filter_first_altloc",
16
16
  "filter_highest_occupancy_altloc", "filter_peptide_backbone",
17
17
  "filter_phosphate_backbone", "filter_linear_bond_continuity",
18
18
  "filter_polymer"]
@@ -20,13 +20,10 @@ __all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
20
20
  import warnings
21
21
 
22
22
  import numpy as np
23
- import operator as op
24
- from functools import partial, reduce
25
- from .atoms import Atom, AtomArray, AtomArrayStack, array as atom_array
23
+ from functools import partial
24
+ from .atoms import array as atom_array
26
25
  from .residues import get_residue_starts, get_residue_count
27
- from .info.nucleotides import nucleotide_names
28
- from .info.amino_acids import amino_acid_names
29
- from .info.carbohydrates import carbohydrate_names
26
+ from .info.groups import amino_acid_names, carbohydrate_names, nucleotide_names
30
27
 
31
28
 
32
29
  _canonical_aa_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
@@ -34,10 +31,6 @@ _canonical_aa_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
34
31
  "TRP","TYR","VAL", "SEC"]
35
32
  _canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]
36
33
 
37
- _nucleotide_list = nucleotide_names()
38
- _amino_acid_list = amino_acid_names()
39
- _carbohydrate_list = carbohydrate_names()
40
-
41
34
  _solvent_list = ["HOH","SOL"]
42
35
 
43
36
  _peptide_backbone_atoms = ['N', 'CA', 'C']
@@ -118,22 +111,22 @@ def filter_nucleotides(array):
118
111
 
119
112
  Notes
120
113
  -----
121
- Nucleotides are identified according to the PDB chemical component
114
+ Nucleotides are identified according to the PDB chemical component
122
115
  dictionary. A residue is considered a nucleotide if its
123
116
  ``_chem_comp.type`` property has one of the following values (case
124
117
  insensitive):
125
118
 
126
- ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
127
- ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
119
+ ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
120
+ ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
128
121
  ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
129
122
  ``RNA OH 5 PRIME TERMINUS``
130
123
  """
131
- return np.isin(array.res_name, _nucleotide_list)
124
+ return np.isin(array.res_name, nucleotide_names())
132
125
 
133
126
 
134
127
  def filter_canonical_amino_acids(array):
135
128
  """
136
- Filter all atoms of one array that belong to canonical amino acid
129
+ Filter all atoms of one array that belong to canonical amino acid
137
130
  residues.
138
131
 
139
132
  Parameters
@@ -164,23 +157,23 @@ def filter_amino_acids(array):
164
157
  filter : ndarray, dtype=bool
165
158
  This array is `True` for all indices in `array`, where the atom
166
159
  belongs to an amino acid residue.
167
-
160
+
168
161
  Notes
169
162
  -----
170
- Amino acids are identified according to the PDB chemical component
163
+ Amino acids are identified according to the PDB chemical component
171
164
  dictionary. A residue is considered an amino acid if its
172
165
  ``_chem_comp.type`` property has one of the following values (case
173
166
  insensitive):
174
167
 
175
- ``D-BETA-PEPTIDE``, ``C-GAMMA LINKING``, ``D-GAMMA-PEPTIDE``,
176
- ``C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
177
- ``D-PEPTIDE NH3 AMINO TERMINUS``,
178
- ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
179
- ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
180
- ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
168
+ ``D-BETA-PEPTIDE``, ``C-GAMMA LINKING``, ``D-GAMMA-PEPTIDE``,
169
+ ``C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
170
+ ``D-PEPTIDE NH3 AMINO TERMINUS``,
171
+ ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
172
+ ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
173
+ ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
181
174
  ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
182
175
  """
183
- return np.isin(array.res_name, _amino_acid_list)
176
+ return np.isin(array.res_name, amino_acid_names())
184
177
 
185
178
 
186
179
  def filter_carbohydrates(array):
@@ -197,20 +190,20 @@ def filter_carbohydrates(array):
197
190
  filter : ndarray, dtype=bool
198
191
  This array is `True` for all indices in `array`, where the atom
199
192
  belongs to a carbohydrate.
200
-
193
+
201
194
  Notes
202
195
  -----
203
- Carbohydrates are identified according to the PDB chemical component
196
+ Carbohydrates are identified according to the PDB chemical component
204
197
  dictionary. A residue is considered a carbohydrate if its
205
198
  ``_chem_comp.type`` property has one of the following values (case
206
199
  insensitive):
207
200
 
208
- ``D-SACCHARIDE``, ``D-SACCHARIDE,ALPHA LINKING``,
209
- ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
210
- ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
201
+ ``D-SACCHARIDE``, ``D-SACCHARIDE,ALPHA LINKING``,
202
+ ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
203
+ ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
211
204
  ``SACCHARIDE``
212
205
  """
213
- return np.isin(array.res_name, _carbohydrate_list)
206
+ return np.isin(array.res_name, carbohydrate_names())
214
207
 
215
208
 
216
209
  def filter_backbone(array):
@@ -299,7 +292,7 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
299
292
 
300
293
  The result will depend on the atoms' order.
301
294
  For instance, consider a molecule::
302
-
295
+
303
296
  C3
304
297
  |
305
298
  C1-C2-C4
@@ -323,7 +316,7 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
323
316
  This array is `True` for all indices in `array`, where an atom
324
317
  has a bond length with the next atom within [`min_len`, `max_len`]
325
318
  boundaries.
326
-
319
+
327
320
  Notes
328
321
  -----
329
322
  Note that this function purely uses distances between consecutive atoms.
@@ -438,7 +431,7 @@ def filter_first_altloc(atoms, altloc_ids):
438
431
  Filter all atoms, that have the first *altloc* ID appearing in a
439
432
  residue.
440
433
 
441
- Structure files (PDB, PDBx, MMTF) allow for duplicate atom records,
434
+ Structure files (PDB, PDBx) allow for duplicate atom records,
442
435
  in case a residue is found in multiple alternate locations
443
436
  (*altloc*).
444
437
  This function is used to remove such duplicate atoms by choosing a
@@ -507,7 +500,7 @@ def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
507
500
  For each residue, filter all atoms, that have the *altloc* ID
508
501
  with the highest occupancy for this residue.
509
502
 
510
- Structure files (PDB, PDBx, MMTF) allow for duplicate atom records,
503
+ Structure files (PDB, PDBx) allow for duplicate atom records,
511
504
  in case a residue is found in multiple alternate locations
512
505
  (*altloc*).
513
506
  This function is used to remove such duplicate atoms by choosing a
@@ -6,23 +6,20 @@
6
6
  A subpackage for obtaining all kinds of chemical information about atoms
7
7
  and residues, including masses, radii, bonds, etc.
8
8
 
9
- Most information is extracted from the chemical compound dictionary
9
+ Most information is extracted from the *Chemical Component Dictionary*
10
10
  of the
11
- `wwPDB <ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif>`_
12
- via tools from the
13
- `biotite-util <https://github.com/biotite-dev/biotite-util>`_
14
- repository.
11
+ `wwPDB <ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif>`_.
15
12
  """
16
13
 
17
14
  __name__ = "biotite.structure.info"
18
15
  __author__ = "Patrick Kunzmann, Tom David Müller"
19
16
 
17
+ from .groups import *
18
+
20
19
  from .atoms import *
21
20
  from .bonds import *
21
+ from .groups import *
22
22
  from .masses import *
23
23
  from .misc import *
24
24
  from .radii import *
25
25
  from .standardize import *
26
- from .nucleotides import *
27
- from .amino_acids import *
28
- from .carbohydrates import *
@@ -6,36 +6,15 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["residue"]
8
8
 
9
- from os.path import join, dirname, realpath
10
- import msgpack
11
- import numpy as np
12
- from ..atoms import AtomArray
13
- from ..bonds import BondList
9
+ from .ccd import get_ccd
14
10
 
15
11
 
16
- _residues = None
17
-
18
-
19
- def _init_dataset():
20
- """
21
- Load the residue dataset from MessagePack file.
22
-
23
- Since loading the database is computationally expensive,
24
- this is only done, when the residue database is actually required.
25
- """
26
- global _residues
27
- if _residues is not None:
28
- # Database is already initialized
29
- return
30
-
31
- # Residue data is taken from
32
- # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
33
- # (2019/01/27)
34
- _info_dir = dirname(realpath(__file__))
35
- with open(join(_info_dir, "residues.msgpack"), "rb") as file:
36
- _residues = msgpack.unpack(
37
- file, use_list=False, raw=False
38
- )
12
+ non_hetero_residues = set([
13
+ "ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
14
+ "ILE","LEU","LYS","MET","PHE","PRO","PYL","SER","THR",
15
+ "TRP","TYR","VAL", "SEC",
16
+ "A", "DA", "G", "DG", "C", "DC", "U", "DT",
17
+ ])
39
18
 
40
19
 
41
20
  def residue(res_name):
@@ -62,19 +41,19 @@ def residue(res_name):
62
41
  >>> alanine = residue("ALA")
63
42
  >>> # Atoms and geometry
64
43
  >>> print(alanine)
65
- 0 ALA N N -0.966 0.493 1.500
66
- 0 ALA CA C 0.257 0.418 0.692
67
- 0 ALA C C -0.094 0.017 -0.716
68
- 0 ALA O O -1.056 -0.682 -0.923
69
- 0 ALA CB C 1.204 -0.620 1.296
70
- 0 ALA OXT O 0.661 0.439 -1.742
71
- 0 ALA H H -1.383 -0.425 1.482
72
- 0 ALA H2 H -0.676 0.661 2.452
73
- 0 ALA HA H 0.746 1.392 0.682
74
- 0 ALA HB1 H 1.459 -0.330 2.316
75
- 0 ALA HB2 H 0.715 -1.594 1.307
76
- 0 ALA HB3 H 2.113 -0.676 0.697
77
- 0 ALA HXT H 0.435 0.182 -2.647
44
+ 0 ALA N N -0.970 0.490 1.500
45
+ 0 ALA CA C 0.260 0.420 0.690
46
+ 0 ALA C C -0.090 0.020 -0.720
47
+ 0 ALA O O -1.060 -0.680 -0.920
48
+ 0 ALA CB C 1.200 -0.620 1.300
49
+ 0 ALA OXT O 0.660 0.440 -1.740
50
+ 0 ALA H H -1.380 -0.420 1.480
51
+ 0 ALA H2 H -0.680 0.660 2.450
52
+ 0 ALA HA H 0.750 1.390 0.680
53
+ 0 ALA HB1 H 1.460 -0.330 2.320
54
+ 0 ALA HB2 H 0.720 -1.590 1.310
55
+ 0 ALA HB3 H 2.110 -0.680 0.700
56
+ 0 ALA HXT H 0.440 0.180 -2.650
78
57
  >>> # Bonds
79
58
  >>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
80
59
  [['N' 'CA']
@@ -90,30 +69,14 @@ def residue(res_name):
90
69
  ['CB' 'HB3']
91
70
  ['OXT' 'HXT']]
92
71
  """
93
- _init_dataset()
94
- array_dict = _residues[res_name]
95
-
96
- array = AtomArray(len(array_dict["res_name"]))
97
-
98
- array.add_annotation("charge", int)
99
-
100
- array.res_name = array_dict["res_name"]
101
- array.atom_name = array_dict["atom_name"]
102
- array.element = array_dict["element"]
103
- array.charge = array_dict["charge"]
104
- array.hetero = array_dict["hetero"]
105
-
106
- array.coord[:,0] = array_dict["coord_x"]
107
- array.coord[:,1] = array_dict["coord_y"]
108
- array.coord[:,2] = array_dict["coord_z"]
109
-
110
- array.bonds = BondList(
111
- array.array_length(),
112
- bonds = np.stack([
113
- array_dict["bond_i"],
114
- array_dict["bond_j"],
115
- array_dict["bond_type"]
116
- ]).T
117
- )
118
-
119
- return array
72
+ # Avoid circular import
73
+ from ..io.pdbx import get_component
74
+
75
+ try:
76
+ component = get_component(get_ccd(), res_name=res_name)
77
+ except KeyError:
78
+ raise KeyError(
79
+ f"No atom information found for residue '{res_name}' in CCD"
80
+ )
81
+ component.hetero[:] = res_name not in non_hetero_residues
82
+ return component
@@ -4,98 +4,23 @@
4
4
 
5
5
  __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
- __all__ = ["bond_dataset", "bond_order", "bond_type", "bonds_in_residue"]
7
+ __all__ = ["bond_type", "bonds_in_residue"]
8
8
 
9
- import warnings
10
- import copy
11
- from os.path import join, dirname, realpath
12
- import msgpack
13
9
  from ..bonds import BondType
10
+ from .ccd import get_from_ccd
14
11
 
15
12
 
16
- _intra_bonds = None
13
+ BOND_TYPES = {
14
+ ("SING", "N") : BondType.SINGLE,
15
+ ("DOUB", "N") : BondType.DOUBLE,
16
+ ("TRIP", "N") : BondType.TRIPLE,
17
+ ("QUAD", "N") : BondType.QUADRUPLE,
18
+ ("SING", "Y") : BondType.AROMATIC_SINGLE,
19
+ ("DOUB", "Y") : BondType.AROMATIC_DOUBLE,
20
+ ("TRIP", "Y") : BondType.AROMATIC_TRIPLE,
21
+ }
17
22
 
18
-
19
- def _init_dataset():
20
- """
21
- Load the bond dataset from MessagePack file.
22
-
23
- Since loading the database is computationally expensive,
24
- this is only done, when the bond database is actually required.
25
- """
26
- global _intra_bonds
27
- if _intra_bonds is not None:
28
- # Database is already initialized
29
- return
30
-
31
- # Bonds are taken from
32
- # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
33
- # (2019/01/27)
34
- _info_dir = dirname(realpath(__file__))
35
- with open(join(_info_dir, "intra_bonds.msgpack"), "rb") as file:
36
- _intra_bonds= msgpack.unpack(
37
- file, use_list=False, raw=False, strict_map_key=False
38
- )
39
-
40
-
41
- def bond_dataset():
42
- """
43
- Get a copy of the complete bond dataset extracted from the chemical
44
- components dictionary.
45
-
46
- This dataset does only contain intra-residue bonds.
47
-
48
- Returns
49
- -------
50
- bonds : dict (str -> dict ((str, str) -> int))
51
- The bonds as nested dictionary.
52
- It maps residue names (up to 3-letters, upper case) to
53
- inner dictionaries.
54
- Each of these dictionary contains the bond information for the
55
- given residue.
56
- Specifically, it uses a set of two atom names, that are bonded,
57
- as keys and the respective :class:`BondType`
58
- (represented by an integer) as values.
59
- """
60
- _init_dataset()
61
- return copy.copy(_intra_bonds)
62
-
63
-
64
- def bond_order(res_name, atom_name1, atom_name2):
65
- """
66
- Get the bond order for two atoms of the same residue, based
67
- on the PDB chemical components dictionary.
68
-
69
- DEPRECATED: Please use :func:`bond_type()` instead.
70
-
71
- Parameters
72
- ----------
73
- res_name : str
74
- The up to 3-letter name of the residue
75
- `atom_name1` and `atom_name2` belong to.
76
- atom_name1, atom_name2 : str
77
- The names of the two atoms to get the bond order from.
78
-
79
- Returns
80
- -------
81
- order : int or None
82
- The order of the bond between `atom_name1` and `atom_name2`.
83
- If the atoms form no bond, if any of the two atoms does not
84
- exist in the context of the residue or if the residue is unknown
85
- to the chemical components dictionary, `None` is returned.
86
- """
87
- warnings.warn("Please use `bond_type()` instead", DeprecationWarning)
88
-
89
- _init_dataset()
90
- btype = bond_type(res_name, atom_name1, atom_name2)
91
- if btype is None:
92
- return None
93
- elif btype == BondType.AROMATIC_SINGLE:
94
- return 1
95
- elif btype == BondType.AROMATIC_DOUBLE:
96
- return 2
97
- else:
98
- return int(btype)
23
+ _intra_bonds = {}
99
24
 
100
25
 
101
26
  def bond_type(res_name, atom_name1, atom_name2):
@@ -110,7 +35,7 @@ def bond_type(res_name, atom_name1, atom_name2):
110
35
  `atom_name1` and `atom_name2` belong to.
111
36
  atom_name1, atom_name2 : str
112
37
  The names of the two atoms to get the bond order from.
113
-
38
+
114
39
  Returns
115
40
  -------
116
41
  order : BondType or None
@@ -119,7 +44,7 @@ def bond_type(res_name, atom_name1, atom_name2):
119
44
  If the atoms form no bond, if any of the two atoms does not
120
45
  exist in the context of the residue or if the residue is unknown
121
46
  to the chemical components dictionary, `None` is returned.
122
-
47
+
123
48
  Examples
124
49
  --------
125
50
 
@@ -132,14 +57,13 @@ def bond_type(res_name, atom_name1, atom_name2):
132
57
  >>> print(bond_type("PHE", "FOO", "BAR"))
133
58
  None
134
59
  """
135
- _init_dataset()
136
- group_bonds = _intra_bonds.get(res_name.upper())
137
- if group_bonds is None:
60
+ bonds_for_residue = bonds_in_residue(res_name)
61
+ if bonds_for_residue is None:
138
62
  return None
139
- # Try both atom aroders
140
- bond_type_int = group_bonds.get(
63
+ # Try both atom orders
64
+ bond_type_int = bonds_for_residue.get(
141
65
  (atom_name1, atom_name2),
142
- group_bonds.get((atom_name2, atom_name1))
66
+ bonds_for_residue.get((atom_name2, atom_name1))
143
67
  )
144
68
  if bond_type_int is not None:
145
69
  return BondType(bond_type_int)
@@ -156,15 +80,22 @@ def bonds_in_residue(res_name):
156
80
  ----------
157
81
  res_name : str
158
82
  The up to 3-letter name of the residue to get the bonds for.
159
-
83
+
160
84
  Returns
161
85
  -------
162
- bonds : dict (str -> int)
86
+ bonds : dict ((str, str) -> int)
163
87
  A dictionary that maps tuples of two atom names to their
164
88
  respective bond types (represented as integer).
165
- `None` if the residue is unknown to the
89
+ Empty, if the residue is unknown to the
166
90
  chemical components dictionary.
167
-
91
+
92
+ Warnings
93
+ --------
94
+ Treat the returned dictionary as immutable.
95
+ Modifying the dictionary may lead to unexpected behavior
96
+ in other functionalities throughout *Biotite* that use this
97
+ function.
98
+
168
99
  Examples
169
100
  --------
170
101
  >>> bonds = bonds_in_residue("PHE")
@@ -195,5 +126,20 @@ def bonds_in_residue(res_name):
195
126
  H2 + N -> BondType.SINGLE
196
127
  HXT + OXT -> BondType.SINGLE
197
128
  """
198
- _init_dataset()
199
- return copy.copy(_intra_bonds.get(res_name.upper()))
129
+ global _intra_bonds
130
+ if res_name not in _intra_bonds:
131
+ chem_comp_bond_dict = get_from_ccd("chem_comp_bond", res_name)
132
+ if chem_comp_bond_dict is None:
133
+ _intra_bonds[res_name] = {}
134
+ else:
135
+ bonds_for_residue = {}
136
+ for atom1, atom2, order, aromatic_flag in zip(
137
+ chem_comp_bond_dict["atom_id_1"],
138
+ chem_comp_bond_dict["atom_id_2"],
139
+ chem_comp_bond_dict["value_order"],
140
+ chem_comp_bond_dict["pdbx_aromatic_flag"]
141
+ ):
142
+ bond_type = BOND_TYPES[order, aromatic_flag]
143
+ bonds_for_residue[atom1.item(), atom2.item()] = bond_type
144
+ _intra_bonds[res_name] = bonds_for_residue
145
+ return _intra_bonds[res_name]
@@ -0,0 +1,8 @@
1
+ These files are based on the
2
+ `Chemical Component Dictionary <https://www.wwpdb.org/data/ccd>`_
3
+ and were created using ``setup_ccd.py``.
4
+
5
+ To keep the size of the repository small, the original commit should be
6
+ rewritten, if the formats of the affected files are compatible with the
7
+ original ones.
8
+ The name of the commit is ``Add CCD dataset``.