biotite 1.0.1__cp311-cp311-macosx_11_0_arm64.whl → 1.2.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/dssp/app.py +13 -3
  6. biotite/application/localapp.py +36 -2
  7. biotite/application/msaapp.py +10 -10
  8. biotite/application/muscle/app3.py +5 -18
  9. biotite/application/muscle/app5.py +5 -5
  10. biotite/application/sra/app.py +0 -5
  11. biotite/application/util.py +22 -2
  12. biotite/application/viennarna/rnaalifold.py +8 -8
  13. biotite/application/viennarna/rnaplot.py +9 -3
  14. biotite/application/viennarna/util.py +1 -1
  15. biotite/application/webapp.py +1 -1
  16. biotite/database/afdb/__init__.py +12 -0
  17. biotite/database/afdb/download.py +191 -0
  18. biotite/database/entrez/dbnames.py +10 -0
  19. biotite/database/entrez/download.py +9 -10
  20. biotite/database/entrez/key.py +1 -1
  21. biotite/database/entrez/query.py +5 -4
  22. biotite/database/pubchem/download.py +6 -6
  23. biotite/database/pubchem/error.py +10 -0
  24. biotite/database/pubchem/query.py +12 -23
  25. biotite/database/rcsb/download.py +3 -2
  26. biotite/database/rcsb/query.py +8 -9
  27. biotite/database/uniprot/check.py +22 -17
  28. biotite/database/uniprot/download.py +3 -6
  29. biotite/database/uniprot/query.py +4 -5
  30. biotite/file.py +14 -2
  31. biotite/interface/__init__.py +19 -0
  32. biotite/interface/openmm/__init__.py +16 -0
  33. biotite/interface/openmm/state.py +93 -0
  34. biotite/interface/openmm/system.py +227 -0
  35. biotite/interface/pymol/__init__.py +198 -0
  36. biotite/interface/pymol/cgo.py +346 -0
  37. biotite/interface/pymol/convert.py +185 -0
  38. biotite/interface/pymol/display.py +267 -0
  39. biotite/interface/pymol/object.py +1226 -0
  40. biotite/interface/pymol/shapes.py +178 -0
  41. biotite/interface/pymol/startup.py +169 -0
  42. biotite/interface/rdkit/__init__.py +15 -0
  43. biotite/interface/rdkit/mol.py +490 -0
  44. biotite/interface/version.py +71 -0
  45. biotite/interface/warning.py +19 -0
  46. biotite/sequence/align/__init__.py +0 -4
  47. biotite/sequence/align/alignment.py +49 -14
  48. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  49. biotite/sequence/align/banded.pyx +26 -26
  50. biotite/sequence/align/cigar.py +2 -2
  51. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  52. biotite/sequence/align/kmeralphabet.pyx +19 -2
  53. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  54. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  55. biotite/sequence/align/kmertable.pyx +58 -48
  56. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/localgapped.pyx +47 -47
  58. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  59. biotite/sequence/align/localungapped.pyx +10 -10
  60. biotite/sequence/align/matrix.py +284 -57
  61. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  62. biotite/sequence/align/matrix_data/PB.license +21 -0
  63. biotite/sequence/align/matrix_data/PB.mat +18 -0
  64. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  66. biotite/sequence/align/pairwise.pyx +35 -35
  67. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  68. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/selector.pyx +2 -2
  70. biotite/sequence/align/statistics.py +1 -1
  71. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  72. biotite/sequence/alphabet.py +5 -2
  73. biotite/sequence/annotation.py +19 -13
  74. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  75. biotite/sequence/codon.py +1 -2
  76. biotite/sequence/graphics/alignment.py +25 -39
  77. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  78. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  79. biotite/sequence/graphics/colorschemes.py +44 -11
  80. biotite/sequence/graphics/dendrogram.py +4 -2
  81. biotite/sequence/graphics/features.py +2 -2
  82. biotite/sequence/graphics/logo.py +10 -12
  83. biotite/sequence/io/fasta/convert.py +1 -2
  84. biotite/sequence/io/fasta/file.py +1 -1
  85. biotite/sequence/io/fastq/file.py +3 -3
  86. biotite/sequence/io/genbank/file.py +3 -3
  87. biotite/sequence/io/genbank/sequence.py +2 -0
  88. biotite/sequence/io/gff/convert.py +1 -1
  89. biotite/sequence/io/gff/file.py +1 -2
  90. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  91. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  92. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  93. biotite/sequence/profile.py +105 -29
  94. biotite/sequence/search.py +0 -1
  95. biotite/sequence/seqtypes.py +136 -8
  96. biotite/sequence/sequence.py +1 -2
  97. biotite/setup_ccd.py +197 -0
  98. biotite/structure/__init__.py +6 -3
  99. biotite/structure/alphabet/__init__.py +25 -0
  100. biotite/structure/alphabet/encoder.py +332 -0
  101. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  102. biotite/structure/alphabet/i3d.py +109 -0
  103. biotite/structure/alphabet/layers.py +86 -0
  104. biotite/structure/alphabet/pb.license +21 -0
  105. biotite/structure/alphabet/pb.py +170 -0
  106. biotite/structure/alphabet/unkerasify.py +128 -0
  107. biotite/structure/atoms.py +163 -66
  108. biotite/structure/basepairs.py +26 -26
  109. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  110. biotite/structure/bonds.pyx +79 -25
  111. biotite/structure/box.py +19 -21
  112. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  113. biotite/structure/celllist.pyx +83 -67
  114. biotite/structure/chains.py +5 -37
  115. biotite/structure/charges.cpython-311-darwin.so +0 -0
  116. biotite/structure/compare.py +420 -13
  117. biotite/structure/density.py +1 -1
  118. biotite/structure/dotbracket.py +27 -28
  119. biotite/structure/filter.py +8 -8
  120. biotite/structure/geometry.py +74 -127
  121. biotite/structure/hbond.py +17 -19
  122. biotite/structure/info/__init__.py +1 -0
  123. biotite/structure/info/atoms.py +24 -15
  124. biotite/structure/info/bonds.py +12 -6
  125. biotite/structure/info/ccd.py +125 -34
  126. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  127. biotite/structure/info/groups.py +62 -19
  128. biotite/structure/info/masses.py +9 -6
  129. biotite/structure/info/misc.py +15 -22
  130. biotite/structure/info/radii.py +92 -22
  131. biotite/structure/info/standardize.py +4 -4
  132. biotite/structure/integrity.py +4 -6
  133. biotite/structure/io/general.py +2 -2
  134. biotite/structure/io/gro/file.py +8 -9
  135. biotite/structure/io/mol/convert.py +1 -1
  136. biotite/structure/io/mol/ctab.py +33 -28
  137. biotite/structure/io/mol/mol.py +1 -1
  138. biotite/structure/io/mol/sdf.py +80 -53
  139. biotite/structure/io/pdb/convert.py +4 -3
  140. biotite/structure/io/pdb/file.py +85 -25
  141. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  142. biotite/structure/io/pdbqt/file.py +36 -36
  143. biotite/structure/io/pdbx/__init__.py +1 -0
  144. biotite/structure/io/pdbx/bcif.py +54 -15
  145. biotite/structure/io/pdbx/cif.py +92 -66
  146. biotite/structure/io/pdbx/component.py +15 -4
  147. biotite/structure/io/pdbx/compress.py +321 -0
  148. biotite/structure/io/pdbx/convert.py +410 -75
  149. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  150. biotite/structure/io/pdbx/encoding.pyx +98 -17
  151. biotite/structure/io/trajfile.py +9 -6
  152. biotite/structure/io/util.py +38 -0
  153. biotite/structure/mechanics.py +0 -1
  154. biotite/structure/molecules.py +141 -156
  155. biotite/structure/pseudoknots.py +7 -13
  156. biotite/structure/repair.py +2 -4
  157. biotite/structure/residues.py +13 -24
  158. biotite/structure/rings.py +335 -0
  159. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  160. biotite/structure/sasa.pyx +2 -1
  161. biotite/structure/segments.py +69 -11
  162. biotite/structure/sequence.py +0 -1
  163. biotite/structure/sse.py +0 -2
  164. biotite/structure/superimpose.py +74 -62
  165. biotite/structure/tm.py +581 -0
  166. biotite/structure/transform.py +12 -25
  167. biotite/structure/util.py +76 -4
  168. biotite/version.py +9 -4
  169. biotite/visualize.py +111 -1
  170. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
  171. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
  172. biotite/structure/info/ccd/README.rst +0 -8
  173. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  174. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  175. biotite/structure/info/ccd/nucleotides.txt +0 -798
  176. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
  177. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -4,23 +4,23 @@
4
4
 
5
5
  __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
- __all__ = ["get_ccd", "get_from_ccd"]
7
+ __all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
8
8
 
9
+ import functools
10
+ import importlib
11
+ import inspect
12
+ import pkgutil
9
13
  from pathlib import Path
10
14
  import numpy as np
11
15
 
12
- CCD_DIR = Path(__file__).parent / "ccd"
13
- INDEX_COLUMN_NAME = {
16
+ _CCD_FILE = Path(__file__).parent / "components.bcif"
17
+ _SPECIAL_ID_COLUMN_NAMES = {
14
18
  "chem_comp": "id",
15
- "chem_comp_atom": "comp_id",
16
- "chem_comp_bond": "comp_id",
17
19
  }
18
-
19
- _ccd_block = None
20
- # For each category this index gives the start and stop for each residue
21
- _residue_index = {}
20
+ _DEFAULT_ID_COLUMN_NAME = "comp_id"
22
21
 
23
22
 
23
+ @functools.cache
24
24
  def get_ccd():
25
25
  """
26
26
  Get the internal subset of the PDB
@@ -29,25 +29,68 @@ def get_ccd():
29
29
 
30
30
  Returns
31
31
  -------
32
- ccd : BinaryCIFFile
32
+ ccd : BinaryCIFBlock
33
33
  The CCD.
34
+ It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
35
+
36
+ Warnings
37
+ --------
38
+
39
+ Consider the return value as read-only.
40
+ As other functions cache data from it, changing data may lead to undefined
41
+ behavior.
34
42
 
35
43
  References
36
44
  ----------
37
45
 
38
46
  .. footbibliography::
39
-
40
47
  """
41
48
  # Avoid circular import
42
49
  from biotite.structure.io.pdbx.bcif import BinaryCIFFile
43
50
 
44
- global _ccd_block
45
- if _ccd_block is None:
46
- # Load CCD once and cache it for subsequent calls
47
- _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
48
- return _ccd_block
51
+ try:
52
+ return BinaryCIFFile.read(_CCD_FILE).block
53
+ except FileNotFoundError:
54
+ raise RuntimeError(
55
+ "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
56
+ )
57
+
58
+
59
+ def set_ccd_path(ccd_path):
60
+ """
61
+ Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
62
+
63
+ This function also clears the cache of functions depending on the CCD to ensure
64
+ that the new CCD is used.
65
+
66
+ Parameters
67
+ ----------
68
+ ccd_path : path-like
69
+ The path to the custom CCD in BinaryCIF format, prepared with the
70
+ ``setup_ccd.py`` module.
71
+
72
+ Notes
73
+ -----
74
+ This function is intended for advanced users who need to add information for
75
+ compounds, which are not part of the internal CCD.
76
+ The reason might be that an updated version already exists upstream or that
77
+ the user wants to add custom compounds to the CCD.
78
+ """
79
+ global _CCD_FILE
80
+ _CCD_FILE = Path(ccd_path)
81
+
82
+ # Clear caches in all functions in biotite.structure.info
83
+ info_modules = [
84
+ importlib.import_module(f"biotite.structure.info.{mod_name}")
85
+ for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
86
+ ]
87
+ for module in info_modules:
88
+ for _, function in inspect.getmembers(module, callable):
89
+ if hasattr(function, "cache_clear"):
90
+ function.cache_clear()
49
91
 
50
92
 
93
+ @functools.cache
51
94
  def get_from_ccd(category_name, comp_id, column_name=None):
52
95
  """
53
96
  Get the rows for the given residue in the given category from the
@@ -67,38 +110,54 @@ def get_from_ccd(category_name, comp_id, column_name=None):
67
110
 
68
111
  Returns
69
112
  -------
70
- value : ndarray or dict or None
71
- The array of the given column or all columns as dictionary.
72
- ``None`` if the `comp_id` is not found in the category.
113
+ slice : BinaryCIFCategory or BinaryCIFColumn
114
+ The category or column (if `column_name` is provided) containing only the rows
115
+ for the given residue.
116
+
117
+ Notes
118
+ -----
119
+ The returned values are cached for faster access in subsequent calls.
73
120
 
74
121
  References
75
122
  ----------
76
123
 
77
124
  .. footbibliography::
78
-
79
125
  """
80
- global _residue_index
81
- ccd = get_ccd()
82
- category = ccd[category_name]
83
- if category_name not in _residue_index:
84
- _residue_index[category_name] = _index_residues(
85
- category[INDEX_COLUMN_NAME[category_name]].as_array()
86
- )
87
126
  try:
88
- start, stop = _residue_index[category_name][comp_id]
127
+ start, stop = _residue_index(category_name)[comp_id]
89
128
  except KeyError:
90
129
  return None
91
130
 
131
+ category = get_ccd()[category_name]
92
132
  if column_name is None:
93
- return {
94
- col_name: category[col_name].as_array()[start:stop]
95
- for col_name in category.keys()
96
- }
133
+ return _filter_category(category, slice(start, stop))
97
134
  else:
98
- return category[column_name].as_array()[start:stop]
135
+ return _filter_column(category[column_name], slice(start, stop))
99
136
 
100
137
 
101
- def _index_residues(id_column):
138
+ @functools.cache
139
+ def _residue_index(category_name):
140
+ """
141
+ Get the start and stop index for each component name in the given
142
+ CCD category.
143
+
144
+ Parameters
145
+ ----------
146
+ category_name : str
147
+ The category to determine start and stop indices for each component in.
148
+
149
+ Returns
150
+ -------
151
+ index : dict (str -> (int, int))
152
+ The index maps each present component name to the corresponding
153
+ start and exclusive stop index in the ID column of the category.
154
+ """
155
+ category = get_ccd()[category_name]
156
+ id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
157
+ category_name, _DEFAULT_ID_COLUMN_NAME
158
+ )
159
+ id_column = category[id_column_name].as_array()
160
+
102
161
  residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
103
162
  # The final start is the exclusive stop of last residue
104
163
  residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
@@ -107,3 +166,35 @@ def _index_residues(id_column):
107
166
  comp_id = id_column[residue_starts[i]].item()
108
167
  index[comp_id] = (residue_starts[i], residue_starts[i + 1])
109
168
  return index
169
+
170
+
171
+ def _filter_category(category, index):
172
+ """
173
+ Reduce the category to the values for the given index.
174
+ """
175
+ # Avoid circular import
176
+ from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
177
+
178
+ return BinaryCIFCategory(
179
+ {key: _filter_column(column, index) for key, column in category.items()}
180
+ )
181
+
182
+
183
+ def _filter_column(column, index):
184
+ """
185
+ Reduce the column to the values for the given index.
186
+ """
187
+ # Avoid circular import
188
+ from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
189
+ from biotite.structure.io.pdbx.component import MaskValue
190
+
191
+ data_array = column.data.array[index]
192
+ mask_array = column.mask.array[index] if column.mask is not None else None
193
+ return BinaryCIFColumn(
194
+ BinaryCIFData(data_array),
195
+ (
196
+ BinaryCIFData(mask_array)
197
+ if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
198
+ else None
199
+ ),
200
+ )
@@ -6,14 +6,45 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Tom David Müller, Patrick Kunzmann"
7
7
  __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
8
8
 
9
- from pathlib import Path
10
-
11
- CCD_DIR = Path(__file__).parent / "ccd"
12
-
13
-
14
- group_lists = {}
15
-
16
-
9
+ import functools
10
+ import numpy as np
11
+ from biotite.structure.info.ccd import get_ccd
12
+
13
+ _AMINO_ACID_TYPES = [
14
+ "D-beta-peptide, C-gamma linking",
15
+ "D-gamma-peptide, C-delta linking",
16
+ "D-peptide COOH carboxy terminus",
17
+ "D-peptide NH3 amino terminus",
18
+ "D-peptide linking",
19
+ "L-beta-peptide, C-gamma linking",
20
+ "L-gamma-peptide, C-delta linking",
21
+ "L-peptide COOH carboxy terminus",
22
+ "L-peptide NH3 amino terminus",
23
+ "L-peptide linking",
24
+ "peptide linking",
25
+ ]
26
+ _NUCLEOTIDE_TYPES = [
27
+ "DNA OH 3 prime terminus",
28
+ "DNA OH 5 prime terminus",
29
+ "DNA linking",
30
+ "L-DNA linking",
31
+ "L-RNA linking",
32
+ "RNA OH 3 prime terminus",
33
+ "RNA OH 5 prime terminus",
34
+ "RNA linking",
35
+ ]
36
+ _CARBOHYDRATE_TYPES = [
37
+ "D-saccharide",
38
+ "D-saccharide, alpha linking",
39
+ "D-saccharide, beta linking",
40
+ "L-saccharide",
41
+ "L-saccharide, alpha linking",
42
+ "L-saccharide, beta linking",
43
+ "saccharide",
44
+ ]
45
+
46
+
47
+ @functools.cache
17
48
  def amino_acid_names():
18
49
  """
19
50
  Get a tuple of amino acid three-letter codes according to the
@@ -30,11 +61,11 @@ def amino_acid_names():
30
61
  ----------
31
62
 
32
63
  .. footbibliography::
33
-
34
64
  """
35
- return _get_group_members("amino_acids")
65
+ return _get_group_members(_AMINO_ACID_TYPES)
36
66
 
37
67
 
68
+ @functools.cache
38
69
  def nucleotide_names():
39
70
  """
40
71
  Get a tuple of nucleotide three-letter codes according to the
@@ -51,11 +82,11 @@ def nucleotide_names():
51
82
  ----------
52
83
 
53
84
  .. footbibliography::
54
-
55
85
  """
56
- return _get_group_members("nucleotides")
86
+ return _get_group_members(_NUCLEOTIDE_TYPES)
57
87
 
58
88
 
89
+ @functools.cache
59
90
  def carbohydrate_names():
60
91
  """
61
92
  Get a tuple of carbohydrate three-letter codes according to the
@@ -72,14 +103,26 @@ def carbohydrate_names():
72
103
  ----------
73
104
 
74
105
  .. footbibliography::
106
+ """
107
+ return _get_group_members(_CARBOHYDRATE_TYPES)
108
+
75
109
 
110
+ def _get_group_members(match_types):
76
111
  """
77
- return _get_group_members("carbohydrates")
112
+ Identify component IDs that match a given component *type* from the CCD.
78
113
 
114
+ Parameters
115
+ ----------
116
+ match_types : list of str
117
+ The component types to extract.
79
118
 
80
- def _get_group_members(group_name):
81
- global group_lists
82
- if group_name not in group_lists:
83
- with open(CCD_DIR / f"{group_name}.txt", "r") as file:
84
- group_lists[group_name] = tuple(file.read().split())
85
- return group_lists[group_name]
119
+ Returns
120
+ -------
121
+ comp_ids : list of str
122
+ The extracted component IDs.
123
+ """
124
+ category = get_ccd()["chem_comp"]
125
+ comp_ids = category["id"].as_array()
126
+ types = category["type"].as_array()
127
+ # Ignore case
128
+ return comp_ids[np.isin(np.char.lower(types), np.char.lower(match_types))].tolist()
@@ -95,15 +95,11 @@ def mass(item, is_residue=None):
95
95
  if is_residue is None:
96
96
  result_mass = _atom_masses.get(item.upper())
97
97
  if result_mass is None:
98
- result_mass = get_from_ccd(
99
- "chem_comp", item.upper(), "formula_weight"
100
- ).item()
98
+ result_mass = _mass_for_residue(item)
101
99
  elif not is_residue:
102
100
  result_mass = _atom_masses.get(item.upper())
103
101
  else:
104
- result_mass = get_from_ccd(
105
- "chem_comp", item.upper(), "formula_weight"
106
- ).item()
102
+ result_mass = _mass_for_residue(item)
107
103
 
108
104
  elif isinstance(item, Atom):
109
105
  result_mass = mass(item.element, is_residue=False)
@@ -116,3 +112,10 @@ def mass(item, is_residue=None):
116
112
  if result_mass is None:
117
113
  raise KeyError(f"{item} is not known")
118
114
  return result_mass
115
+
116
+
117
+ def _mass_for_residue(res_name):
118
+ column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
119
+ if column is None:
120
+ raise KeyError(f"Residue '{res_name}' is not known")
121
+ return column.as_item()
@@ -11,19 +11,13 @@ from biotite.structure.info.ccd import get_ccd, get_from_ccd
11
11
 
12
12
  def all_residues():
13
13
  """
14
- Get a list of all residues/compound names in the
15
- PDB chemical components dictionary.
14
+ Get a list of all residues/compound names in the PDB
15
+ *Chemical Component Dictionary* (CCD).
16
16
 
17
17
  Returns
18
18
  -------
19
19
  residues : list of str
20
- A list of all available The up to 3-letter residue names.
21
-
22
- Examples
23
- --------
24
-
25
- >>> print(all_residues()[1000 : 1010])
26
- ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
20
+ A list of all available residue names.
27
21
  """
28
22
  return get_ccd()["chem_comp"]["id"].as_array().tolist()
29
23
 
@@ -51,10 +45,10 @@ def full_name(res_name):
51
45
  >>> print(full_name("MAN"))
52
46
  alpha-D-mannopyranose
53
47
  """
54
- array = get_from_ccd("chem_comp", res_name.upper(), "name")
55
- if array is None:
48
+ column = get_from_ccd("chem_comp", res_name.upper(), "name")
49
+ if column is None:
56
50
  return None
57
- return array.item()
51
+ return column.as_item()
58
52
 
59
53
 
60
54
  def link_type(res_name):
@@ -84,10 +78,10 @@ def link_type(res_name):
84
78
  >>> print(link_type("HOH"))
85
79
  NON-POLYMER
86
80
  """
87
- array = get_from_ccd("chem_comp", res_name.upper(), "type")
88
- if array is None:
81
+ column = get_from_ccd("chem_comp", res_name.upper(), "type")
82
+ if column is None:
89
83
  return None
90
- return array.item()
84
+ return column.as_item()
91
85
 
92
86
 
93
87
  def one_letter_code(res_name):
@@ -107,7 +101,7 @@ def one_letter_code(res_name):
107
101
  -------
108
102
  one_letter_code : str or None
109
103
  The one-letter code.
110
- None if the compound is not present in the CCD or if no
104
+ ``None`` if the compound is not present in the CCD or if no
111
105
  one-letter code is defined for this compound.
112
106
 
113
107
  Examples
@@ -133,12 +127,11 @@ def one_letter_code(res_name):
133
127
  alpha-D-mannopyranose
134
128
  >>> print(one_letter_code("MAN"))
135
129
  None
136
-
137
130
  """
138
- array = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
139
- if array is None:
131
+ column = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
132
+ if column is None:
140
133
  return None
141
- item = array.item()
142
- if item == "":
134
+ if column.mask is not None:
135
+ # Value is masked, i.e. inapplicable or missing
143
136
  return None
144
- return item
137
+ return column.as_item()
@@ -26,37 +26,106 @@ _PROTOR_RADII = {
26
26
  ("S", 1, 0) : 1.77,
27
27
  ("S", 2, 0) : 1.77, # Not official, added for completeness (MET)
28
28
  ("S", 2, 1) : 1.77,
29
- ("F", 1, 0) : 1.47, # Taken from _SINGLE_RADII
30
- ("CL", 1, 0) : 1.75, # Taken from _SINGLE_RADII
31
- ("BR", 1, 0) : 1.85, # Taken from _SINGLE_RADII
29
+ ("F", 1, 0) : 1.47, # Taken from _SINGLE_ATOM_VDW_RADII
30
+ ("CL", 1, 0) : 1.75, # Taken from _SINGLE_ATOM_VDW_RADII
31
+ ("BR", 1, 0) : 1.85, # Taken from _SINGLE_ATOM_VDW_RADII
32
32
  ("I", 1, 0) : 1.98, # Taken from _SINGLE_RADII
33
33
  }
34
34
 
35
- _SINGLE_RADII = {
36
- "H": 1.20,
35
+ _SINGLE_ATOM_VDW_RADII = {
36
+ # Main group
37
+ # Row 1 (Period 1)
38
+ "H": 1.10,
37
39
  "HE": 1.40,
38
40
 
41
+ # Row 2 (Period 2)
42
+ "LI": 1.81,
43
+ "BE": 1.53,
44
+ "B": 1.92,
39
45
  "C": 1.70,
40
46
  "N": 1.55,
41
47
  "O": 1.52,
42
48
  "F": 1.47,
43
49
  "NE": 1.54,
44
50
 
51
+ # Row 3 (Period 3)
52
+ "NA": 2.27,
53
+ "MG": 1.73,
54
+ "AL": 1.84,
45
55
  "SI": 2.10,
46
56
  "P": 1.80,
47
57
  "S": 1.80,
48
58
  "CL": 1.75,
49
59
  "AR": 1.88,
50
60
 
61
+ # Row 4 (Period 4)
62
+ "K": 2.75,
63
+ "CA": 2.31,
64
+ "GA": 1.87,
65
+ "GE": 2.11,
51
66
  "AS": 1.85,
52
67
  "SE": 1.90,
53
- "BR": 1.85,
68
+ "BR": 1.83,
54
69
  "KR": 2.02,
55
70
 
71
+ # Row 5 (Period 5)
72
+ "RB": 3.03,
73
+ "SR": 2.49,
74
+ "IN": 1.93,
75
+ "SN": 2.17,
76
+ "SB": 2.06,
56
77
  "TE": 2.06,
57
78
  "I": 1.98,
58
79
  "XE": 2.16,
80
+
81
+ # Row 6 (Period 6)
82
+ "CS": 3.43,
83
+ "BA": 2.68,
84
+ "TL": 1.96,
85
+ "PB": 2.02,
86
+ "BI": 2.07,
87
+ "PO": 1.97,
88
+ "AT": 2.02,
89
+ "RN": 2.20,
90
+
91
+ # Row 7 (Period 7)
92
+ "FR": 3.48,
93
+ "RA": 2.83,
94
+
95
+ # Transition metals (relevant ones only)
96
+ # Row 1
97
+ "FE": 2.05,
98
+ "CU": 2.00,
99
+ "ZN": 2.10,
100
+ "MN": 2.05,
101
+ "CO": 2.00,
102
+ "NI": 2.00,
103
+
104
+ # Row 2
105
+ 'MO': 2.10,
106
+ 'RU': 2.05,
107
+
108
+ # Row 3
109
+ 'W': 2.10,
110
+ 'PT': 2.05,
111
+ 'AU': 2.10,
59
112
  }
113
+ """
114
+ Van der Waals radii for main group and transition elements.
115
+
116
+ Main group:
117
+ Source: https://pubs.acs.org/doi/10.1021/jp8111556, Table 12 (Mantina et al. 2009)
118
+
119
+ Transition metals:
120
+ Source: RDKit, 2024.9.4 Release
121
+ https://github.com/rdkit/rdkit/blob/af6347963f25cfe8fe4db0638410b2f3a8e8bd89/Code/GraphMol/atomic_data.cpp#L51
122
+
123
+ Where available, these values were cross-checked vs the CRC Handbook of
124
+ Chemistry and Physics (105th edition) and verified that they are closely
125
+ in line (barring very minor discrepancies, usually < 0.05 Å).
126
+ We cannot use the CRC values directly as they are not permissively licensed.
127
+ """
128
+
60
129
  # fmt: on
61
130
 
62
131
  # A dictionary that caches radii for each residue
@@ -65,16 +134,15 @@ _protor_radii = {}
65
134
 
66
135
  def vdw_radius_protor(res_name, atom_name):
67
136
  """
68
- Estimate the Van-der-Waals radius of an non-hydrogen atom,
137
+ Estimate the Van-der-Waals radius of a heavy atom,
69
138
  that includes the radius added by potential bonded hydrogen atoms.
70
139
  The respective radii are taken from the ProtOr dataset.
71
140
  :footcite:`Tsai1999`
72
141
 
73
142
  This is especially useful for macromolecular structures where no
74
143
  hydrogen atoms are resolved, e.g. crystal structures.
75
- The valency of the non-hydrogen atom and the amount of normally
76
- bonded hydrogen atoms is taken from the chemical compound dictionary
77
- dataset.
144
+ The valency of the heavy atom and the amount of normally
145
+ bonded hydrogen atoms is taken from the *Chemical Component Dictionary*.
78
146
 
79
147
  Parameters
80
148
  ----------
@@ -86,12 +154,13 @@ def vdw_radius_protor(res_name, atom_name):
86
154
 
87
155
  Returns
88
156
  -------
89
- The Van-der-Waals radius of the given atom.
90
- If the radius cannot be estimated for the atom, `None` is returned.
157
+ radius : float
158
+ The Van-der-Waals radius of the given atom.
159
+ If the radius cannot be estimated for the atom, `None` is returned.
91
160
 
92
- See also
161
+ See Also
93
162
  --------
94
- vdw_radius_single
163
+ vdw_radius_single : *Van-der-Waals* radii for structures with annotated hydrogen atoms.
95
164
 
96
165
  References
97
166
  ----------
@@ -114,7 +183,7 @@ def vdw_radius_protor(res_name, atom_name):
114
183
  # Use cached radii for the residue, if already calculated
115
184
  if atom_name not in _protor_radii[res_name]:
116
185
  raise KeyError(
117
- f"Residue '{res_name}' does not contain an atom named " f"'{atom_name}'"
186
+ f"Residue '{res_name}' does not contain an atom named '{atom_name}'"
118
187
  )
119
188
  return _protor_radii[res_name].get(atom_name)
120
189
  else:
@@ -166,8 +235,8 @@ def _calculate_protor_radii(res_name):
166
235
 
167
236
  def vdw_radius_single(element):
168
237
  """
169
- Get the Van-der-Waals radius of an atom from the given element.
170
- :footcite:`Bondi1964`
238
+ Get the *Van-der-Waals* radius of an atom from the given element.
239
+ :footcite:`Mantina2009`
171
240
 
172
241
  Parameters
173
242
  ----------
@@ -176,12 +245,13 @@ def vdw_radius_single(element):
176
245
 
177
246
  Returns
178
247
  -------
179
- The Van-der-Waals radius of the atom.
180
- If the radius is unknown for the element, `None` is returned.
248
+ radius : float
249
+ The Van-der-Waals radius of the atom.
250
+ If the radius is unknown for the element, `None` is returned.
181
251
 
182
- See also
252
+ See Also
183
253
  --------
184
- vdw_radius_protor
254
+ vdw_radius_protor : *Van-der-Waals* radii for structures without annotated hydrogen atoms.
185
255
 
186
256
  References
187
257
  ----------
@@ -194,4 +264,4 @@ def vdw_radius_single(element):
194
264
  >>> print(vdw_radius_single("C"))
195
265
  1.7
196
266
  """
197
- return _SINGLE_RADII.get(element.upper())
267
+ return _SINGLE_ATOM_VDW_RADII.get(element.upper())
@@ -121,16 +121,16 @@ def standardize_order(atoms):
121
121
  stop = starts[i + 1]
122
122
 
123
123
  res_name = atoms.res_name[start]
124
- standard_atom_names = get_from_ccd("chem_comp_atom", res_name, "atom_id")
125
- if standard_atom_names is None:
124
+ chem_comp_atom = get_from_ccd("chem_comp_atom", res_name, "atom_id")
125
+ if chem_comp_atom is None:
126
126
  # If the residue is not in the CCD, keep the current order
127
127
  warnings.warn(
128
- f"Residue '{res_name}' is not in the CCD, "
129
- f"keeping current atom order"
128
+ f"Residue '{res_name}' is not in the CCD, keeping current atom order"
130
129
  )
131
130
  reordered_indices[start:stop] = np.arange(start, stop)
132
131
  continue
133
132
 
133
+ standard_atom_names = chem_comp_atom.as_array()
134
134
  reordered_indices[start:stop] = (
135
135
  _reorder(atoms.atom_name[start:stop], standard_atom_names) + start
136
136
  )
@@ -47,7 +47,7 @@ def check_atom_id_continuity(array):
47
47
  Returns
48
48
  -------
49
49
  discontinuity : ndarray, dtype=int
50
- Contains the indices of atoms after a discontinuity
50
+ Contains the indices of atoms after a discontinuity.
51
51
  """
52
52
  ids = array.atom_id
53
53
  return _check_continuity(ids)
@@ -69,7 +69,7 @@ def check_res_id_continuity(array):
69
69
  Returns
70
70
  -------
71
71
  discontinuity : ndarray, dtype=int
72
- Contains the indices of atoms after a discontinuity
72
+ Contains the indices of atoms after a discontinuity.
73
73
  """
74
74
  ids = array.res_id
75
75
  return _check_continuity(ids)
@@ -96,10 +96,8 @@ def check_linear_continuity(array, min_len=1.2, max_len=1.8):
96
96
 
97
97
  See Also
98
98
  --------
99
- biotite.structure.filter.filter_linear_bond_continuity :
100
- A function to filter for atoms preserving the continuity (used here).
101
- biotite.structure.bonds.BondList :
102
- A class that doesn't depend on the atoms' order to identify bonds.
99
+ filter_linear_bond_continuity : A function to filter for atoms preserving the continuity (used here).
100
+ BondList : A class that doesn't depend on the atoms' order to identify bonds.
103
101
  """
104
102
  con_mask = filter_linear_bond_continuity(array, min_len, max_len)
105
103
  # The continuity mask `con_mask` points to atoms for which the next atom is continuous.