biotite 0.39.0__cp311-cp311-win_amd64.whl → 0.41.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (121) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/pubchem/download.py +23 -23
  4. biotite/database/pubchem/query.py +7 -7
  5. biotite/database/rcsb/download.py +19 -14
  6. biotite/file.py +17 -9
  7. biotite/sequence/align/banded.c +258 -237
  8. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  9. biotite/sequence/align/cigar.py +60 -15
  10. biotite/sequence/align/kmeralphabet.c +243 -222
  11. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmersimilarity.c +215 -196
  13. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  14. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  15. biotite/sequence/align/kmertable.cpp +233 -205
  16. biotite/sequence/align/localgapped.c +258 -237
  17. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  18. biotite/sequence/align/localungapped.c +235 -214
  19. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  20. biotite/sequence/align/multiple.c +255 -234
  21. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  22. biotite/sequence/align/pairwise.c +274 -253
  23. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  24. biotite/sequence/align/permutation.c +215 -196
  25. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  26. biotite/sequence/align/selector.c +217 -197
  27. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  28. biotite/sequence/align/tracetable.c +215 -195
  29. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  30. biotite/sequence/annotation.py +2 -2
  31. biotite/sequence/codec.c +235 -214
  32. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  33. biotite/sequence/io/fasta/convert.py +27 -24
  34. biotite/sequence/phylo/nj.c +215 -196
  35. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/tree.c +227 -202
  37. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  38. biotite/sequence/phylo/upgma.c +215 -196
  39. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  40. biotite/structure/__init__.py +2 -0
  41. biotite/structure/basepairs.py +7 -12
  42. biotite/structure/bonds.c +1437 -1279
  43. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  44. biotite/structure/celllist.c +217 -197
  45. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  46. biotite/structure/charges.c +1052 -1101
  47. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  48. biotite/structure/dotbracket.py +2 -0
  49. biotite/structure/filter.py +30 -37
  50. biotite/structure/info/__init__.py +5 -8
  51. biotite/structure/info/atoms.py +31 -68
  52. biotite/structure/info/bonds.py +47 -101
  53. biotite/structure/info/ccd/README.rst +8 -0
  54. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  55. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  56. biotite/structure/info/ccd/components.bcif +0 -0
  57. biotite/structure/info/ccd/nucleotides.txt +798 -0
  58. biotite/structure/info/ccd.py +95 -0
  59. biotite/structure/info/groups.py +90 -0
  60. biotite/structure/info/masses.py +21 -20
  61. biotite/structure/info/misc.py +78 -25
  62. biotite/structure/info/standardize.py +17 -12
  63. biotite/structure/integrity.py +19 -70
  64. biotite/structure/io/__init__.py +2 -4
  65. biotite/structure/io/ctab.py +12 -106
  66. biotite/structure/io/general.py +167 -181
  67. biotite/structure/io/gro/file.py +16 -16
  68. biotite/structure/io/mmtf/__init__.py +3 -0
  69. biotite/structure/io/mmtf/convertarray.c +219 -198
  70. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  71. biotite/structure/io/mmtf/convertfile.c +217 -197
  72. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  73. biotite/structure/io/mmtf/decode.c +225 -204
  74. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  75. biotite/structure/io/mmtf/encode.c +215 -196
  76. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  77. biotite/structure/io/mmtf/file.py +34 -26
  78. biotite/structure/io/mol/__init__.py +4 -2
  79. biotite/structure/io/mol/convert.py +71 -7
  80. biotite/structure/io/mol/ctab.py +414 -0
  81. biotite/structure/io/mol/header.py +116 -0
  82. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  83. biotite/structure/io/mol/sdf.py +909 -0
  84. biotite/structure/io/npz/__init__.py +3 -0
  85. biotite/structure/io/npz/file.py +21 -18
  86. biotite/structure/io/pdb/__init__.py +3 -3
  87. biotite/structure/io/pdb/file.py +89 -34
  88. biotite/structure/io/pdb/hybrid36.c +63 -43
  89. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  90. biotite/structure/io/pdbqt/file.py +32 -32
  91. biotite/structure/io/pdbx/__init__.py +12 -6
  92. biotite/structure/io/pdbx/bcif.py +648 -0
  93. biotite/structure/io/pdbx/cif.py +1032 -0
  94. biotite/structure/io/pdbx/component.py +246 -0
  95. biotite/structure/io/pdbx/convert.py +858 -386
  96. biotite/structure/io/pdbx/encoding.c +112813 -0
  97. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  98. biotite/structure/io/pdbx/legacy.py +267 -0
  99. biotite/structure/molecules.py +151 -151
  100. biotite/structure/repair.py +253 -0
  101. biotite/structure/sasa.c +215 -196
  102. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  103. biotite/structure/sequence.py +112 -0
  104. biotite/structure/superimpose.py +618 -116
  105. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
  106. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
  107. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
  108. biotite/structure/info/amino_acids.json +0 -1556
  109. biotite/structure/info/amino_acids.py +0 -42
  110. biotite/structure/info/carbohydrates.json +0 -1122
  111. biotite/structure/info/carbohydrates.py +0 -39
  112. biotite/structure/info/intra_bonds.msgpack +0 -0
  113. biotite/structure/info/link_types.msgpack +0 -1
  114. biotite/structure/info/nucleotides.json +0 -772
  115. biotite/structure/info/nucleotides.py +0 -39
  116. biotite/structure/info/residue_masses.msgpack +0 -0
  117. biotite/structure/info/residue_names.msgpack +0 -3
  118. biotite/structure/info/residues.msgpack +0 -0
  119. biotite/structure/io/pdbx/file.py +0 -652
  120. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  121. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,95 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_ccd", "get_from_ccd"]
8
+
9
+ from pathlib import Path
10
+ import numpy as np
11
+
12
+
13
+ CCD_DIR = Path(__file__).parent / "ccd"
14
+ INDEX_COLUMN_NAME = {
15
+ "chem_comp": "id",
16
+ "chem_comp_atom": "comp_id",
17
+ "chem_comp_bond": "comp_id",
18
+ }
19
+
20
+ _ccd_block = None
21
+ # For each category this index gives the start and stop for each residue
22
+ _residue_index = {}
23
+
24
+
25
def get_ccd():
    """
    Provide the bundled PDB *Chemical Component Dictionary* (CCD).

    The dictionary is read from the ``components.bcif`` file located in
    ``CCD_DIR`` on the first call only.
    The loaded object is kept in a module-level cache and the very same
    object is handed out again on every later call.

    Returns
    -------
    ccd
        The ``block`` attribute of the :class:`BinaryCIFFile` that was
        read from ``components.bcif``.
        It is indexed by category name to access the CCD categories.
    """
    # Imported inside the function to avoid a circular import
    from ..io.pdbx.bcif import BinaryCIFFile

    global _ccd_block

    if _ccd_block is not None:
        # The CCD was already loaded by an earlier call
        return _ccd_block

    ccd_file = BinaryCIFFile.read(CCD_DIR / "components.bcif")
    _ccd_block = ccd_file.block
    return _ccd_block
42
+
43
+
44
def get_from_ccd(category_name, comp_id, column_name=None):
    """
    Look up the rows belonging to a residue in one category of the
    PDB *Chemical Component Dictionary* (CCD).

    On the first request for a category, an index mapping each residue
    to its row range is built and cached, so later requests for the same
    category only need a dictionary lookup and a slice.

    Parameters
    ----------
    category_name : str
        The category in the CCD.
        It must be a key of ``INDEX_COLUMN_NAME``, as that mapping names
        the column that identifies the residue of each row.
    comp_id : str
        The residue identifier, i.e. the ``res_name``.
    column_name : str, optional
        The name of the column to be retrieved.
        If None, all columns are returned as dictionary.
        By default None.

    Returns
    -------
    value : ndarray or dict or None
        The rows of the requested column, or a dictionary mapping each
        column name of the category to its rows.
        ``None`` if the `comp_id` is not found in the category.
    """
    category = get_ccd()[category_name]

    # Build the residue -> (start, stop) index of this category lazily
    if category_name not in _residue_index:
        id_column = category[INDEX_COLUMN_NAME[category_name]].as_array()
        _residue_index[category_name] = _index_residues(id_column)

    row_range = _residue_index[category_name].get(comp_id)
    if row_range is None:
        # Unknown residue in this category
        return None
    start, stop = row_range

    if column_name is not None:
        return category[column_name].as_array()[start:stop]
    return {
        name: category[name].as_array()[start:stop]
        for name in category.keys()
    }
85
+
86
+
87
+ def _index_residues(id_column):
88
+ residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
89
+ # The final start is the exclusive stop of last residue
90
+ residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
91
+ index = {}
92
+ for i in range(len(residue_starts)-1):
93
+ comp_id = id_column[residue_starts[i]].item()
94
+ index[comp_id] = (residue_starts[i], residue_starts[i+1])
95
+ return index
@@ -0,0 +1,90 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Tom David Müller, Patrick Kunzmann"
7
+ __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
8
+
9
+ from pathlib import Path
10
+ import copy
11
+
12
+
13
+ CCD_DIR = Path(__file__).parent / "ccd"
14
+
15
+
16
+ group_lists = {}
17
+
18
+
19
def amino_acid_names():
    """
    Get a tuple of amino acid three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    amino_acid_names : tuple of str
        The three-letter codes of the residues in the ``amino_acids``
        group, i.e. residues that are peptide monomers.

    References
    ----------

    .. footbibliography::
    """
    names = _get_group_members("amino_acids")
    return names
39
+
40
+
41
def nucleotide_names():
    """
    Get a tuple of nucleotide three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    nucleotide_names : tuple of str
        The three-letter codes of the residues in the ``nucleotides``
        group, i.e. residues that are DNA/RNA monomers.

    References
    ----------

    .. footbibliography::
    """
    names = _get_group_members("nucleotides")
    return names
61
+
62
+
63
def carbohydrate_names():
    """
    Get a tuple of carbohydrate three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    carbohydrate_names : tuple of str
        The three-letter codes of the residues in the ``carbohydrates``
        group, i.e. residues that are saccharide monomers.

    References
    ----------

    .. footbibliography::
    """
    names = _get_group_members("carbohydrates")
    return names
83
+
84
+
85
def _get_group_members(group_name):
    """
    Get the residue names that belong to the given group.

    The names are read from the whitespace-separated text file
    ``<group_name>.txt`` in ``CCD_DIR`` on the first request and are
    cached in ``group_lists`` for all subsequent requests.

    Parameters
    ----------
    group_name : str
        The name of the group, which is also the stem of the text file.

    Returns
    -------
    members : tuple of str
        The residue names listed in the group file.

    Raises
    ------
    OSError
        If the file for the group cannot be opened.
    """
    if group_name not in group_lists:
        group_file = CCD_DIR / f"{group_name}.txt"
        # Fix the encoding explicitly, as the default of 'open()'
        # depends on the locale of the platform
        with open(group_file, "r", encoding="utf-8") as file:
            group_lists[group_name] = tuple(file.read().split())
    return group_lists[group_name]
@@ -7,20 +7,14 @@ __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["mass"]
8
8
 
9
9
  import json
10
- from os.path import join, dirname, realpath
11
- import msgpack
10
+ from pathlib import Path
12
11
  from ..atoms import Atom, AtomArray, AtomArrayStack
12
+ from .ccd import get_from_ccd
13
13
 
14
14
 
15
- _info_dir = dirname(realpath(__file__))
16
15
  # Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
17
- with open(join(_info_dir, "atom_masses.json"), "r") as file:
18
- _atom_masses = json.load(file)
19
- # Masses are taken from
20
- # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
21
- # (2019/01/27)
22
- with open(join(_info_dir, "residue_masses.msgpack"), "rb") as file:
23
- _res_masses = msgpack.load(file, raw=False)
16
+ ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
17
+ _atom_masses = None
24
18
 
25
19
 
26
20
  def mass(item, is_residue=None):
@@ -34,7 +28,7 @@ def mass(item, is_residue=None):
34
28
  from the molecule.
35
29
  For example non-terminal residues in a protein or nucleotide chain
36
30
  miss the mass of a water molecule.
37
-
31
+
38
32
  Parameters
39
33
  ----------
40
34
  item : str or Atom or AtomArray or AtomArrayStack
@@ -50,17 +44,17 @@ def mass(item, is_residue=None):
50
44
  If set to false, the string is strictly interpreted as element.
51
45
  By default the string will be interpreted as element at first
52
46
  and secondly as residue name, if the element is unknown.
53
-
47
+
54
48
  Returns
55
49
  -------
56
50
  mass : float or None
57
51
  The mass of the given object in *u*. None if the mass is unknown.
58
-
52
+
59
53
  References
60
54
  ----------
61
-
55
+
62
56
  .. footbibliography::
63
-
57
+
64
58
  Examples
65
59
  --------
66
60
 
@@ -94,29 +88,36 @@ def mass(item, is_residue=None):
94
88
  >>> print(mass("N"))
95
89
  14.007
96
90
  """
91
+ global _atom_masses
92
+ with open(ATOM_MASSES_FILE, "r") as file:
93
+ _atom_masses = json.load(file)
97
94
 
98
95
  if isinstance(item, str):
99
96
  if is_residue is None:
100
97
  result_mass = _atom_masses.get(item.upper())
101
98
  if result_mass is None:
102
- result_mass = _res_masses.get(item.upper())
99
+ result_mass = get_from_ccd(
100
+ "chem_comp", item.upper(), "formula_weight"
101
+ ).item()
103
102
  elif not is_residue:
104
103
  result_mass = _atom_masses.get(item.upper())
105
104
  else:
106
- result_mass = _res_masses.get(item.upper())
107
-
105
+ result_mass = get_from_ccd(
106
+ "chem_comp", item.upper(), "formula_weight"
107
+ ).item()
108
+
108
109
  elif isinstance(item, Atom):
109
110
  result_mass = mass(item.element, is_residue=False)
110
111
  elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
111
112
  result_mass = sum(
112
113
  (mass(element, is_residue=False) for element in item.element)
113
114
  )
114
-
115
+
115
116
  else:
116
117
  raise TypeError(
117
118
  f"Cannot calculate mass for {type(item).__name__} objects"
118
119
  )
119
-
120
+
120
121
  if result_mass is None:
121
122
  raise KeyError(f"{item} is not known")
122
123
  return result_mass
@@ -4,39 +4,28 @@
4
4
 
5
5
  __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
- __all__ = ["all_residues", "full_name", "link_type"]
7
+ __all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
8
8
 
9
- from os.path import join, dirname, realpath
10
- import msgpack
11
-
12
-
13
- _info_dir = dirname(realpath(__file__))
14
- # Data is taken from
15
- # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
16
- # (2019/01/27)
17
- with open(join(_info_dir, "residue_names.msgpack"), "rb") as file:
18
- _res_names = msgpack.load(file, raw=False)
19
- with open(join(_info_dir, "link_types.msgpack"), "rb") as file:
20
- _link_types = msgpack.load(file, raw=False)
9
+ from .ccd import get_ccd, get_from_ccd
21
10
 
22
11
 
23
12
  def all_residues():
24
13
  """
25
14
  Get a list of all residues/compound names in the
26
15
  PDB chemical components dictionary.
27
-
16
+
28
17
  Returns
29
18
  -------
30
19
  residues : list of str
31
20
  A list of all available The up to 3-letter residue names.
32
-
21
+
33
22
  Examples
34
23
  --------
35
24
 
36
25
  >>> print(all_residues()[1000 : 1010])
37
- ['0Y4', '0Y5', '0Y7', '0Y8', '0Y9', '0YA', '0YB', '0YC', '0YD', '0YE']
26
+ ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
38
27
  """
39
- return list(_res_names.keys())
28
+ return get_ccd()["chem_comp"]["id"].as_array().tolist()
40
29
 
41
30
 
42
31
  def full_name(res_name):
@@ -48,19 +37,24 @@ def full_name(res_name):
48
37
  ----------
49
38
  res_name : str
50
39
  The up to 3-letter residue name.
51
-
40
+
52
41
  Returns
53
42
  -------
54
- name : str
43
+ name : str or None
55
44
  The full name of the residue.
56
-
45
+ If the residue is unknown to the chemical components dictionary,
46
+ ``None`` is returned.
47
+
57
48
  Examples
58
49
  --------
59
50
 
60
51
  >>> print(full_name("MAN"))
61
52
  alpha-D-mannopyranose
62
53
  """
63
- return _res_names.get(res_name.upper())
54
+ array = get_from_ccd("chem_comp", res_name.upper(), "name")
55
+ if array is None:
56
+ return None
57
+ return array.item()
64
58
 
65
59
 
66
60
  def link_type(res_name):
@@ -72,12 +66,14 @@ def link_type(res_name):
72
66
  ----------
73
67
  res_name : str
74
68
  The up to 3-letter residue name.
75
-
69
+
76
70
  Returns
77
71
  -------
78
- link_type : str
72
+ link_type : str or None
79
73
  The link type.
80
-
74
+ If the residue is unknown to the chemical components dictionary,
75
+ ``None`` is returned.
76
+
81
77
  Examples
82
78
  --------
83
79
 
@@ -88,4 +84,61 @@ def link_type(res_name):
88
84
  >>> print(link_type("HOH"))
89
85
  NON-POLYMER
90
86
  """
91
- return _link_types.get(res_name.upper())
87
+ array = get_from_ccd("chem_comp", res_name.upper(), "type")
88
+ if array is None:
89
+ return None
90
+ return array.item()
91
+
92
+
93
def one_letter_code(res_name):
    """
    Get the one-letter code of a residue/compound,
    based on the PDB chemical components dictionary.

    The one-letter code is only defined for amino acids and nucleotides
    and for compounds that are structurally similar to them.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        The name is converted to upper case before the lookup.

    Returns
    -------
    one_letter_code : str or None
        The one-letter code.
        None if the compound is not present in the CCD or if no
        one-letter code is defined for this compound.

    Examples
    --------

    Get the one letter code for an amino acid (or a nucleotide).

    >>> print(full_name("ALA"))
    ALANINE
    >>> print(one_letter_code("ALA"))
    A

    For similar compounds, the one-letter code is also defined.

    >>> print(full_name("DAL"))
    D-ALANINE
    >>> print(one_letter_code("DAL"))
    A

    For other compounds, the one-letter code is not defined.

    >>> print(full_name("MAN"))
    alpha-D-mannopyranose
    >>> print(one_letter_code("MAN"))
    None

    """
    codes = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
    if codes is None:
        # The compound is not part of the CCD
        return None
    code = codes.item()
    # An empty entry means that no one-letter code is defined
    return None if code == "" else code
@@ -6,15 +6,13 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["standardize_order"]
8
8
 
9
+ import warnings
9
10
  import numpy as np
10
- from .atoms import residue
11
+ from .ccd import get_from_ccd
11
12
  from ..residues import get_residue_starts
12
13
  from ..error import BadStructureError
13
14
 
14
15
 
15
- _atom_name_cache = {}
16
-
17
-
18
16
  def standardize_order(atoms):
19
17
  """
20
18
  Get an index array for an input :class:`AtomArray` or
@@ -34,20 +32,20 @@ def standardize_order(atoms):
34
32
  atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
35
33
  Input structure with atoms that are potentially not in the
36
34
  *standard* order.
37
-
35
+
38
36
  Returns
39
37
  -------
40
38
  indices : ndarray, dtype=int, shape=(n,)
41
39
  When this index array is applied on the input `atoms`,
42
40
  the atoms for each residue are reordered to obtain the
43
41
  standard *RCSB PDB* atom order.
44
-
42
+
45
43
  Raises
46
44
  ------
47
45
  BadStructureError
48
46
  If the input `atoms` have duplicate atoms (same atom name)
49
47
  within a residue.
50
-
48
+
51
49
  Examples
52
50
  --------
53
51
 
@@ -123,11 +121,18 @@ def standardize_order(atoms):
123
121
  stop = starts[i+1]
124
122
 
125
123
  res_name = atoms.res_name[start]
126
- standard_atom_names = _atom_name_cache.get(res_name)
124
+ standard_atom_names = get_from_ccd(
125
+ "chem_comp_atom", res_name, "atom_id"
126
+ )
127
127
  if standard_atom_names is None:
128
- standard_atom_names = residue(res_name).atom_name
129
- _atom_name_cache[res_name] = standard_atom_names
130
-
128
+ # If the residue is not in the CCD, keep the current order
129
+ warnings.warn(
130
+ f"Residue '{res_name}' is not in the CCD, "
131
+ f"keeping current atom order"
132
+ )
133
+ reordered_indices[start : stop] = np.arange(start, stop)
134
+ continue
135
+
131
136
  reordered_indices[start : stop] = _reorder(
132
137
  atoms.atom_name[start : stop], standard_atom_names
133
138
  ) + start
@@ -152,7 +157,7 @@ def _reorder(origin, target):
152
157
  The atom names to reorder.
153
158
  target : ndarray, dtype=str
154
159
  The atom names in target order.
155
-
160
+
156
161
  Returns
157
162
  -------
158
163
  indices : ndarray, dtype=int
@@ -12,7 +12,7 @@ __author__ = "Patrick Kunzmann, Daniel Bauer"
12
12
  __all__ = ["check_id_continuity", "check_atom_id_continuity",
13
13
  "check_res_id_continuity", "check_backbone_continuity",
14
14
  "check_duplicate_atoms", "check_bond_continuity",
15
- "check_linear_continuity", "renumber_atom_ids", "renumber_res_ids"]
15
+ "check_linear_continuity"]
16
16
 
17
17
  import numpy as np
18
18
  import warnings
@@ -32,17 +32,17 @@ def check_id_continuity(array):
32
32
  """
33
33
  Check if the residue IDs are incremented by more than 1 or
34
34
  decremented, from one atom to the next one.
35
-
35
+
36
36
  An increment by more than 1 is as strong clue for missing residues,
37
37
  a decrement means probably a start of a new chain.
38
38
 
39
39
  DEPRECATED: Use :func:`check_res_id_continuity()` instead.
40
-
40
+
41
41
  Parameters
42
42
  ----------
43
43
  array : AtomArray or AtomArrayStack
44
44
  The array to be checked.
45
-
45
+
46
46
  Returns
47
47
  -------
48
48
  discontinuity : ndarray, dtype=int
@@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
60
60
  """
61
61
  Check if the atom IDs are incremented by more than 1 or
62
62
  decremented, from one atom to the next one.
63
-
63
+
64
64
  An increment by more than 1 is as strong clue for missing atoms.
65
-
65
+
66
66
  Parameters
67
67
  ----------
68
68
  array : AtomArray or AtomArrayStack
69
69
  The array to be checked.
70
-
70
+
71
71
  Returns
72
72
  -------
73
73
  discontinuity : ndarray, dtype=int
@@ -81,15 +81,15 @@ def check_res_id_continuity(array):
81
81
  """
82
82
  Check if the residue IDs are incremented by more than 1 or
83
83
  decremented, from one atom to the next one.
84
-
84
+
85
85
  An increment by more than 1 is as strong clue for missing residues,
86
86
  a decrement means probably a start of a new chain.
87
-
87
+
88
88
  Parameters
89
89
  ----------
90
90
  array : AtomArray or AtomArrayStack
91
91
  The array to be checked.
92
-
92
+
93
93
  Returns
94
94
  -------
95
95
  discontinuity : ndarray, dtype=int
@@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
168
168
  """
169
169
  Check if the (peptide or phosphate) backbone atoms have
170
170
  non-reasonable distance to the next atom.
171
-
171
+
172
172
  A large or very small distance is a very strong clue, that there is
173
173
  no bond between those atoms, therefore the chain is discontinued.
174
174
 
@@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
206
206
  """
207
207
  Check if a structure contains duplicate atoms, i.e. two atoms in a
208
208
  structure have the same annotations (coordinates may be different).
209
-
209
+
210
210
  Duplicate atoms may appear, when a structure has occupancy for an
211
211
  atom at two or more positions or when the *altloc* positions are
212
212
  improperly read.
213
-
213
+
214
214
  Parameters
215
215
  ----------
216
216
  array : AtomArray or AtomArrayStack
217
217
  The array to be checked.
218
-
218
+
219
219
  Returns
220
220
  -------
221
221
  duplicate : ndarray, dtype=int
@@ -228,16 +228,16 @@ def check_duplicate_atoms(array):
228
228
  for i in range(1, array.array_length()):
229
229
  # Start with assumption that all atoms in the array
230
230
  # until index i are duplicates of the atom at index i
231
- is_dublicate = np.full(i, True, dtype=bool)
231
+ is_duplicate = np.full(i, True, dtype=bool)
232
232
  for annot in annots:
233
233
  # For each annotation array filter out the atoms until
234
234
  # index i that have an unequal annotation
235
- # to the atom at index i
236
- is_dublicate &= (annot[:i] == annot[i])
235
+ # to the atom at index i
236
+ is_duplicate &= (annot[:i] == annot[i])
237
237
  # After checking all annotation arrays,
238
238
  # if there still is any duplicate to the atom at index i,
239
239
  # add i the the list of duplicate atom indices
240
- if is_dublicate.any():
240
+ if is_duplicate.any():
241
241
  duplicates.append(i)
242
242
  return np.array(duplicates)
243
243
 
@@ -255,7 +255,7 @@ def check_in_box(array):
255
255
  ----------
256
256
  array : AtomArray or AtomArrayStack
257
257
  The array to be checked.
258
-
258
+
259
259
  Returns
260
260
  -------
261
261
  outside : ndarray, dtype=int
@@ -266,54 +266,3 @@ def check_in_box(array):
266
266
  box = array.box
267
267
  fractions = coord_to_fraction(array, box)
268
268
  return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]
269
-
270
-
271
- def renumber_atom_ids(array, start=None):
272
- """
273
- Renumber the atom IDs of the given array.
274
-
275
- Parameters
276
- ----------
277
- array : AtomArray or AtomArrayStack
278
- The array to be checked.
279
- start : int, optional
280
- The starting index for renumbering.
281
- The first ID in the array is taken by default.
282
-
283
- Returns
284
- -------
285
- array : AtomArray or AtomArrayStack
286
- The renumbered array.
287
- """
288
- if "atom_id" not in array.get_annotation_categories():
289
- raise ValueError("The atom array must have the 'atom_id' annotation")
290
- if start is None:
291
- start = array.atom_id[0]
292
- array.atom_id = np.arange(start, array.shape[-1]+1)
293
- return array
294
-
295
-
296
- def renumber_res_ids(array, start=None):
297
- """
298
- Renumber the residue IDs of the given array.
299
-
300
- Parameters
301
- ----------
302
- array : AtomArray or AtomArrayStack
303
- The array to be checked.
304
- start : int, optional
305
- The starting index for renumbering.
306
- The first ID in the array is taken by default.
307
-
308
- Returns
309
- -------
310
- array : AtomArray or AtomArrayStack
311
- The renumbered array.
312
- """
313
- if start is None:
314
- start = array.res_id[0]
315
- diff = np.diff(array.res_id)
316
- diff[diff != 0] = 1
317
- new_res_ids = np.concatenate(([start], diff)).cumsum()
318
- array.res_id = new_res_ids
319
- return array