biotite 0.39.0__cp312-cp312-win_amd64.whl → 0.41.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (121)
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/pubchem/download.py +23 -23
  4. biotite/database/pubchem/query.py +7 -7
  5. biotite/database/rcsb/download.py +19 -14
  6. biotite/file.py +17 -9
  7. biotite/sequence/align/banded.c +256 -235
  8. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  9. biotite/sequence/align/cigar.py +60 -15
  10. biotite/sequence/align/kmeralphabet.c +241 -220
  11. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmersimilarity.c +213 -194
  13. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  14. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  15. biotite/sequence/align/kmertable.cpp +231 -203
  16. biotite/sequence/align/localgapped.c +256 -235
  17. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  18. biotite/sequence/align/localungapped.c +233 -212
  19. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  20. biotite/sequence/align/multiple.c +253 -232
  21. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  22. biotite/sequence/align/pairwise.c +272 -251
  23. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  24. biotite/sequence/align/permutation.c +213 -194
  25. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  26. biotite/sequence/align/selector.c +215 -195
  27. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  28. biotite/sequence/align/tracetable.c +213 -193
  29. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  30. biotite/sequence/annotation.py +2 -2
  31. biotite/sequence/codec.c +233 -212
  32. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  33. biotite/sequence/io/fasta/convert.py +27 -24
  34. biotite/sequence/phylo/nj.c +213 -194
  35. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/tree.c +225 -200
  37. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  38. biotite/sequence/phylo/upgma.c +213 -194
  39. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  40. biotite/structure/__init__.py +2 -0
  41. biotite/structure/basepairs.py +7 -12
  42. biotite/structure/bonds.c +1435 -1277
  43. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  44. biotite/structure/celllist.c +215 -195
  45. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  46. biotite/structure/charges.c +1050 -1099
  47. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  48. biotite/structure/dotbracket.py +2 -0
  49. biotite/structure/filter.py +30 -37
  50. biotite/structure/info/__init__.py +5 -8
  51. biotite/structure/info/atoms.py +31 -68
  52. biotite/structure/info/bonds.py +47 -101
  53. biotite/structure/info/ccd/README.rst +8 -0
  54. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  55. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  56. biotite/structure/info/ccd/components.bcif +0 -0
  57. biotite/structure/info/ccd/nucleotides.txt +798 -0
  58. biotite/structure/info/ccd.py +95 -0
  59. biotite/structure/info/groups.py +90 -0
  60. biotite/structure/info/masses.py +21 -20
  61. biotite/structure/info/misc.py +78 -25
  62. biotite/structure/info/standardize.py +17 -12
  63. biotite/structure/integrity.py +19 -70
  64. biotite/structure/io/__init__.py +2 -4
  65. biotite/structure/io/ctab.py +12 -106
  66. biotite/structure/io/general.py +167 -181
  67. biotite/structure/io/gro/file.py +16 -16
  68. biotite/structure/io/mmtf/__init__.py +3 -0
  69. biotite/structure/io/mmtf/convertarray.c +217 -196
  70. biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
  71. biotite/structure/io/mmtf/convertfile.c +215 -195
  72. biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
  73. biotite/structure/io/mmtf/decode.c +223 -202
  74. biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
  75. biotite/structure/io/mmtf/encode.c +213 -194
  76. biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
  77. biotite/structure/io/mmtf/file.py +34 -26
  78. biotite/structure/io/mol/__init__.py +4 -2
  79. biotite/structure/io/mol/convert.py +71 -7
  80. biotite/structure/io/mol/ctab.py +414 -0
  81. biotite/structure/io/mol/header.py +116 -0
  82. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  83. biotite/structure/io/mol/sdf.py +909 -0
  84. biotite/structure/io/npz/__init__.py +3 -0
  85. biotite/structure/io/npz/file.py +21 -18
  86. biotite/structure/io/pdb/__init__.py +3 -3
  87. biotite/structure/io/pdb/file.py +89 -34
  88. biotite/structure/io/pdb/hybrid36.c +63 -43
  89. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  90. biotite/structure/io/pdbqt/file.py +32 -32
  91. biotite/structure/io/pdbx/__init__.py +12 -6
  92. biotite/structure/io/pdbx/bcif.py +648 -0
  93. biotite/structure/io/pdbx/cif.py +1032 -0
  94. biotite/structure/io/pdbx/component.py +246 -0
  95. biotite/structure/io/pdbx/convert.py +858 -386
  96. biotite/structure/io/pdbx/encoding.c +112803 -0
  97. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  98. biotite/structure/io/pdbx/legacy.py +267 -0
  99. biotite/structure/molecules.py +151 -151
  100. biotite/structure/repair.py +253 -0
  101. biotite/structure/sasa.c +213 -194
  102. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  103. biotite/structure/sequence.py +112 -0
  104. biotite/structure/superimpose.py +618 -116
  105. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
  106. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
  107. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
  108. biotite/structure/info/amino_acids.json +0 -1556
  109. biotite/structure/info/amino_acids.py +0 -42
  110. biotite/structure/info/carbohydrates.json +0 -1122
  111. biotite/structure/info/carbohydrates.py +0 -39
  112. biotite/structure/info/intra_bonds.msgpack +0 -0
  113. biotite/structure/info/link_types.msgpack +0 -1
  114. biotite/structure/info/nucleotides.json +0 -772
  115. biotite/structure/info/nucleotides.py +0 -39
  116. biotite/structure/info/residue_masses.msgpack +0 -0
  117. biotite/structure/info/residue_names.msgpack +0 -3
  118. biotite/structure/info/residues.msgpack +0 -0
  119. biotite/structure/io/pdbx/file.py +0 -652
  120. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  121. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module contains functionalities for repairing malformed structures.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Patrick Kunzmann, Daniel Bauer"
11
+ __all__ = ["renumber_atom_ids", "renumber_res_ids",
12
+ "create_continuous_res_ids", "infer_elements", "create_atom_names"]
13
+
14
+ from collections import Counter
15
+ import warnings
16
+ import numpy as np
17
+ from .atoms import AtomArray, AtomArrayStack
18
+ from .residues import get_residue_starts
19
+ from .chains import get_chain_starts
20
+
21
+
22
def renumber_atom_ids(array, start=None):
    """
    Renumber the atom IDs of the given array.

    DEPRECATED.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be renumbered.
        It must have the ``atom_id`` annotation.
    start : int, optional
        The ID assigned to the first atom.
        The first ID in the array is taken by default.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        A copy of the input array with consecutive atom IDs.

    Raises
    ------
    ValueError
        If the array has no ``atom_id`` annotation.
    """
    warnings.warn(
        "'renumber_atom_ids()' is deprecated",
        DeprecationWarning
    )
    if "atom_id" not in array.get_annotation_categories():
        raise ValueError("The atom array must have the 'atom_id' annotation")
    if start is None:
        start = array.atom_id[0]
    array = array.copy()
    # One ID per atom, counting upwards from 'start'.
    # The stop value must be relative to 'start': an absolute stop of
    # 'atom count + 1' only yields the right number of IDs for start == 1
    array.atom_id = np.arange(start, start + array.shape[-1])
    return array
52
+
53
+
54
def renumber_res_ids(array, start=None):
    """
    Renumber the residue IDs of the given array, so that they are
    continuous.

    DEPRECATED: Use :func:`create_continuous_res_ids()` instead.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be renumbered.
    start : int, optional
        The residue ID assigned to the first atom.
        The first ID in the array is taken by default.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        A copy of the input array with renumbered residue IDs.
    """
    warnings.warn(
        "'renumber_res_ids()' is deprecated, use 'create_continuous_res_ids()'",
        DeprecationWarning
    )
    first_id = array.res_id[0] if start is None else start
    # A new residue is assumed wherever the ID differs from the one of
    # the preceding atom; each such change advances the new ID by one
    raw_steps = np.diff(array.res_id)
    unit_steps = (raw_steps != 0).astype(raw_steps.dtype)
    continuous_ids = np.cumsum(np.concatenate(([first_id], unit_steps)))
    renumbered = array.copy()
    renumbered.res_id = continuous_ids
    return renumbered
85
+
86
+
87
def create_continuous_res_ids(atoms, restart_each_chain=True):
    """
    Create an array of continuous residue IDs for a given structure.

    Continuous means that the residue ID grows by exactly 1 from one
    residue to the next.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The atoms for which the continuous residue IDs should be created.
    restart_each_chain : bool, optional
        If true, the residue IDs are reset to 1 for each chain.

    Returns
    -------
    res_ids : ndarray, dtype=int
        The continuous residue IDs.

    Examples
    --------

    >>> # Remove a residue to make the residue IDs discontinuous
    >>> atom_array = atom_array[atom_array.res_id != 5]
    >>> res_ids, _ = get_residues(atom_array)
    >>> print(res_ids)
    [ 1  2  3  4  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
    >>> atom_array.res_id = create_continuous_res_ids(atom_array)
    >>> res_ids, _ = get_residues(atom_array)
    >>> print(res_ids)
    [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]

    """
    # Mark the first atom of every residue with a 1, so that the
    # cumulative sum counts the residues seen up to each atom
    increments = np.zeros(atoms.array_length(), dtype=int)
    increments[get_residue_starts(atoms)] = 1
    continuous_ids = np.cumsum(increments)

    if not restart_each_chain:
        return continuous_ids

    for chain_start in get_chain_starts(atoms):
        # Shift this chain and everything after it, so that the first
        # residue of the chain gets the ID 1
        offset = continuous_ids[chain_start] - 1
        continuous_ids[chain_start:] -= offset
    return continuous_ids
130
+
131
+
132
def infer_elements(atoms):
    """
    Infer the elements of atoms based on their atom name.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack or array-like of str
        The atoms for which the elements should be inferred.
        Alternatively the atom names can be passed directly.

    Returns
    -------
    elements : ndarray, dtype=str
        The inferred elements.
        An element that could not be inferred is an empty string.

    See Also
    --------
    create_atom_names : The opposite of this function

    Examples
    --------

    >>> print(infer_elements(atom_array)[:10])
    ['N' 'C' 'C' 'O' 'C' 'C' 'O' 'N' 'H' 'H']
    >>> print(infer_elements(["CA", "C", "C1", "OD1", "HD21", "1H", "FE"]))
    ['C' 'C' 'C' 'O' 'H' 'H' 'FE']

    """
    if isinstance(atoms, (AtomArray, AtomArrayStack)):
        atom_names = atoms.atom_name
    else:
        atom_names = atoms
    # The explicit dtype keeps the documented string dtype for empty
    # input, where NumPy would otherwise fall back to float64
    return np.array(
        [_guess_element(name) for name in atom_names], dtype=str
    )
165
+
166
+
167
def create_atom_names(atoms):
    """
    Create atom names for a single residue based on elements.

    Each atom name is the element followed by a running number, which
    is counted separately for each element.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack or array-like of str
        The atoms for which the atom names should be created.
        Alternatively the elements can be passed directly.

    Returns
    -------
    atom_names : ndarray, dtype=str
        The atom names.

    See Also
    --------
    infer_elements : The opposite of this function

    Notes
    -----
    The atom names created this way may differ from the ones in the
    original source, as different schemes for atom naming exist.
    This function only ensures that the created atom names are unique.
    This is e.g. necessary for writing bonds to PDBx files.

    Note that this function should be used only on single residues,
    otherwise enumeration would continue in the next residue.

    Examples
    --------

    >>> atoms = residue("URA") # Uracil
    >>> print(atoms.element)
    ['N' 'C' 'O' 'N' 'C' 'O' 'C' 'C' 'H' 'H' 'H' 'H']
    >>> print(create_atom_names(atoms))
    ['N1' 'C1' 'O1' 'N2' 'C2' 'O2' 'C3' 'C4' 'H1' 'H2' 'H3' 'H4']
    """
    is_structure = isinstance(atoms, (AtomArray, AtomArrayStack))
    elements = atoms.element if is_structure else atoms

    names = np.zeros(len(elements), dtype="U6")
    # Number of atoms seen so far, tracked per element
    seen = Counter()
    for position, element in enumerate(elements):
        ordinal = seen[element] + 1
        seen[element] = ordinal
        names[position] = f"{element}{ordinal}"
    return names
218
+
219
+
220
# Upper case symbols of the chemical elements, in periodic table order
_elements = [symbol.upper() for symbol in (
    "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
    "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr",
    "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br",
    "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd",
    "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La",
    "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er",
    "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au",
    "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
    "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md",
    "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn",
    "Nh", "Fl", "Mc", "Lv", "Ts", "Og",
)]


def _guess_element(atom_name):
    """
    Guess the upper case element symbol for a single atom name.

    An empty string is returned if the name contains nothing but
    digits.
    If no element matches, a warning is raised and an empty string is
    returned as well.
    """
    # Digits carry no element information (1H -> H)
    stripped = "".join(
        char for char in atom_name if not char.isdigit()
    ).upper()
    if not stripped:
        return ""

    # Some often used elements for biomolecules:
    # These take precedence over any two-character symbol (CA -> C)
    if stripped.startswith(("C", "N", "O", "S", "H")):
        return stripped[0]

    # Exactly match element abbreviations:
    # Try the two-character symbol before the one-character symbol
    for candidate in (stripped[:2], stripped[0]):
        if candidate in _elements:
            return candidate

    warnings.warn(f"Could not infer element for '{atom_name}'")
    return ""