biotite 0.40.0__cp312-cp312-macosx_11_0_arm64.whl → 0.41.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/__init__.py +1 -1
  2. biotite/database/pubchem/download.py +23 -23
  3. biotite/database/pubchem/query.py +7 -7
  4. biotite/file.py +17 -9
  5. biotite/sequence/align/banded.c +117 -117
  6. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  7. biotite/sequence/align/cigar.py +60 -15
  8. biotite/sequence/align/kmeralphabet.c +117 -117
  9. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  10. biotite/sequence/align/kmersimilarity.c +117 -117
  11. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  12. biotite/sequence/align/kmertable.cpp +117 -117
  13. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  14. biotite/sequence/align/localgapped.c +117 -117
  15. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  16. biotite/sequence/align/localungapped.c +117 -117
  17. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  18. biotite/sequence/align/multiple.c +117 -117
  19. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  20. biotite/sequence/align/pairwise.c +117 -117
  21. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  22. biotite/sequence/align/permutation.c +117 -117
  23. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  24. biotite/sequence/align/selector.c +117 -117
  25. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  26. biotite/sequence/align/tracetable.c +117 -117
  27. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  28. biotite/sequence/annotation.py +2 -2
  29. biotite/sequence/codec.c +117 -117
  30. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  31. biotite/sequence/io/fasta/convert.py +27 -24
  32. biotite/sequence/phylo/nj.c +117 -117
  33. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  34. biotite/sequence/phylo/tree.c +117 -117
  35. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  36. biotite/sequence/phylo/upgma.c +117 -117
  37. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  38. biotite/structure/__init__.py +2 -0
  39. biotite/structure/bonds.c +1122 -913
  40. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  41. biotite/structure/celllist.c +117 -117
  42. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  43. biotite/structure/charges.c +117 -117
  44. biotite/structure/charges.cpython-312-darwin.so +0 -0
  45. biotite/structure/dotbracket.py +2 -0
  46. biotite/structure/info/atoms.py +6 -1
  47. biotite/structure/info/bonds.py +1 -1
  48. biotite/structure/info/ccd/amino_acids.txt +17 -0
  49. biotite/structure/info/ccd/carbohydrates.txt +2 -0
  50. biotite/structure/info/ccd/components.bcif +0 -0
  51. biotite/structure/info/ccd/nucleotides.txt +1 -0
  52. biotite/structure/info/misc.py +69 -5
  53. biotite/structure/integrity.py +19 -70
  54. biotite/structure/io/ctab.py +12 -106
  55. biotite/structure/io/general.py +157 -165
  56. biotite/structure/io/gro/file.py +16 -16
  57. biotite/structure/io/mmtf/convertarray.c +117 -117
  58. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  59. biotite/structure/io/mmtf/convertfile.c +117 -117
  60. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  61. biotite/structure/io/mmtf/decode.c +117 -117
  62. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  63. biotite/structure/io/mmtf/encode.c +117 -117
  64. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  65. biotite/structure/io/mol/__init__.py +4 -2
  66. biotite/structure/io/mol/convert.py +71 -7
  67. biotite/structure/io/mol/ctab.py +414 -0
  68. biotite/structure/io/mol/header.py +116 -0
  69. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  70. biotite/structure/io/mol/sdf.py +909 -0
  71. biotite/structure/io/pdb/file.py +84 -31
  72. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  73. biotite/structure/io/pdbx/__init__.py +0 -1
  74. biotite/structure/io/pdbx/bcif.py +2 -3
  75. biotite/structure/io/pdbx/cif.py +9 -5
  76. biotite/structure/io/pdbx/component.py +4 -1
  77. biotite/structure/io/pdbx/convert.py +203 -79
  78. biotite/structure/io/pdbx/encoding.c +117 -117
  79. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  80. biotite/structure/repair.py +253 -0
  81. biotite/structure/sasa.c +117 -117
  82. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  83. biotite/structure/sequence.py +112 -0
  84. biotite/structure/superimpose.py +472 -13
  85. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
  86. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
  87. biotite/structure/io/pdbx/error.py +0 -14
  88. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  89. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
  90. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module contains functionalities for repairing malformed structures.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Patrick Kunzmann, Daniel Bauer"
11
+ __all__ = ["renumber_atom_ids", "renumber_res_ids",
12
+ "create_continuous_res_ids", "infer_elements", "create_atom_names"]
13
+
14
+ from collections import Counter
15
+ import warnings
16
+ import numpy as np
17
+ from .atoms import AtomArray, AtomArrayStack
18
+ from .residues import get_residue_starts
19
+ from .chains import get_chain_starts
20
+
21
+
22
def renumber_atom_ids(array, start=None):
    """
    Renumber the atom IDs of the given array.

    DEPRECATED.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be renumbered.
        It must have the ``atom_id`` annotation.
    start : int, optional
        The ID assigned to the first atom.
        The first ID in the array is taken by default.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        A copy of the input whose atom IDs count up from `start`
        in steps of 1.

    Raises
    ------
    ValueError
        If the array has no ``atom_id`` annotation.
    """
    warnings.warn(
        "'renumber_atom_ids()' is deprecated",
        DeprecationWarning,
        # Attribute the warning to the calling code instead of this module
        stacklevel=2
    )
    if "atom_id" not in array.get_annotation_categories():
        raise ValueError("The atom array must have the 'atom_id' annotation")
    if start is None:
        start = array.atom_id[0]
    array = array.copy()
    # The stop value must depend on 'start', otherwise the number of
    # created IDs only equals the number of atoms for 'start == 1'
    array.atom_id = np.arange(start, start + array.shape[-1])
    return array
52
+
53
+
54
def renumber_res_ids(array, start=None):
    """
    Renumber the residue IDs of the given array, so that they are
    continuous.

    DEPRECATED: Use :func:`create_continuous_res_ids()` instead.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be renumbered.
    start : int, optional
        The residue ID assigned to the first atom.
        The first ID in the array is taken by default.

    Returns
    -------
    array : AtomArray or AtomArrayStack
        A copy of the input, where the residue ID is incremented by 1
        at each position where the original residue ID changes.
    """
    warnings.warn(
        "'renumber_res_ids()' is deprecated, use 'create_continuous_res_ids()'",
        DeprecationWarning,
        # Attribute the warning to the calling code instead of this module
        stacklevel=2
    )
    if start is None:
        start = array.res_id[0]
    # Every change of the residue ID, regardless of its size or sign,
    # becomes an increment of exactly 1
    diff = np.diff(array.res_id)
    diff[diff != 0] = 1
    new_res_ids = np.concatenate(([start], diff)).cumsum()
    array = array.copy()
    array.res_id = new_res_ids
    return array
85
+
86
+
87
def create_continuous_res_ids(atoms, restart_each_chain=True):
    """
    Compute gapless residue IDs for a structure.

    Each residue gets the ID of the previous residue plus 1.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to create the continuous residue IDs for.
    restart_each_chain : bool, optional
        If true, the numbering starts again at 1 in every chain.

    Returns
    -------
    res_ids : ndarray, dtype=int
        The continuous residue ID of each atom.

    Examples
    --------

    >>> # Remove a residue to make the residue IDs discontinuous
    >>> atom_array = atom_array[atom_array.res_id != 5]
    >>> res_ids, _ = get_residues(atom_array)
    >>> print(res_ids)
    [ 1 2 3 4 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]
    >>> atom_array.res_id = create_continuous_res_ids(atom_array)
    >>> res_ids, _ = get_residues(atom_array)
    >>> print(res_ids)
    [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]

    """
    # Put a 1 at the first atom of each residue:
    # The running sum then is the 1-based index of the residue
    increments = np.zeros(atoms.array_length(), dtype=int)
    increments[get_residue_starts(atoms)] = 1
    res_ids = np.cumsum(increments)

    if not restart_each_chain:
        return res_ids

    for chain_start in get_chain_starts(atoms):
        # Shift this chain and everything after it,
        # so that the first residue of the chain gets the ID 1
        offset = res_ids[chain_start] - 1
        res_ids[chain_start:] -= offset
    return res_ids
130
+
131
+
132
def infer_elements(atoms):
    """
    Infer the elements of atoms based on their atom name.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack or array-like of str
        The atoms for which the elements should be inferred.
        Alternatively the atom names can be passed directly.

    Returns
    -------
    elements : ndarray, dtype=str
        The inferred elements.
        An element is an empty string, if it could not be inferred
        from the atom name.

    See Also
    --------
    create_atom_names : The opposite of this function

    Examples
    --------

    >>> print(infer_elements(atom_array)[:10])
    ['N' 'C' 'C' 'O' 'C' 'C' 'O' 'N' 'H' 'H']
    >>> print(infer_elements(["CA", "C", "C1", "OD1", "HD21", "1H", "FE"]))
    ['C' 'C' 'C' 'O' 'H' 'H' 'FE']

    """
    if isinstance(atoms, (AtomArray, AtomArrayStack)):
        atom_names = atoms.atom_name
    else:
        atom_names = atoms
    # Request a string dtype explicitly:
    # Otherwise an empty input would give a float array
    return np.array(
        [_guess_element(name) for name in atom_names], dtype=str
    )
165
+
166
+
167
def create_atom_names(atoms):
    """
    Generate atom names for a single residue from its elements.

    Each name is the element followed by a running number, that is
    counted separately for each element.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack or array-like of str
        The atoms to create the atom names for.
        Alternatively the elements can be passed directly.

    Returns
    -------
    atom_names : ndarray, dtype=str
        The created atom names.

    See Also
    --------
    infer_elements : The opposite of this function

    Notes
    -----
    Different atom naming schemes exist, so the names created here
    may deviate from the ones in the original source.
    This function only ensures that the created atom names are unique.
    This is e.g. necessary for writing bonds to PDBx files.

    Pass only a single residue to this function, as the enumeration
    would otherwise continue in the next residue.

    Examples
    --------

    >>> atoms = residue("URA") # Uracil
    >>> print(atoms.element)
    ['N' 'C' 'O' 'N' 'C' 'O' 'C' 'C' 'H' 'H' 'H' 'H']
    >>> print(create_atom_names(atoms))
    ['N1' 'C1' 'O1' 'N2' 'C2' 'O2' 'C3' 'C4' 'H1' 'H2' 'H3' 'H4']
    """
    elements = (
        atoms.element
        if isinstance(atoms, (AtomArray, AtomArrayStack))
        else atoms
    )

    # Number of atoms seen so far for each element
    occurrences = Counter()
    names = []
    for element in elements:
        occurrences[element] += 1
        names.append(f"{element}{occurrences[element]}")
    return np.array(names, dtype="U6")
218
+
219
+
220
# Upper case symbols of all chemical elements, ordered by atomic number
_elements = [symbol.upper() for symbol in (
    "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
    "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr",
    "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br",
    "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd",
    "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La",
    "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er",
    "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au",
    "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
    "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md",
    "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn",
    "Nh", "Fl", "Mc", "Lv", "Ts", "Og",
)]


def _guess_element(atom_name):
    """
    Guess the element of a single atom from its atom name.

    Parameters
    ----------
    atom_name : str
        The atom name.

    Returns
    -------
    element : str
        The upper case element symbol.
        An empty string, if the name contains no non-digit character
        or if no element matches.
        In the latter case a warning is raised in addition.
    """
    # Digits carry no element information (1H -> H)
    letters = "".join(
        char for char in atom_name if not char.isdigit()
    ).upper()
    if not letters:
        return ""

    # The elements frequently found in biomolecules take precedence
    # over two-letter symbols starting with the same character
    if letters.startswith(("C", "N", "O", "S", "H")):
        return letters[0]

    # Otherwise only exact matches of an element symbol are accepted:
    # A two-letter symbol is preferred over a one-letter symbol
    for candidate in (letters[:2], letters[0]):
        if candidate in _elements:
            return candidate

    warnings.warn(f"Could not infer element for '{atom_name}'")
    return ""