biotite 0.41.2__cp312-cp312-macosx_11_0_arm64.whl → 1.0.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  60. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  68. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  102. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  103. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +221 -235
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cpython-312-darwin.so +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/sse.py CHANGED
@@ -12,51 +12,43 @@ __author__ = "Patrick Kunzmann"
12
12
  __all__ = ["annotate_sse"]
13
13
 
14
14
  import numpy as np
15
- from .celllist import CellList
16
- from .geometry import distance, angle, dihedral
17
- from .filter import filter_amino_acids
18
- from .residues import get_residue_starts
19
- from .integrity import check_res_id_continuity
15
+ from biotite.structure.celllist import CellList
16
+ from biotite.structure.filter import filter_amino_acids
17
+ from biotite.structure.geometry import angle, dihedral, distance
18
+ from biotite.structure.integrity import check_res_id_continuity
19
+ from biotite.structure.residues import get_residue_starts
20
20
 
21
+ _r_helix = (np.deg2rad(89 - 12), np.deg2rad(89 + 12))
22
+ _a_helix = (np.deg2rad(50 - 20), np.deg2rad(50 + 20))
23
+ _d2_helix = ((5.5 - 0.5), (5.5 + 0.5)) # Not used in the algorithm description
24
+ _d3_helix = ((5.3 - 0.5), (5.3 + 0.5))
25
+ _d4_helix = ((6.4 - 0.6), (6.4 + 0.6))
21
26
 
22
- _r_helix = (np.deg2rad(89-12), np.deg2rad(89+12))
23
- _a_helix = (np.deg2rad(50-20), np.deg2rad(50+20))
24
- _d2_helix = ((5.5-0.5), (5.5+0.5)) # Not used in the algorithm description
25
- _d3_helix = ((5.3-0.5), (5.3+0.5))
26
- _d4_helix = ((6.4-0.6), (6.4+0.6))
27
+ _r_strand = (np.deg2rad(124 - 14), np.deg2rad(124 + 14))
28
+ _a_strand = (np.deg2rad(-180), np.deg2rad(-125), np.deg2rad(145), np.deg2rad(180))
29
+ _d2_strand = ((6.7 - 0.6), (6.7 + 0.6))
30
+ _d3_strand = ((9.9 - 0.9), (9.9 + 0.9))
31
+ _d4_strand = ((12.4 - 1.1), (12.4 + 1.1))
27
32
 
28
- _r_strand = (np.deg2rad(124-14), np.deg2rad(124+14))
29
- _a_strand = (np.deg2rad(-180), np.deg2rad(-125),
30
- np.deg2rad(145), np.deg2rad(180))
31
- _d2_strand = ((6.7-0.6), (6.7+0.6))
32
- _d3_strand = ((9.9-0.9), (9.9+0.9))
33
- _d4_strand = ((12.4-1.1), (12.4+1.1))
34
33
 
35
-
36
- def annotate_sse(atom_array, chain_id=None):
34
+ def annotate_sse(atom_array):
37
35
  r"""
38
36
  Calculate the secondary structure elements (SSEs) of a
39
37
  peptide chain based on the `P-SEA` algorithm.
40
38
  :footcite:`Labesse1997`
41
-
39
+
42
40
  The annotation is based on CA coordinates only, specifically
43
41
  distances and dihedral angles.
44
42
  Discontinuities between chains are detected by residue ID.
45
-
43
+
46
44
  Parameters
47
45
  ----------
48
46
  atom_array : AtomArray
49
47
  The atom array to annotate for.
50
48
  Non-peptide residues are also allowed and obtain a ``''``
51
49
  SSE.
52
- chain_id : str, optional
53
- The peptide atoms belonging to this chain are filtered and
54
- annotated.
55
- DEPRECATED: By now multiple chains can be annotated at once.
56
- To annotate only a certain chain, filter the `atom_array` before
57
- giving it as input to this function.
58
-
59
-
50
+
51
+
60
52
  Returns
61
53
  -------
62
54
  sse : ndarray
@@ -67,37 +59,30 @@ def annotate_sse(atom_array, chain_id=None):
67
59
  :math:`{\beta}`-strand/sheet, ``'c'`` means coil.
68
60
  ``''`` indicates that a residue is not an amino acid or it
69
61
  comprises no ``CA`` atom.
70
-
62
+
71
63
  Notes
72
64
  -----
73
65
  Although this function is based on the original `P-SEA` algorithm,
74
66
  there are deviations compared to the official `P-SEA` software in
75
67
  some cases.
76
68
  Do not rely on getting the exact same results.
77
-
69
+
78
70
  References
79
71
  ----------
80
72
 
81
73
  .. footbibliography::
82
-
74
+
83
75
  Examples
84
76
  --------
85
-
77
+
86
78
  SSE of PDB 1L2Y:
87
-
88
- >>> sse = annotate_sse(atom_array, "A")
79
+
80
+ >>> sse = annotate_sse(atom_array)
89
81
  >>> print(sse)
90
82
  ['c' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
91
83
  'c' 'c']
92
-
93
- """
94
- if chain_id is not None:
95
- # Filter all CA atoms in the relevant chain
96
- atom_array = atom_array[
97
- (atom_array.chain_id == chain_id) & filter_amino_acids(atom_array)
98
- ]
99
-
100
84
 
85
+ """
101
86
  residue_starts = get_residue_starts(atom_array)
102
87
  # Sort CA coord into the coord array at the respective residue index
103
88
  # If a residue has no CA, e.g. because it is not an amino acid,
@@ -106,9 +91,9 @@ def annotate_sse(atom_array, chain_id=None):
106
91
  ca_indices = np.where(
107
92
  filter_amino_acids(atom_array) & (atom_array.atom_name == "CA")
108
93
  )[0]
109
- ca_coord[
110
- np.searchsorted(residue_starts, ca_indices, "right") - 1
111
- ] = atom_array.coord[ca_indices]
94
+ ca_coord[np.searchsorted(residue_starts, ca_indices, "right") - 1] = (
95
+ atom_array.coord[ca_indices]
96
+ )
112
97
 
113
98
  if len(ca_coord) <= 5:
114
99
  # The number of atoms is too small #
@@ -125,12 +110,12 @@ def annotate_sse(atom_array, chain_id=None):
125
110
  # purpose of geometric measurements
126
111
  # -> the distances/angles spanning discontinuities are NaN
127
112
  discont_indices = check_res_id_continuity(atom_array)
128
- discont_res_indices = np.searchsorted(
129
- residue_starts, discont_indices, "right"
130
- ) - 1
113
+ discont_res_indices = np.searchsorted(residue_starts, discont_indices, "right") - 1
131
114
  ca_coord = np.insert(
132
- ca_coord, discont_res_indices,
133
- np.full((len(discont_res_indices),3), np.nan), axis=0
115
+ ca_coord,
116
+ discont_res_indices,
117
+ np.full((len(discont_res_indices), 3), np.nan),
118
+ axis=0,
134
119
  )
135
120
  # Later the SSE for virtual residues are removed again
136
121
  # via this mask
@@ -139,73 +124,74 @@ def annotate_sse(atom_array, chain_id=None):
139
124
 
140
125
  length = len(ca_coord)
141
126
 
142
-
143
127
  # The distances and angles are not defined for the entire interval,
144
128
  # therefore the indices do not have the full range
145
129
  # Values that are not defined are NaN
146
130
  d2i = np.full(length, np.nan)
147
131
  d3i = np.full(length, np.nan)
148
132
  d4i = np.full(length, np.nan)
149
- ri = np.full(length, np.nan)
150
- ai = np.full(length, np.nan)
151
-
152
- d2i[1 : length-1] = distance(ca_coord[0 : length-2], ca_coord[2 : length])
153
- d3i[1 : length-2] = distance(ca_coord[0 : length-3], ca_coord[3 : length])
154
- d4i[1 : length-3] = distance(ca_coord[0 : length-4], ca_coord[4 : length])
155
- ri[1 : length-1] = angle(
156
- ca_coord[0 : length-2],
157
- ca_coord[1 : length-1],
158
- ca_coord[2 : length]
133
+ ri = np.full(length, np.nan)
134
+ ai = np.full(length, np.nan)
135
+
136
+ d2i[1 : length - 1] = distance(ca_coord[0 : length - 2], ca_coord[2:length])
137
+ d3i[1 : length - 2] = distance(ca_coord[0 : length - 3], ca_coord[3:length])
138
+ d4i[1 : length - 3] = distance(ca_coord[0 : length - 4], ca_coord[4:length])
139
+ ri[1 : length - 1] = angle(
140
+ ca_coord[0 : length - 2], ca_coord[1 : length - 1], ca_coord[2:length]
159
141
  )
160
- ai[1 : length-2] = dihedral(
161
- ca_coord[0 : length-3],
162
- ca_coord[1 : length-2],
163
- ca_coord[2 : length-1],
164
- ca_coord[3 : length-0]
142
+ ai[1 : length - 2] = dihedral(
143
+ ca_coord[0 : length - 3],
144
+ ca_coord[1 : length - 2],
145
+ ca_coord[2 : length - 1],
146
+ ca_coord[3 : length - 0],
165
147
  )
166
-
148
+
167
149
  # Find CA that meet criteria for potential helices and strands
168
- relaxed_helix = (
169
- (d3i >= _d3_helix[0]) & (d3i <= _d3_helix[1])
170
- ) | (
171
- (ri >= _r_helix[0] ) & ( ri <= _r_helix[1])
150
+ relaxed_helix = ((d3i >= _d3_helix[0]) & (d3i <= _d3_helix[1])) | (
151
+ (ri >= _r_helix[0]) & (ri <= _r_helix[1])
172
152
  )
173
153
  strict_helix = (
174
- (d3i >= _d3_helix[0]) & (d3i <= _d3_helix[1]) &
175
- (d4i >= _d4_helix[0]) & (d4i <= _d4_helix[1])
154
+ (d3i >= _d3_helix[0])
155
+ & (d3i <= _d3_helix[1])
156
+ & (d4i >= _d4_helix[0])
157
+ & (d4i <= _d4_helix[1])
176
158
  ) | (
177
- (ri >= _r_helix[0] ) & ( ri <= _r_helix[1]) &
178
- (ai >= _a_helix[0] ) & ( ai <= _a_helix[1])
159
+ (ri >= _r_helix[0])
160
+ & (ri <= _r_helix[1])
161
+ & (ai >= _a_helix[0])
162
+ & (ai <= _a_helix[1])
179
163
  )
180
164
 
181
165
  relaxed_strand = (d3i >= _d3_strand[0]) & (d3i <= _d3_strand[1])
182
166
  strict_strand = (
183
- (d2i >= _d2_strand[0]) & (d2i <= _d2_strand[1]) &
184
- (d3i >= _d3_strand[0]) & (d3i <= _d3_strand[1]) &
185
- (d4i >= _d4_strand[0]) & (d4i <= _d4_strand[1])
167
+ (d2i >= _d2_strand[0])
168
+ & (d2i <= _d2_strand[1])
169
+ & (d3i >= _d3_strand[0])
170
+ & (d3i <= _d3_strand[1])
171
+ & (d4i >= _d4_strand[0])
172
+ & (d4i <= _d4_strand[1])
186
173
  ) | (
187
- (ri >= _r_strand[0] ) & ( ri <= _r_strand[1]) &
188
- (
174
+ (ri >= _r_strand[0])
175
+ & (ri <= _r_strand[1])
176
+ & (
189
177
  # Account for periodic boundary of dihedral angle
190
- ((ai >= _a_strand[0] ) & ( ai <= _a_strand[1])) |
191
- ((ai >= _a_strand[2] ) & ( ai <= _a_strand[3]))
178
+ ((ai >= _a_strand[0]) & (ai <= _a_strand[1]))
179
+ | ((ai >= _a_strand[2]) & (ai <= _a_strand[3]))
192
180
  )
193
181
  )
194
182
 
195
-
196
183
  helix_mask = _mask_consecutive(strict_helix, 5)
197
184
  helix_mask = _extend_region(helix_mask, relaxed_helix)
198
-
185
+
199
186
  strand_mask = _mask_consecutive(strict_strand, 4)
200
187
  short_strand_mask = _mask_regions_with_contacts(
201
188
  ca_coord,
202
189
  _mask_consecutive(strict_strand, 3),
203
- min_contacts=5, min_distance=4.2, max_distance=5.2
204
- )
205
- strand_mask = _extend_region(
206
- strand_mask | short_strand_mask, relaxed_strand
190
+ min_contacts=5,
191
+ min_distance=4.2,
192
+ max_distance=5.2,
207
193
  )
208
-
194
+ strand_mask = _extend_region(strand_mask | short_strand_mask, relaxed_strand)
209
195
 
210
196
  sse = np.full(length, "c", dtype="U1")
211
197
  sse[helix_mask] = "a"
@@ -215,7 +201,7 @@ def annotate_sse(atom_array, chain_id=None):
215
201
  sse[np.isnan(ca_coord).any(axis=-1)] = ""
216
202
  # Remove SSE for virtual atoms and return
217
203
  return sse[no_virtual_mask]
218
-
204
+
219
205
 
220
206
  def _mask_consecutive(mask, number):
221
207
  """
@@ -228,17 +214,17 @@ def _mask_consecutive(mask, number):
228
214
  # if it and the following `number-1` elements are True
229
215
  # The elements `mask[-(number-1):]` cannot have the sufficient count
230
216
  # by this definition, as they are at the end of the array
231
- counts = np.zeros(len(mask) - (number-1), dtype=int)
217
+ counts = np.zeros(len(mask) - (number - 1), dtype=int)
232
218
  for i in range(number):
233
219
  counts[mask[i : i + len(counts)]] += 1
234
- consecutive_seed = (counts == number)
235
-
220
+ consecutive_seed = counts == number
221
+
236
222
  # Not only that element, but also the
237
223
  # following `number-1` elements are in a consecutive region
238
224
  consecutive_mask = np.zeros(len(mask), dtype=bool)
239
225
  for i in range(number):
240
226
  consecutive_mask[i : i + len(consecutive_seed)] |= consecutive_seed
241
-
227
+
242
228
  return consecutive_mask
243
229
 
244
230
 
@@ -253,7 +239,7 @@ def _extend_region(base_condition_mask, extension_condition_mask):
253
239
  # Prepend absent region to the start to capture the event,
254
240
  # that the first element is already the start of a region
255
241
  region_change_mask = np.diff(np.append([False], base_condition_mask))
256
-
242
+
257
243
  # These masks point to the first `False` element
258
244
  # left and right of a 'True' region
259
245
  # The left end is the element before the first element of a 'True' region
@@ -262,7 +248,7 @@ def _extend_region(base_condition_mask, extension_condition_mask):
262
248
  left_end_mask = np.append(left_end_mask[1:], [False])
263
249
  # The right end is the first element of a 'False' region
264
250
  right_end_mask = region_change_mask & ~base_condition_mask
265
-
251
+
266
252
  # The 'base_condition_mask' gets additional 'True' elements
267
253
  # at left or right ends, which meet the extension criterion
268
254
  return base_condition_mask | (
@@ -270,8 +256,9 @@ def _extend_region(base_condition_mask, extension_condition_mask):
270
256
  )
271
257
 
272
258
 
273
- def _mask_regions_with_contacts(coord, candidate_mask,
274
- min_contacts, min_distance, max_distance):
259
+ def _mask_regions_with_contacts(
260
+ coord, candidate_mask, min_contacts, min_distance, max_distance
261
+ ):
275
262
  """
276
263
  Mask regions of `candidate_mask` that have at least `min_contacts`
277
264
  contacts with `coord` in the range `min_distance` to `max_distance`.
@@ -281,47 +268,41 @@ def _mask_regions_with_contacts(coord, candidate_mask,
281
268
  # No potential contacts -> no contacts
282
269
  # -> no residue can satisfy 'min_contacts'
283
270
  return np.zeros(len(candidate_mask), dtype=bool)
284
-
285
- cell_list = CellList(
286
- potential_contact_coord, max_distance
287
- )
271
+
272
+ cell_list = CellList(potential_contact_coord, max_distance)
288
273
  # For each candidate position,
289
274
  # get all contacts within maximum distance
290
275
  all_within_max_dist_indices = cell_list.get_atoms(
291
276
  coord[candidate_mask], max_distance
292
277
  )
293
-
278
+
294
279
  contacts = np.zeros(len(coord), dtype=int)
295
280
  for i, atom_index in enumerate(np.where(candidate_mask)[0]):
296
281
  within_max_dist_indices = all_within_max_dist_indices[i]
297
282
  # Remove padding values
298
- within_max_dist_indices = within_max_dist_indices[
299
- within_max_dist_indices != -1
300
- ]
301
- # Now count all contacts within maximum distance
283
+ within_max_dist_indices = within_max_dist_indices[within_max_dist_indices != -1]
284
+ # Now count all contacts within maximum distance
302
285
  # that also satisfy the minimum distance
303
286
  contacts[atom_index] = np.count_nonzero(
304
287
  distance(
305
- coord[atom_index],
306
- potential_contact_coord[within_max_dist_indices]
307
- ) > min_distance
288
+ coord[atom_index], potential_contact_coord[within_max_dist_indices]
289
+ )
290
+ > min_distance
308
291
  )
309
-
292
+
310
293
  # Count the number of contacts per region
311
294
  # These indices mark the start of either a 'True' or 'False' region
312
295
  # Prepend absent region to the start to capture the event,
313
296
  # that the first element is already the start of a region
314
- region_change_indices = np.where(
315
- np.diff(np.append([False], candidate_mask))
316
- )[0]
297
+ region_change_indices = np.where(np.diff(np.append([False], candidate_mask)))[0]
317
298
  # Add exclusive stop
318
299
  region_change_indices = np.append(region_change_indices, [len(coord)])
319
300
  output_mask = np.zeros(len(candidate_mask), dtype=bool)
320
301
  for i in range(len(region_change_indices) - 1):
321
302
  start = region_change_indices[i]
322
- stop = region_change_indices[i+1]
323
- total_contacts = np.sum(contacts[start : stop])
303
+ stop = region_change_indices[i + 1]
304
+ total_contacts = np.sum(contacts[start:stop])
324
305
  if total_contacts >= min_contacts:
325
- output_mask[start : stop] = True
326
-
327
- return output_mask
306
+ output_mask[start:stop] = True
307
+
308
+ return output_mask
@@ -8,19 +8,22 @@ This module provides functions for structure superimposition.
8
8
 
9
9
  __name__ = "biotite.structure"
10
10
  __author__ = "Patrick Kunzmann, Claude J. Rogers"
11
- __all__ = ["superimpose", "superimpose_homologs",
12
- "superimpose_without_outliers",
13
- "AffineTransformation", "superimpose_apply"]
11
+ __all__ = [
12
+ "superimpose",
13
+ "superimpose_homologs",
14
+ "superimpose_without_outliers",
15
+ "AffineTransformation",
16
+ ]
14
17
 
15
18
 
16
19
  import numpy as np
17
- from .atoms import coord
18
- from .geometry import centroid, distance
19
- from .filter import filter_amino_acids, filter_nucleotides
20
- from .sequence import to_sequence
21
- from ..sequence.alphabet import common_alphabet
22
- from ..sequence.seqtypes import ProteinSequence
23
- from ..sequence.align import SubstitutionMatrix, align_optimal, get_codes
20
+ from biotite.sequence.align import SubstitutionMatrix, align_optimal, get_codes
21
+ from biotite.sequence.alphabet import common_alphabet
22
+ from biotite.sequence.seqtypes import ProteinSequence
23
+ from biotite.structure.atoms import coord
24
+ from biotite.structure.filter import filter_amino_acids, filter_nucleotides
25
+ from biotite.structure.geometry import centroid, distance
26
+ from biotite.structure.sequence import to_sequence
24
27
 
25
28
 
26
29
  class AffineTransformation:
@@ -45,12 +48,12 @@ class AffineTransformation:
45
48
  The dimensions are always expanded to *(m,3)* or *(m,3,3)*,
46
49
  respectively.
47
50
  """
51
+
48
52
  def __init__(self, center_translation, rotation, target_translation):
49
53
  self.center_translation = _expand_dims(center_translation, 2)
50
54
  self.rotation = _expand_dims(rotation, 3)
51
55
  self.target_translation = _expand_dims(target_translation, 2)
52
56
 
53
-
54
57
  def apply(self, atoms):
55
58
  """
56
59
  Apply this transformation on the given structure.
@@ -118,7 +121,6 @@ class AffineTransformation:
118
121
  superimposed.coord = superimposed_coord
119
122
  return superimposed
120
123
 
121
-
122
124
  def as_matrix(self):
123
125
  """
124
126
  Get the translations and rotation as a combined 4x4
@@ -316,16 +318,19 @@ def superimpose(fixed, mobile, atom_mask=None):
316
318
  mob_centered_filtered = mob_filtered - mob_centroid[:, np.newaxis, :]
317
319
  fix_centered_filtered = fix_filtered - fix_centroid[:, np.newaxis, :]
318
320
 
319
- rotation = _get_rotation_matrices(
320
- fix_centered_filtered, mob_centered_filtered
321
- )
321
+ rotation = _get_rotation_matrices(fix_centered_filtered, mob_centered_filtered)
322
322
  transform = AffineTransformation(-mob_centroid, rotation, fix_centroid)
323
323
  return transform.apply(mobile), transform
324
324
 
325
325
 
326
- def superimpose_without_outliers(fixed, mobile, min_anchors=3,
327
- max_iterations=10, quantiles=(0.25, 0.75),
328
- outlier_threshold=1.5):
326
+ def superimpose_without_outliers(
327
+ fixed,
328
+ mobile,
329
+ min_anchors=3,
330
+ max_iterations=10,
331
+ quantiles=(0.25, 0.75),
332
+ outlier_threshold=1.5,
333
+ ):
329
334
  r"""
330
335
  Superimpose structures onto a fixed structure, ignoring
331
336
  conformational outliers.
@@ -458,8 +463,9 @@ def superimpose_without_outliers(fixed, mobile, min_anchors=3,
458
463
  return transform.apply(mobile), transform, anchor_indices
459
464
 
460
465
 
461
- def superimpose_homologs(fixed, mobile, substitution_matrix=None,
462
- gap_penalty=-10, min_anchors=3, **kwargs):
466
+ def superimpose_homologs(
467
+ fixed, mobile, substitution_matrix=None, gap_penalty=-10, min_anchors=3, **kwargs
468
+ ):
463
469
  r"""
464
470
  Superimpose one protein or nucleotide chain onto another one,
465
471
  considering sequence differences and conformational outliers.
@@ -530,8 +536,8 @@ def superimpose_homologs(fixed, mobile, substitution_matrix=None,
530
536
  fixed_anchor_indices = _get_backbone_anchor_indices(fixed)
531
537
  mobile_anchor_indices = _get_backbone_anchor_indices(mobile)
532
538
  if (
533
- len(fixed_anchor_indices) < min_anchors or
534
- len(mobile_anchor_indices) < min_anchors
539
+ len(fixed_anchor_indices) < min_anchors
540
+ or len(mobile_anchor_indices) < min_anchors
535
541
  ):
536
542
  raise ValueError(
537
543
  "Structures have too few CA atoms for required number of anchors"
@@ -562,7 +568,7 @@ def superimpose_homologs(fixed, mobile, substitution_matrix=None,
562
568
  fixed[..., fixed_anchor_indices],
563
569
  mobile[..., mobile_anchor_indices],
564
570
  min_anchors,
565
- **kwargs
571
+ **kwargs,
566
572
  )
567
573
  fixed_anchor_indices = fixed_anchor_indices[selected_anchor_indices]
568
574
  mobile_anchor_indices = mobile_anchor_indices[selected_anchor_indices]
@@ -575,54 +581,18 @@ def superimpose_homologs(fixed, mobile, substitution_matrix=None,
575
581
  )
576
582
 
577
583
 
578
- def superimpose_apply(atoms, transformation):
579
- """
580
- Superimpose structures using a given :class:`AffineTransformation`.
581
-
582
- The :class:`AffineTransformation` can be obtained by prior
583
- superimposition.
584
-
585
- DEPRECATED: Use :func:`AffineTransformation.apply()` instead.
586
-
587
- Parameters
588
- ----------
589
- atoms : AtomArray or ndarray, shape(n,), dtype=float
590
- The structure to apply the transformation on.
591
- Alternatively coordinates can be given.
592
- transformation: AffineTransformation
593
- The transformation, obtained by :func:`superimpose()`.
594
-
595
- Returns
596
- -------
597
- fitted : AtomArray or AtomArrayStack
598
- A copy of the `atoms` structure,
599
- with transformations applied.
600
- Only coordinates are returned, if coordinates were given in
601
- `atoms`.
602
-
603
- See Also
604
- --------
605
- superimpose
606
- """
607
- return transformation.apply(atoms)
608
-
609
-
610
584
  def _reshape_to_3d(coord):
611
585
  """
612
586
  Reshape the coordinate array to 3D, if it is 2D.
613
587
  """
614
588
  if coord.ndim < 2:
615
- raise ValueError(
616
- "Coordinates must be at least two-dimensional"
617
- )
589
+ raise ValueError("Coordinates must be at least two-dimensional")
618
590
  if coord.ndim == 2:
619
591
  return coord[np.newaxis, ...]
620
592
  elif coord.ndim == 3:
621
593
  return coord
622
594
  else:
623
- raise ValueError(
624
- "Coordinates must be at most three-dimensional"
625
- )
595
+ raise ValueError("Coordinates must be at most three-dimensional")
626
596
 
627
597
 
628
598
  def _get_rotation_matrices(fixed, mobile):
@@ -634,10 +604,10 @@ def _get_rotation_matrices(fixed, mobile):
634
604
  Both sets of coordinates must already be centered at origin.
635
605
  """
636
606
  # Calculate cross-covariance matrices
637
- cov = np.sum(fixed[:,:,:,np.newaxis] * mobile[:,:,np.newaxis,:], axis=1)
607
+ cov = np.sum(fixed[:, :, :, np.newaxis] * mobile[:, :, np.newaxis, :], axis=1)
638
608
  v, s, w = np.linalg.svd(cov)
639
609
  # Remove possibility of reflected atom coordinates
640
- reflected_mask = (np.linalg.det(v) * np.linalg.det(w) < 0)
610
+ reflected_mask = np.linalg.det(v) * np.linalg.det(w) < 0
641
611
  v[reflected_mask, :, -1] *= -1
642
612
  matrices = np.matmul(v, w)
643
613
  return matrices
@@ -649,11 +619,7 @@ def _multi_matmul(matrices, vectors):
649
619
  with m x n vectors.
650
620
  """
651
621
  return np.transpose(
652
- np.matmul(
653
- matrices,
654
- np.transpose(vectors, axes=(0, 2, 1))
655
- ),
656
- axes=(0, 2, 1)
622
+ np.matmul(matrices, np.transpose(vectors, axes=(0, 2, 1))), axes=(0, 2, 1)
657
623
  )
658
624
 
659
625
 
@@ -663,8 +629,8 @@ def _get_backbone_anchor_indices(atoms):
663
629
  nucleotide and return their indices.
664
630
  """
665
631
  return np.where(
666
- ((filter_amino_acids(atoms)) & (atoms.atom_name == "CA")) |
667
- ((filter_nucleotides(atoms)) & (atoms.atom_name == "P"))
632
+ ((filter_amino_acids(atoms)) & (atoms.atom_name == "CA"))
633
+ | ((filter_nucleotides(atoms)) & (atoms.atom_name == "P"))
668
634
  )[0]
669
635
 
670
636
 
@@ -717,11 +683,7 @@ def _find_matching_anchors(
717
683
  def _to_sequence(atoms):
718
684
  sequences, _ = to_sequence(atoms, allow_hetero=True)
719
685
  if len(sequences) == 0:
720
- raise ValueError(
721
- "Structure does not contain any amino acids or nucleotides"
722
- )
686
+ raise ValueError("Structure does not contain any amino acids or nucleotides")
723
687
  if len(sequences) > 1:
724
- raise ValueError(
725
- "Structure contains multiple chains, but only one is allowed"
726
- )
727
- return sequences[0]
688
+ raise ValueError("Structure contains multiple chains, but only one is allowed")
689
+ return sequences[0]