biotite 1.1.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +6 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,308 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module allows estimation of secondary structure elements in protein
7
+ structures.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["annotate_sse"]
13
+
14
+ import numpy as np
15
+ from biotite.structure.celllist import CellList
16
+ from biotite.structure.filter import filter_amino_acids
17
+ from biotite.structure.geometry import angle, dihedral, distance
18
+ from biotite.structure.integrity import check_res_id_continuity
19
+ from biotite.structure.residues import get_residue_starts
20
+
21
def _symmetric_range(center, tolerance):
    """Return the interval ``(center - tolerance, center + tolerance)``."""
    return (center - tolerance), (center + tolerance)


def _angle_range(center_deg, tolerance_deg):
    """Like :func:`_symmetric_range()`, but convert degrees to radians."""
    lower_deg, upper_deg = _symmetric_range(center_deg, tolerance_deg)
    return np.deg2rad(lower_deg), np.deg2rad(upper_deg)


# Accepted value ranges for the geometric criteria of helices.
# Each range is a ``(lower, upper)`` tuple with inclusive bounds.
# Angles are given in degrees and stored in radians;
# distances are presumably in Angstrom (the unit of the coordinates)
# CA(i-1)-CA(i)-CA(i+1) angle
_r_helix = _angle_range(89, 12)
# CA(i-1)-CA(i)-CA(i+1)-CA(i+2) dihedral angle
_a_helix = _angle_range(50, 20)
# CA(i-1)-CA(i+1), CA(i-1)-CA(i+2) and CA(i-1)-CA(i+3) distance
_d2_helix = _symmetric_range(5.5, 0.5)  # Not used in the algorithm description
_d3_helix = _symmetric_range(5.3, 0.5)
_d4_helix = _symmetric_range(6.4, 0.6)

# The same criteria for strands
_r_strand = _angle_range(124, 14)
# The accepted dihedral range wraps around the periodic boundary
# at +/-180 degrees, hence it is stored as two consecutive
# (lower, upper) pairs in a single 4-tuple
_a_strand = tuple(np.deg2rad(bound) for bound in (-180, -125, 145, 180))
_d2_strand = _symmetric_range(6.7, 0.6)
_d3_strand = _symmetric_range(9.9, 0.9)
_d4_strand = _symmetric_range(12.4, 1.1)
32
+
33
+
34
def annotate_sse(atom_array):
    r"""
    Assign a secondary structure element (SSE) to each residue of a
    peptide chain using the `P-SEA` algorithm.
    :footcite:`Labesse1997`

    Only CA coordinates enter the annotation: It is derived from
    distances, angles and dihedral angles between CA atoms of
    neighboring residues.
    Discontinuities between chains are detected by residue ID.

    Parameters
    ----------
    atom_array : AtomArray
        The atom array to annotate.
        Non-peptide residues are also allowed and obtain a ``''``
        SSE.

    Returns
    -------
    sse : ndarray
        The secondary structure elements as single character strings,
        where each index corresponds to a residue of `atom_array`
        (see e.g. :func:`get_residues()`).
        ``'a'`` means :math:`{\alpha}`-helix, ``'b'`` means
        :math:`{\beta}`-strand/sheet, ``'c'`` means coil.
        ``''`` indicates that a residue is not an amino acid or that it
        comprises no ``CA`` atom.

    Notes
    -----
    Although this function is based on the original `P-SEA` algorithm,
    its results deviate from the official `P-SEA` software in some
    cases.
    Do not rely on getting the exact same results.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    SSE of PDB 1L2Y:

    >>> sse = annotate_sse(atom_array)
    >>> print(sse)
    ['c' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c']

    """

    def _within(values, bounds):
        # Inclusive range check; NaN values never fulfill it
        lower, upper = bounds
        return (values >= lower) & (values <= upper)

    res_starts = get_residue_starts(atom_array)
    n_residues = len(res_starts)

    # One CA coordinate per residue:
    # Residues without a CA (e.g. non-amino acids) keep NaN coordinates
    ca_coord = np.full((n_residues, 3), np.nan, dtype=np.float32)
    is_peptide_ca = filter_amino_acids(atom_array) & (atom_array.atom_name == "CA")
    ca_indices = np.where(is_peptide_ca)[0]
    # Map each CA atom to the index of the residue it belongs to
    res_of_ca = np.searchsorted(res_starts, ca_indices, "right") - 1
    ca_coord[res_of_ca] = atom_array.coord[ca_indices]

    if len(ca_coord) <= 5:
        # Too few residues to measure the distances/angles
        # -> every amino acid is considered to be 'coil'
        sse = np.full(len(ca_coord), "c", dtype="U1")
        # NaN coordinates mark residues that are no amino acids
        # (or at least lack a CA)
        sse[np.isnan(ca_coord).any(axis=-1)] = ""
        return sse

    # Put a virtual residue with NaN coordinates in front of every
    # residue that follows a chain discontinuity
    # -> all distances/angles spanning a discontinuity become NaN
    discont_indices = check_res_id_continuity(atom_array)
    discont_res_indices = np.searchsorted(res_starts, discont_indices, "right") - 1
    virtual_coord = np.full((len(discont_res_indices), 3), np.nan)
    ca_coord = np.insert(ca_coord, discont_res_indices, virtual_coord, axis=0)
    # This mask is 'False' at the virtual residues,
    # it is used to drop them from the final annotation
    no_virtual_mask = np.insert(
        np.ones(n_residues, dtype=bool), discont_res_indices, False
    )

    length = len(ca_coord)

    # The measurements at position i start at residue i-1 and reach
    # up to residue i+3, hence they are undefined (NaN) at the termini
    # 'length' is larger than 5 here, so the negative stops are valid
    d2i = np.full(length, np.nan)
    d3i = np.full(length, np.nan)
    d4i = np.full(length, np.nan)
    ri = np.full(length, np.nan)
    ai = np.full(length, np.nan)

    d2i[1:-1] = distance(ca_coord[:-2], ca_coord[2:])
    d3i[1:-2] = distance(ca_coord[:-3], ca_coord[3:])
    d4i[1:-3] = distance(ca_coord[:-4], ca_coord[4:])
    ri[1:-1] = angle(ca_coord[:-2], ca_coord[1:-1], ca_coord[2:])
    ai[1:-2] = dihedral(
        ca_coord[:-3], ca_coord[1:-2], ca_coord[2:-1], ca_coord[3:]
    )

    # Helix criteria: either distance-based or angle-based
    helix_d3 = _within(d3i, _d3_helix)
    helix_r = _within(ri, _r_helix)
    relaxed_helix = helix_d3 | helix_r
    strict_helix = (helix_d3 & _within(d4i, _d4_helix)) | (
        helix_r & _within(ai, _a_helix)
    )

    # Strand criteria
    strand_d3 = _within(d3i, _d3_strand)
    relaxed_strand = strand_d3
    # The dihedral range is split in two parts
    # due to the periodic boundary of the dihedral angle
    strand_a = _within(ai, _a_strand[:2]) | _within(ai, _a_strand[2:])
    strict_strand = (
        _within(d2i, _d2_strand) & strand_d3 & _within(d4i, _d4_strand)
    ) | (_within(ri, _r_strand) & strand_a)

    # Helices: 5 consecutive strict residues,
    # extended by relaxed residues at the ends
    helix_mask = _extend_region(_mask_consecutive(strict_helix, 5), relaxed_helix)

    # Strands: 4 consecutive strict residues, or 3 consecutive strict
    # residues with sufficient contacts to other CA atoms
    long_strand_mask = _mask_consecutive(strict_strand, 4)
    short_strand_mask = _mask_regions_with_contacts(
        ca_coord,
        _mask_consecutive(strict_strand, 3),
        min_contacts=5,
        min_distance=4.2,
        max_distance=5.2,
    )
    strand_mask = _extend_region(long_strand_mask | short_strand_mask, relaxed_strand)

    # Later assignments take precedence: coil < helix < strand < no CA
    sse = np.full(length, "c", dtype="U1")
    sse[helix_mask] = "a"
    sse[strand_mask] = "b"
    sse[np.isnan(ca_coord).any(axis=-1)] = ""
    # Drop the entries of the virtual residues
    return sse[no_virtual_mask]
204
+
205
+
206
def _mask_consecutive(mask, number):
    """
    Find all regions in a mask with at least `number` consecutive
    ``True`` values.

    Parameters
    ----------
    mask : array-like, shape=(n,), dtype=bool
        The mask to search in.
    number : int
        The minimum number of consecutive ``True`` values a region must
        comprise.

    Returns
    -------
    consecutive_mask : ndarray, shape=(n,), dtype=bool
        ``True`` for all indices that are part of such a region and
        ``False`` otherwise.
        If `mask` has less than `number` elements, no such region can
        exist and the returned mask is entirely ``False``.
    """
    mask = np.asarray(mask, dtype=bool)
    # A window of `number` elements can start at this many positions
    n_windows = len(mask) - (number - 1)
    if n_windows <= 0:
        # The mask is too short to contain a single complete window
        # (without this guard the array creation below would fail
        # due to a negative length)
        return np.zeros(len(mask), dtype=bool)

    # A window starting at an element is completely 'True',
    # if that element and the following `number-1` elements are 'True'
    window_is_full = np.ones(n_windows, dtype=bool)
    for offset in range(number):
        window_is_full &= mask[offset : offset + n_windows]

    # Not only the first element of a complete window, but also the
    # following `number-1` elements are part of a consecutive region
    consecutive_mask = np.zeros(len(mask), dtype=bool)
    for offset in range(number):
        consecutive_mask[offset : offset + n_windows] |= window_is_full

    return consecutive_mask
229
+
230
+
231
def _extend_region(base_condition_mask, extension_condition_mask):
    """
    Grow each ``True`` region of `base_condition_mask` by at most one
    element on either side.

    An element directly adjacent to a ``True`` region is only added,
    if it is ``True`` in `extension_condition_mask`.
    Both masks are expected to be boolean arrays of the same length.
    """
    # The base mask shifted by one element to the right:
    # 'True' where the left neighbor of an element is in a region
    # The first element has no left neighbor
    left_neighbor_in_region = np.append([False], base_condition_mask[:-1])
    # The base mask shifted by one element to the left:
    # 'True' where the right neighbor of an element is in a region
    # The last element has no right neighbor
    right_neighbor_in_region = np.append(base_condition_mask[1:], [False])

    # Elements bordering a region are the only candidates for extension
    # Elements inside a region are also marked here, but as they are
    # 'True' in the base mask anyway, this does not alter the result
    borders_region = left_neighbor_in_region | right_neighbor_in_region

    return base_condition_mask | (borders_region & extension_condition_mask)
257
+
258
+
259
def _mask_regions_with_contacts(
    coord, candidate_mask, min_contacts, min_distance, max_distance
):
    """
    Select those ``True`` regions of `candidate_mask`, whose elements
    have in total at least `min_contacts` contacts to `coord`.

    A contact is a coordinate (NaN coordinates are ignored) whose
    distance to a candidate is above `min_distance` and within
    `max_distance`.
    The returned boolean mask is ``True`` for all elements of the
    selected regions.
    """
    # Only positions with defined coordinates can be contact partners
    partner_coord = coord[~np.isnan(coord).any(axis=-1)]
    if len(partner_coord) == 0:
        # Without any partner there are no contacts
        # -> no region is able to reach 'min_contacts'
        return np.zeros(len(candidate_mask), dtype=bool)

    # Look up the partners within maximum distance of each candidate
    # NOTE(review): this assumes that 'get_atoms()' gives one row of
    # indices (padded with -1) per candidate - confirm against 'CellList'
    partner_indices_per_candidate = CellList(partner_coord, max_distance).get_atoms(
        coord[candidate_mask], max_distance
    )
    candidate_indices = np.where(candidate_mask)[0]

    contacts = np.zeros(len(coord), dtype=int)
    for candidate_index, partner_indices in zip(
        candidate_indices, partner_indices_per_candidate
    ):
        # Strip the padding values
        partner_indices = partner_indices[partner_indices != -1]
        # From the partners within maximum distance
        # only those above the minimum distance count as contacts
        partner_distances = distance(
            coord[candidate_index], partner_coord[partner_indices]
        )
        contacts[candidate_index] = np.count_nonzero(partner_distances > min_distance)

    # Sum up the contacts for each region:
    # Each of these indices is the start of a 'True' or a 'False' region
    # An absent region is prepended to the mask to capture the case,
    # that the first element already starts a 'True' region
    region_bounds = np.where(np.diff(np.append([False], candidate_mask)))[0]
    # The exclusive stop of the last region
    region_bounds = np.append(region_bounds, [len(coord)])

    output_mask = np.zeros(len(candidate_mask), dtype=bool)
    for start, stop in zip(region_bounds[:-1], region_bounds[1:]):
        if np.sum(contacts[start:stop]) >= min_contacts:
            output_mask[start:stop] = True

    return output_mask