biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,327 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module allows estimation of secondary structure elements in protein
7
+ structures.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["annotate_sse"]
13
+
14
+ import numpy as np
15
+ from .celllist import CellList
16
+ from .geometry import distance, angle, dihedral
17
+ from .filter import filter_amino_acids
18
+ from .residues import get_residue_starts
19
+ from .integrity import check_res_id_continuity
20
+
21
+
22
def _interval(center, tolerance):
    """
    Return the closed interval ``center`` +/- ``tolerance`` as a
    ``(minimum, maximum)`` tuple.
    """
    return (center - tolerance, center + tolerance)


# Thresholds used by `annotate_sse()`.
# `_r_*` bound the angle `ri`, `_a_*` bound the dihedral angle `ai`
# (both converted from degrees to radians) and `_dN_*` bound the
# distance `dNi` between CA atoms that are N residues apart.
_r_helix = tuple(np.deg2rad(bound) for bound in _interval(89, 12))
_a_helix = tuple(np.deg2rad(bound) for bound in _interval(50, 20))
_d2_helix = _interval(5.5, 0.5)  # Not used in the algorithm description
_d3_helix = _interval(5.3, 0.5)
_d4_helix = _interval(6.4, 0.6)

_r_strand = tuple(np.deg2rad(bound) for bound in _interval(124, 14))
# Two intervals, ``(min_1, max_1, min_2, max_2)``, as the accepted range
# of the dihedral angle crosses the periodic boundary at +/- 180 degrees
_a_strand = tuple(np.deg2rad(bound) for bound in (-180, -125, 145, 180))
_d2_strand = _interval(6.7, 0.6)
_d3_strand = _interval(9.9, 0.9)
_d4_strand = _interval(12.4, 1.1)
34
+
35
+
36
def annotate_sse(atom_array, chain_id=None):
    r"""
    Calculate the secondary structure elements (SSEs) of a
    peptide chain based on the `P-SEA` algorithm.
    :footcite:`Labesse1997`

    The annotation is based on CA coordinates only, specifically
    distances, angles and dihedral angles between them.
    Discontinuities between chains are detected by residue ID.

    Parameters
    ----------
    atom_array : AtomArray
        The atom array to annotate for.
        Non-peptide residues are also allowed and obtain a ``''``
        SSE.
    chain_id : str, optional
        The peptide atoms belonging to this chain are filtered and
        annotated.
        DEPRECATED: By now multiple chains can be annotated at once.
        To annotate only a certain chain, filter the `atom_array` before
        giving it as input to this function.

    Returns
    -------
    sse : ndarray
        An array containing the secondary structure elements,
        where the index corresponds to a residue of `atom_array`
        (see e.g. :func:`get_residues()`).
        ``'a'`` means :math:`{\alpha}`-helix, ``'b'`` means
        :math:`{\beta}`-strand/sheet, ``'c'`` means coil.
        ``''`` indicates that a residue is not an amino acid or it
        comprises no ``CA`` atom.

    Notes
    -----
    Although this function is based on the original `P-SEA` algorithm,
    there are deviations compared to the official `P-SEA` software in
    some cases.
    Do not rely on getting the exact same results.

    If `atom_array` contains five residues or fewer, no geometric
    measurement is performed and every residue with a ``CA`` atom is
    annotated as coil.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    SSE of PDB 1L2Y:

    >>> sse = annotate_sse(atom_array, "A")
    >>> print(sse)
    ['c' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
     'c' 'c']

    """
    def in_range(values, bounds):
        # Inclusive range check; NaN values never fulfill it
        return (values >= bounds[0]) & (values <= bounds[1])

    if chain_id is not None:
        # Deprecated path: restrict the input to the amino acid
        # residues of the requested chain
        in_chain = atom_array.chain_id == chain_id
        atom_array = atom_array[in_chain & filter_amino_acids(atom_array)]

    residue_starts = get_residue_starts(atom_array)
    n_residues = len(residue_starts)

    # One coordinate row per residue:
    # Rows of residues without a CA atom of an amino acid stay NaN
    is_peptide_ca = (
        filter_amino_acids(atom_array) & (atom_array.atom_name == "CA")
    )
    ca_indices = np.where(is_peptide_ca)[0]
    ca_res_indices = np.searchsorted(residue_starts, ca_indices, "right") - 1
    ca_coord = np.full((n_residues, 3), np.nan, dtype=np.float32)
    ca_coord[ca_res_indices] = atom_array.coord[ca_indices]

    if n_residues <= 5:
        # Too few residues to measure the distances/angles
        # -> every residue with a CA atom is 'coil'
        sse = np.full(n_residues, "c", dtype="U1")
        sse[np.isnan(ca_coord).any(axis=-1)] = ""
        return sse

    # Put a virtual residue with NaN coordinates in front of each
    # residue that follows a residue ID discontinuity:
    # Every measurement spanning such a discontinuity becomes NaN
    discont_res_indices = np.searchsorted(
        residue_starts, check_res_id_continuity(atom_array), "right"
    ) - 1
    virtual_coord = np.full((len(discont_res_indices), 3), np.nan)
    ca_coord = np.insert(ca_coord, discont_res_indices, virtual_coord, axis=0)
    # This mask is False at the virtual residues,
    # it is used to drop them from the final annotation
    no_virtual_mask = np.insert(
        np.ones(n_residues, dtype=bool), discont_res_indices, False
    )

    # At least six rows exist from here on,
    # hence the negative slice bounds below are always valid
    length = len(ca_coord)

    # The measurements are undefined close to the termini,
    # these positions keep their initial NaN
    d2i, d3i, d4i, ri, ai = (np.full(length, np.nan) for _ in range(5))
    # Each value at position i is measured starting from residue i-1
    d2i[1:-1] = distance(ca_coord[:-2], ca_coord[2:])
    d3i[1:-2] = distance(ca_coord[:-3], ca_coord[3:])
    d4i[1:-3] = distance(ca_coord[:-4], ca_coord[4:])
    ri[1:-1] = angle(ca_coord[:-2], ca_coord[1:-1], ca_coord[2:])
    ai[1:-2] = dihedral(
        ca_coord[:-3], ca_coord[1:-2], ca_coord[2:-1], ca_coord[3:]
    )

    # Helix criteria
    helix_d3 = in_range(d3i, _d3_helix)
    helix_d4 = in_range(d4i, _d4_helix)
    helix_r = in_range(ri, _r_helix)
    helix_a = in_range(ai, _a_helix)
    relaxed_helix = helix_d3 | helix_r
    strict_helix = (helix_d3 & helix_d4) | (helix_r & helix_a)

    # Strand criteria
    strand_d2 = in_range(d2i, _d2_strand)
    strand_d3 = in_range(d3i, _d3_strand)
    strand_d4 = in_range(d4i, _d4_strand)
    strand_r = in_range(ri, _r_strand)
    # The accepted dihedral angle range consists of two intervals
    # due to the periodic boundary of the dihedral angle
    strand_a = in_range(ai, _a_strand[0:2]) | in_range(ai, _a_strand[2:4])
    relaxed_strand = strand_d3
    strict_strand = (strand_d2 & strand_d3 & strand_d4) | (strand_r & strand_a)

    # Helices: at least 5 consecutive strict positions,
    # extended by up to one relaxed position at each side
    helix_mask = _extend_region(
        _mask_consecutive(strict_helix, 5), relaxed_helix
    )

    # Strands: at least 4 consecutive strict positions, or at least 3
    # if the region has enough contacts to other CA atoms;
    # extended by up to one relaxed position at each side
    long_strand_mask = _mask_consecutive(strict_strand, 4)
    short_strand_mask = _mask_regions_with_contacts(
        ca_coord,
        _mask_consecutive(strict_strand, 3),
        min_contacts=5, min_distance=4.2, max_distance=5.2
    )
    strand_mask = _extend_region(
        long_strand_mask | short_strand_mask, relaxed_strand
    )

    # Strands are assigned last, so they take precedence over helices
    sse = np.full(length, "c", dtype="U1")
    sse[helix_mask] = "a"
    sse[strand_mask] = "b"
    # Residues with NaN coordinates are no amino acids
    # (or at least they have no CA)
    sse[np.isnan(ca_coord).any(axis=-1)] = ""
    # Drop the virtual residues again
    return sse[no_virtual_mask]
218
+
219
+
220
def _mask_consecutive(mask, number):
    """
    Find all regions in a mask with at least `number` consecutive
    ``True`` values.

    Parameters
    ----------
    mask : ndarray, shape=(n,), dtype=bool
        The mask to search in.
    number : int
        The minimum number of consecutive ``True`` values.

    Returns
    -------
    consecutive_mask : ndarray, shape=(n,), dtype=bool
        ``True`` for all indices that are part of such a region and
        ``False`` otherwise.
        If `mask` has less than `number` elements, all values are
        ``False``.
    """
    mask = np.asarray(mask, dtype=bool)
    consecutive_mask = np.zeros(len(mask), dtype=bool)

    # A window comprises an element and the following `number-1`
    # elements
    # The elements `mask[-(number-1):]` cannot start a complete window,
    # as they are at the end of the array
    n_windows = len(mask) - (number - 1)
    if n_windows <= 0:
        # The mask is too short to contain a single complete window
        # (a negative window count would make `np.zeros()` fail)
        return consecutive_mask

    # For each window start, count the 'True' elements in the window
    counts = np.zeros(n_windows, dtype=int)
    for offset in range(number):
        counts[mask[offset : offset + n_windows]] += 1
    # A window start is a seed, if the entire window is 'True'
    consecutive_seed = (counts == number)

    # Not only the seed, but also the
    # following `number-1` elements are in a consecutive region
    for offset in range(number):
        consecutive_mask[offset : offset + n_windows] |= consecutive_seed

    return consecutive_mask
243
+
244
+
245
def _extend_region(base_condition_mask, extension_condition_mask):
    """
    Grow each ``True`` region of `base_condition_mask` by at most one
    element at each side.
    A neighboring element is only added, if it is ``True`` in
    `extension_condition_mask`.

    Returns a new mask, the input masks are not modified.
    """
    # A leading 'False' is prepended, so a 'True' region starting at
    # the very first element is detected as region start as well
    padded_base = np.append([False], base_condition_mask)
    # 'True' at the first element of each 'True' or 'False' region
    # (except for a 'False' region at the beginning of the mask)
    boundary_mask = np.diff(padded_base)

    # First element of each 'True' region...
    region_start_mask = boundary_mask & base_condition_mask
    # ...shifted by one to the left, to point to the element in front
    # of the region
    # A region starting at index 0 has no such element: it is dropped
    before_region_mask = np.append(region_start_mask[1:], [False])
    # First 'False' element following each 'True' region
    after_region_mask = boundary_mask & ~base_condition_mask

    # Only neighbors fulfilling the extension criterion are added
    neighbor_mask = before_region_mask | after_region_mask
    extension_mask = neighbor_mask & extension_condition_mask
    return base_condition_mask | extension_mask
271
+
272
+
273
def _mask_regions_with_contacts(coord, candidate_mask,
                                min_contacts, min_distance, max_distance):
    """
    Mask regions of `candidate_mask` that have at least `min_contacts`
    contacts with `coord` in the range `min_distance` to `max_distance`.

    The contacts of all positions in a contiguous ``True`` region of
    `candidate_mask` are summed up and the region is either masked
    completely or not at all.
    Rows of `coord` containing NaN are not considered as contact
    partners.
    """
    output_mask = np.zeros(len(candidate_mask), dtype=bool)

    has_coord_mask = ~np.isnan(coord).any(axis=-1)
    potential_contact_coord = coord[has_coord_mask]
    if len(potential_contact_coord) == 0:
        # Without potential contact partners there are no contacts
        # -> no region can satisfy 'min_contacts'
        return output_mask

    # For each candidate position find all partners within the
    # maximum distance
    # The obtained indices refer to `potential_contact_coord`
    cell_list = CellList(potential_contact_coord, max_distance)
    padded_partner_indices = cell_list.get_atoms(
        coord[candidate_mask], max_distance
    )

    contacts = np.zeros(len(coord), dtype=int)
    candidate_indices = np.where(candidate_mask)[0]
    for atom_index, partner_indices in zip(
        candidate_indices, padded_partner_indices
    ):
        # '-1' values are only padding
        partner_indices = partner_indices[partner_indices != -1]
        partner_distances = distance(
            coord[atom_index], potential_contact_coord[partner_indices]
        )
        # Among the partners within the maximum distance only those
        # count, that are also farther away than the minimum distance
        contacts[atom_index] = np.count_nonzero(
            partner_distances > min_distance
        )

    # Indices where either a 'True' or a 'False' region starts
    # A leading 'False' is prepended, so a 'True' region starting at
    # the very first element is detected as well
    region_starts = np.where(
        np.diff(np.append([False], candidate_mask))
    )[0]
    # The exclusive stop of the last region is appended
    region_bounds = np.append(region_starts, [len(coord)])
    # NOTE: 'False' regions are visited too, but their contact count
    # is always 0, so they are only masked if `min_contacts` <= 0
    for start, stop in zip(region_bounds[:-1], region_bounds[1:]):
        if np.sum(contacts[start : stop]) >= min_contacts:
            output_mask[start : stop] = True

    return output_mask