biotite 1.1.0__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-darwin.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-darwin.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-darwin.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-darwin.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-darwin.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,171 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Conversion of structures into the *Protein Blocks* structural alphabet.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
12
+
13
+ import numpy as np
14
+ from biotite.sequence.alphabet import LetterAlphabet
15
+ from biotite.sequence.sequence import Sequence
16
+ from biotite.structure.chains import get_chain_starts
17
+ from biotite.structure.geometry import dihedral_backbone
18
+
19
# Reference dihedral angles (in degrees) of the 16 Protein Blocks,
# adapted from PBxplore.
# Row ``i`` belongs to the PB whose symbol code is ``i`` (letters 'a' to 'p').
# The columns follow the order in which `_to_protein_blocks()` fills its
# angle table for the central residue ``n``:
# psi(n-2), phi(n-1), psi(n-1), phi(n), psi(n), phi(n+1), psi(n+1), phi(n+2)
PB_ANGLES = np.array(
    (
        ( 41.14,   75.53,  13.92,  -99.80,  131.88,  -96.27, 122.08,  -99.68),  # a
        (108.24,  -90.12, 119.54,  -92.21,  -18.06, -128.93, 147.04,  -99.90),  # b
        (-11.61, -105.66,  94.81, -106.09,  133.56, -106.93, 135.97, -100.63),  # c
        (141.98, -112.79, 132.20, -114.79,  140.11, -111.05, 139.54, -103.16),  # d
        (133.25, -112.37, 137.64, -108.13,  133.00,  -87.30, 120.54,   77.40),  # e
        (116.40, -105.53, 129.32,  -96.68,  140.72,  -74.19, -26.65,  -94.51),  # f
        (  0.40,  -81.83,   4.91, -100.59,   85.50,  -71.65, 130.78,   84.98),  # g
        (119.14, -102.58, 130.83,  -67.91,  121.55,   76.25,  -2.95,  -90.88),  # h
        (130.68,  -56.92, 119.26,   77.85,   10.42,  -99.43, 141.40,  -98.01),  # i
        (114.32, -121.47, 118.14,   82.88, -150.05,  -83.81,  23.35,  -85.82),  # j
        (117.16,  -95.41, 140.40,  -59.35,  -29.23,  -72.39, -25.08,  -76.16),  # k
        (139.20,  -55.96, -32.70,  -68.51,  -26.09,  -74.44, -22.60,  -71.74),  # l
        (-39.62,  -64.73, -39.52,  -65.54,  -38.88,  -66.89, -37.76,  -70.19),  # m
        (-35.34,  -65.03, -38.12,  -66.34,  -29.51,  -89.10,  -2.91,   77.90),  # n
        (-45.29,  -67.44, -27.72,  -87.27,    5.13,   77.49,  30.71,  -93.23),  # o
        (-27.09,  -86.14,   0.30,   59.85,   21.51,  -96.30, 132.67,  -92.91),  # p
    )
)  # fmt: skip
40
+
41
+
42
class ProteinBlocksSequence(Sequence):
    """
    Representation of a structure in the *Protein Blocks* structural alphabet.
    :footcite:`Brevern2000`

    The alphabet consists of the lower case letters ``a`` to ``p`` plus
    ``z``, which marks positions where no *Protein Block* could be assigned.

    Parameters
    ----------
    sequence : iterable object, optional
        The *Protein Blocks* sequence.
        This may either be a list or a string.
        May take upper or lower case letters.
        By default the sequence is empty.

    See also
    --------
    to_protein_blocks : Create *Protein Blocks* sequences from a structure.

    References
    ----------

    .. footbibliography::

    """

    alphabet = LetterAlphabet("abcdefghijklmnopz")
    undefined_symbol = "z"

    def __init__(self, sequence=""):
        # The alphabet contains lower case letters only, hence every input
        # (string or any other iterable of symbols) is normalized to lower
        # case before it is encoded
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        """
        Get the alphabet shared by all *Protein Blocks* sequences.

        Returns
        -------
        alphabet : LetterAlphabet
            The class-level *Protein Blocks* alphabet.
        """
        return ProteinBlocksSequence.alphabet

    def remove_undefined(self):
        """
        Remove undefined symbols from the sequence.

        Returns
        -------
        filtered_sequence : ProteinBlocksSequence
            The sequence without undefined symbols.
        """
        undefined_code = ProteinBlocksSequence.alphabet.encode(
            ProteinBlocksSequence.undefined_symbol
        )
        # Work on the symbol codes directly to avoid decoding and re-encoding
        filtered_code = self.code[self.code != undefined_code]
        filtered_sequence = ProteinBlocksSequence()
        filtered_sequence.code = filtered_code
        return filtered_sequence
95
+
96
+
97
def to_protein_blocks(atoms):
    """
    Encode each chain in the given structure to the *Protein Blocks* structural
    alphabet.
    :footcite:`Brevern2000`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_protein_blocks(atom_array)
    >>> print(sequences[0])
    zzmmmmmnopjmnopacdzz
    """
    # The exclusive stop is appended, so that consecutive entries
    # delimit one chain each
    boundaries = get_chain_starts(atoms, add_exclusive_stop=True)
    sequences = [
        _to_protein_blocks(atoms[first:last])
        for first, last in zip(boundaries[:-1], boundaries[1:])
    ]
    # The trailing exclusive stop is not a chain start -> omit it
    return sequences, boundaries[:-1]
136
+
137
+
138
def _to_protein_blocks(chain):
    """
    Encode a single chain into a *Protein Blocks* sequence.

    Parameters
    ----------
    chain : AtomArray
        The atoms of one chain.

    Returns
    -------
    pb_sequence : ProteinBlocksSequence
        The sequence containing one symbol for each value returned by
        :func:`dihedral_backbone()`.
        Positions where not all eight angles are available get the
        undefined symbol.
    """
    unknown_code = ProteinBlocksSequence.alphabet.encode(
        ProteinBlocksSequence.undefined_symbol
    )

    phi, psi, _ = dihedral_backbone(chain)

    # A PB is described by eight dihedral angles in a window of five
    # residues around the central residue n, given here in column order
    window_columns = (
        psi[:-4],    # psi(n-2)
        phi[1:-3],   # phi(n-1)
        psi[1:-3],   # psi(n-1)
        phi[2:-2],   # phi(n)
        psi[2:-2],   # psi(n)
        phi[3:-1],   # phi(n+1)
        psi[3:-1],   # psi(n+1)
        phi[4:],     # phi(n+2)
    )  # fmt: skip
    # The first and last two rows have no complete window and stay NaN
    pb_angles = np.full((len(phi), 8), np.nan)
    for column, angles in enumerate(window_columns):
        pb_angles[2:-2, column] = angles
    # The reference angles are given in degrees
    pb_angles = np.rad2deg(pb_angles)

    # Deviation of each actual angle from each reference angle,
    # wrapped into the interval [-180, 180)
    deviation = (
        PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :] + 180
    ) % 360 - 180
    # Sum of squared deviations for each (PB, position) pair:
    # it is minimal where the angle RMSD is minimal
    rmsda = np.sum(deviation**2, axis=-1)

    # A position is only assignable, if none of its angles is NaN
    is_assignable = ~np.isnan(rmsda).any(axis=0)
    # Start with the undefined symbol everywhere...
    symbol_codes = np.full(len(pb_angles), unknown_code, dtype=np.uint8)
    # ...and choose the best fitting PB where possible:
    # the row index in the reference table is directly the symbol code
    symbol_codes[is_assignable] = np.argmin(rmsda[:, is_assignable], axis=0)

    # Wrap the symbol codes into a sequence object
    pb_sequence = ProteinBlocksSequence()
    pb_sequence.code = symbol_codes
    return pb_sequence
@@ -0,0 +1,122 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Parser for extracting weights from Keras files.
7
+
8
+ Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
9
+ """
10
+
11
+ __name__ = "biotite.structure.alphabet"
12
+ __author__ = "Martin Larralde"
13
+ __all__ = ["load_kerasify"]
14
+
15
+ import enum
16
+ import functools
17
+ import itertools
18
+ import struct
19
+ import numpy as np
20
+ from biotite.structure.alphabet.layers import DenseLayer, Layer
21
+
22
+
23
class LayerType(enum.IntEnum):
    """
    Integer tags identifying the type of a layer.

    `KerasifyParser` converts the layer type integer read from the file
    into a member of this enum.
    """

    DENSE = 1
    CONVOLUTION2D = 2
    FLATTEN = 3
    ELU = 4
    ACTIVATION = 5
    MAXPOOLING2D = 6
    LSTM = 7
    EMBEDDING = 8
32
+
33
+
34
class ActivationType(enum.IntEnum):
    """
    Integer tags identifying the activation function of a layer.

    `KerasifyParser` converts the activation integer read after the
    biases of a dense layer into a member of this enum.
    """

    LINEAR = 1
    RELU = 2
    SOFTPLUS = 3
    SIGMOID = 4
    TANH = 5
    HARD_SIGMOID = 6
41
+
42
+
43
class KerasifyParser:
    """An incomplete parser for model files serialized with `kerasify`.

    The parser is an iterator: each iteration step yields the next layer
    of the model, until all layers announced in the file header are read.

    Parameters
    ----------
    file : file-like object
        A binary file object providing ``readinto()``.
        The layer count is read from it upon construction.

    Notes
    -----
    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
    is only using 3 dense layers.
    """

    def __init__(self, file) -> None:
        self.file = file
        # Reusable scratch buffer, grown on demand by `_read()`
        self.buffer = bytearray(1024)
        # The file starts with the number of layers
        (self.n_layers,) = self._get("I")

    def read(self):
        """
        Read the next layer from the file.

        Returns
        -------
        layer : DenseLayer or None
            The parsed layer or ``None``, if all layers were already read.

        Raises
        ------
        NotImplementedError
            If the layer type or its activation function is not supported.
        EOFError
            If the file ends before the layer is completely read.
        """
        if self.n_layers == 0:
            return None

        self.n_layers -= 1
        layer_type = LayerType(self._get("I")[0])
        if layer_type != LayerType.DENSE:
            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")

        # Header of a dense layer: weight matrix shape and number of biases
        (w0,) = self._get("I")
        (w1,) = self._get("I")
        (b0,) = self._get("I")
        # The arrays must be copied, as the underlying buffer is
        # overwritten by the next read
        weights = (
            np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
            .reshape(w0, w1)
            .copy()
        )
        biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
        activation = ActivationType(self._get("I")[0])
        if activation not in (ActivationType.LINEAR, ActivationType.RELU):
            raise NotImplementedError(
                f"Unsupported activation type: {activation!r}"
            )
        return DenseLayer(weights, biases, activation == ActivationType.RELU)

    def __iter__(self):
        return self

    def __next__(self) -> "Layer":
        layer = self.read()
        if layer is None:
            raise StopIteration
        return layer

    def _read(self, format: str) -> memoryview:
        """
        Read as many bytes as the given `struct` format requires.

        Parameters
        ----------
        format : str
            The `struct` format string determining the number of bytes.

        Returns
        -------
        view : memoryview
            A view on the internal buffer containing the read bytes.
            It is only valid until the next read.

        Raises
        ------
        EOFError
            If the file ends before the requested number of bytes is read.
        """
        n = struct.calcsize(format)
        if len(self.buffer) < n:
            self.buffer.extend(bytes(n - len(self.buffer)))
        v = memoryview(self.buffer)[:n]
        # `readinto()` may deliver less bytes than requested:
        # continue reading until the view is filled and fail loudly on a
        # truncated file instead of silently parsing stale buffer content
        n_filled = 0
        while n_filled < n:
            n_read = self.file.readinto(v[n_filled:])  # type: ignore
            if not n_read:
                raise EOFError(
                    f"Unexpected end of file: "
                    f"expected {n} bytes, but got only {n_filled}"
                )
            n_filled += n_read
        return v

    def _get(self, format: str):
        """
        Read and unpack the values described by the given `struct` format.

        Parameters
        ----------
        format : str
            The `struct` format string.

        Returns
        -------
        values : tuple
            The unpacked values.
        """
        v = self._read(format)
        return struct.unpack(format, v)
104
+
105
+
106
@functools.cache
def load_kerasify(file_path):
    """
    Load the model layers from a ``.kerasify`` file.

    The result is cached for each `file_path`, i.e. the file is parsed
    only once and repeated calls return the same tuple.

    Parameters
    ----------
    file_path : str
        The path to the ``.kerasify`` file.

    Returns
    -------
    layers : tuple of Layer
        The model layers.
    """
    with open(file_path, "rb") as handle:
        parser = KerasifyParser(handle)
        # Exhaust the parser while the file is still open
        layers = tuple(parser)
    return layers