biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,25 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for converting structures to structural alphabet sequences.
7
+
8
+ Structural alphabets represent the local geometry of each residue in a structure as
9
+ a symbol in a sequence.
10
+ This allows using sequence-based functionality from :mod:`biotite.sequence` on
11
+ structural data.
12
+
13
+ For each supported structural alphabet, this subpackage provides a conversion function
14
+ that converts each chain of a given structure into a :class:`Sequence` object from the
15
+ respective structural alphabet.
16
+
17
+ Note that the structural alphabets use lower-case letters as symbols, in order to
18
+ distinguish them better from the nucleotide and amino acid alphabets.
19
+ """
20
+
21
+ __name__ = "biotite.structure.alphabet"
22
+ __author__ = "Martin Larralde, Patrick Kunzmann"
23
+
24
+ from .i3d import *
25
+ from .pb import *
@@ -0,0 +1,332 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Implementation of the encoder neural network adapted from ``foldseek``.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Martin Larralde"
11
+ __all__ = ["Encoder", "VirtualCenterEncoder", "PartnerIndexEncoder", "FeatureEncoder"]
12
+
13
+ import abc
14
+ from importlib.resources import files as resource_files
15
+ import numpy
16
+ import numpy.ma
17
+ from biotite.structure.alphabet.layers import CentroidLayer, Model
18
+ from biotite.structure.alphabet.unkerasify import load_kerasify
19
+
20
+
21
class _BaseEncoder(abc.ABC):
    """
    Abstract interface implemented by every encoder in this module.

    A concrete encoder overrides :meth:`encode`, which receives the per-residue
    backbone coordinates and returns another representation of them.
    """

    @abc.abstractmethod
    def encode(self, ca, cb, n, c):
        """
        Convert the given backbone coordinates into another representation.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            Per-residue coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms.
            A missing atom (e.g. ``CB`` of glycine) is given as *NaN*.

        Returns
        -------
        encoded : MaskedArray, shape=(n, m), dtype=float
            The encoded representation.
        """
        raise NotImplementedError()
40
+
41
+
42
class VirtualCenterEncoder(_BaseEncoder):
    r"""
    Encoder that places a virtual center next to each residue of a protein.

    The virtual center :math:`V` of a residue is derived from the coordinates of
    its ``CA``, ``CB`` and ``N`` atoms.
    Its position is determined by the angle
    :math:`\theta = \angle V C_{\alpha} C_{\beta}`, the dihedral angle
    :math:`\tau = \angle V C_{\alpha} C_{\beta} N` and the length
    :math:`l = |V - C_{\alpha}|`.
    The default parameters used in ``foldseek`` were selected after optimization
    on a validation set.

    Parameters
    ----------
    distance_alpha_beta : float
        The ``CA``-``CB`` distance to assume when a missing *Cβ* position is
        reconstructed.
    distance_alpha_v : float
        The distance between the ``CA`` atom and the virtual center *V*.
    theta : float
        The angle θ (in degrees) between the virtual center *V*, the ``CA`` and
        the ``CB`` atom.
    tau : float
        The dihedral angle τ (in degrees) between the virtual center *V* and the
        ``CA``, ``CB`` and ``N`` atoms.
    """

    _DISTANCE_ALPHA_BETA = 1.5336

    def __init__(
        self,
        *,
        distance_alpha_beta=_DISTANCE_ALPHA_BETA,
        distance_alpha_v=2.0,
        theta=270.0,
        tau=0.0,
    ):
        self.distance_alpha_beta = distance_alpha_beta
        self.distance_alpha_v = distance_alpha_v
        # The property setters also cache sine and cosine of the angles
        self.theta = theta
        self.tau = tau

    @property
    def theta(self):
        # Stored in radians, exposed in degrees
        return numpy.rad2deg(self._theta)

    @theta.setter
    def theta(self, theta):
        radians = numpy.deg2rad(theta)
        self._theta = radians
        self._cos_theta = numpy.cos(radians)
        self._sin_theta = numpy.sin(radians)

    @property
    def tau(self):
        # Stored in radians, exposed in degrees
        return numpy.rad2deg(self._tau)

    @tau.setter
    def tau(self, tau):
        radians = numpy.deg2rad(tau)
        self._tau = radians
        self._cos_tau = numpy.cos(radians)
        self._sin_tau = numpy.sin(radians)

    @staticmethod
    def _rotate(vector, axis, cos_angle, sin_angle):
        """
        Rotate each vector about the corresponding unit axis
        (Rodrigues' rotation formula), given cosine and sine of the angle.
        """
        projection = (axis * vector).sum(axis=-1).reshape(-1, 1)
        return (
            vector * cos_angle
            + numpy.cross(axis, vector) * sin_angle
            + axis * projection * (1 - cos_angle)
        )

    def _compute_virtual_center(self, ca, cb, n):
        """
        Compute the virtual center of each residue from its ``CA``, ``CB`` and
        ``N`` coordinates.
        """
        assert ca.shape == n.shape
        assert ca.shape == cb.shape
        alpha_to_beta = cb - ca
        alpha_to_n = n - ca
        # normal angle: rotate about the normal of the CB-CA-N plane
        plane_normal = _normalize(
            numpy.cross(alpha_to_beta, alpha_to_n, axis=-1), inplace=True
        )
        center = self._rotate(
            alpha_to_beta, plane_normal, self._cos_theta, self._sin_theta
        )
        # dihedral angle: rotate about the CA-N direction
        n_direction = _normalize(alpha_to_n, inplace=True)
        center = self._rotate(center, n_direction, self._cos_tau, self._sin_tau)
        # apply final vector to Cα
        center *= self.distance_alpha_v
        center += ca
        return center

    def _approximate_cb_position(self, ca, n, c):
        """
        Estimate the ``CB`` position of each residue from its backbone atoms.
        """
        assert ca.shape == n.shape
        assert ca.shape == c.shape
        to_c = _normalize(c - ca, inplace=True)
        to_n = _normalize(n - ca, inplace=True)
        third_of_c = to_c / 3.0

        # ``to_n`` is reused as storage: from here on it holds the sum
        to_n += third_of_c
        in_plane = to_n
        out_of_plane = numpy.cross(to_c, in_plane, axis=-1)
        # The cross product above must be taken before normalizing in-place
        unit_in_plane = _normalize(in_plane, inplace=True)
        unit_out_of_plane = _normalize(out_of_plane, inplace=True)

        first_term = -unit_in_plane / 2.0
        second_term = unit_out_of_plane * numpy.sqrt(3) / 2.0
        cb = (numpy.sqrt(8) / 3.0) * (first_term - second_term) - third_of_c
        cb *= self.distance_alpha_beta
        cb += ca
        return cb

    def _create_nan_mask(self, ca, n, c):
        """
        Mask every row (residue) where at least one coordinate of ``CA``, ``N``
        or ``C`` is *NaN*.
        """
        incomplete = (
            numpy.isnan(ca).any(axis=1)
            | numpy.isnan(n).any(axis=1)
            | numpy.isnan(c).any(axis=1)
        )
        # Expand the per-residue flag to all three coordinate columns
        return numpy.repeat(incomplete, 3).reshape(-1, 3)

    def encode(self, ca, cb, n, c):
        """
        Compute the virtual center of each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            Per-residue coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms.
            *NaN* entries in `cb` are replaced by positions approximated from
            the other three atoms.

        Returns
        -------
        encoded : MaskedArray, shape=(n, 3), dtype=float
            The virtual center coordinates.
            Residues with a *NaN* in `ca`, `n` or `c` are masked.
        """
        ca, cb, n, c = (numpy.asarray(coord) for coord in (ca, cb, n, c))

        for other in (cb, c, n):
            assert ca.shape == other.shape

        # fix CB positions if needed
        cb_known = ~numpy.isnan(cb)
        if not cb_known.all():
            # build a new array, so the CB array of the caller stays untouched
            rebuilt = self._approximate_cb_position(ca, n, c)
            rebuilt[cb_known] = cb[cb_known]
            cb = rebuilt
        # compute virtual center
        centers = self._compute_virtual_center(ca, cb, n)
        # mask residues without coordinates
        missing = self._create_nan_mask(ca, n, c)
        return numpy.ma.masked_array(centers, mask=missing, fill_value=numpy.nan)
183
+
184
+
185
class PartnerIndexEncoder(_BaseEncoder):
    """
    Encoder that maps each residue of a protein to the index of its partner.

    The virtual center of each residue is computed from the coordinates of its
    ``CA``, ``CB`` and ``N`` atoms.
    From the pairwise distances between these virtual centers, the index of the
    closest partner residue is picked for each position.
    """

    def __init__(self):
        self.vc_encoder = VirtualCenterEncoder()

    def _find_residue_partners(self, x):
        """
        Return, for each row of `x`, the index of the closest other row.
        """
        # compute pairwise squared distance matrix
        squared_norm = numpy.sum(x * x, axis=-1).reshape(-1, 1)
        # NaN for the first and last row propagates into their rows and
        # columns of the matrix, which are turned into infinity below
        squared_norm[0] = numpy.nan
        squared_norm[-1] = numpy.nan
        inner_product = numpy.ma.dot(x, x.T)
        squared_dist = squared_norm - 2 * inner_product + squared_norm.T
        # avoid selecting residue itself as the best
        squared_dist[numpy.diag_indices_from(squared_dist)] = numpy.inf
        # get the closest non-masked residue
        squared_dist = numpy.nan_to_num(squared_dist, copy=False, nan=numpy.inf)
        return squared_dist.argmin(axis=1)

    def encode(self, ca, cb, n, c):
        """
        Find the partner residue index for each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            Per-residue coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms.

        Returns
        -------
        partner_index : ndarray, shape=(n,), dtype=int
            The index of the closest partner for each residue.
        """
        # encode backbone atoms to virtual center
        virtual_centers = self.vc_encoder.encode(ca, cb, n, c)
        # find closest neighbor for each residue
        return self._find_residue_partners(virtual_centers)
216
+
217
+
218
class FeatureEncoder(_BaseEncoder):
    """
    Encoder that maps each residue of a protein to ten structural descriptors.
    """

    def __init__(self):
        self.partner_index_encoder = PartnerIndexEncoder()
        # Share the virtual center encoder with the partner index encoder
        self.vc_encoder = self.partner_index_encoder.vc_encoder

    def _calc_conformation_descriptors(self, ca, partner_index, dtype=numpy.float32):
        """
        Compute the descriptors for all residues except the first and last one,
        whose rows are left at zero.
        """
        # build arrays of indices to use for vectorized angles
        inner = numpy.arange(1, ca.shape[-2] - 1)
        partner = partner_index[inner]
        # unit vectors along the CA trace around the residue and its partner
        u1 = _normalize(ca[..., inner, :] - ca[..., inner - 1, :], inplace=True)
        u2 = _normalize(ca[..., inner + 1, :] - ca[..., inner, :], inplace=True)
        u3 = _normalize(ca[..., partner, :] - ca[..., partner - 1, :], inplace=True)
        u4 = _normalize(ca[..., partner + 1, :] - ca[..., partner, :], inplace=True)
        # unit vector from the residue to its partner
        u5 = _normalize(ca[..., partner, :] - ca[..., inner, :], inplace=True)

        descriptors = numpy.zeros((ca.shape[0], 10), dtype=dtype)
        # columns 0-6: dot products between pairs of the unit vectors
        vector_pairs = (
            (u1, u2),
            (u3, u4),
            (u1, u5),
            (u3, u5),
            (u1, u4),
            (u2, u3),
            (u1, u3),
        )
        for column, (first, second) in enumerate(vector_pairs):
            descriptors[inner, column] = numpy.sum(first * second, axis=-1)
        # column 7: CA distance between the residue and its partner
        descriptors[inner, 7] = numpy.linalg.norm(ca[inner] - ca[partner], axis=-1)
        # columns 8-9: clipped and signed-logarithmic index offset to the partner
        offset = partner - inner
        descriptors[inner, 8] = numpy.clip(offset, -4, 4)
        descriptors[inner, 9] = numpy.copysign(
            numpy.log(numpy.abs(offset) + 1), offset
        )
        return descriptors

    def _create_descriptor_mask(self, mask, partner_index):
        """
        Mask the descriptors of a residue, if the residue, its partner or one of
        their direct neighbors is masked.
        The first and last residue are always masked.
        """
        n_residues = mask.shape[0]
        inner = numpy.arange(1, n_residues - 1)
        partner = partner_index[inner]
        invalid = (
            mask[inner - 1]
            | mask[inner]
            | mask[inner + 1]
            | mask[partner - 1]
            | mask[partner]
            | mask[partner + 1]
        )
        descriptor_mask = numpy.zeros((n_residues, 10), dtype=numpy.bool_)
        descriptor_mask[1:-1, :] |= invalid.reshape(n_residues - 2, 1)
        descriptor_mask[0] = True
        descriptor_mask[-1] = True
        return descriptor_mask

    def encode(self, ca, cb, n, c):
        """
        Compute the structural descriptors for each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            Per-residue coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms.

        Returns
        -------
        encoded : MaskedArray, shape=(n, 10), dtype=float
            The descriptors for each residue.
        """
        # encode backbone atoms to virtual center
        virtual_centers = self.vc_encoder.encode(ca, cb, n, c)
        # find closest neighbor for each residue
        partner_index = self.partner_index_encoder._find_residue_partners(
            virtual_centers
        )
        # build position features from residue angles
        descriptors = self._calc_conformation_descriptors(ca, partner_index)
        # create mask
        residue_mask = virtual_centers.mask[:, 0]
        descriptor_mask = self._create_descriptor_mask(residue_mask, partner_index)
        return numpy.ma.masked_array(
            descriptors, mask=descriptor_mask, fill_value=numpy.nan
        )
274
+
275
+
276
class Encoder(_BaseEncoder):
    """
    Encoder that maps each residue of a protein to a 3di state.

    The structural descriptors from :class:`FeatureEncoder` are passed through
    a model, that is built from the layers stored in
    ``encoder_weights_3di.kerasify`` followed by a :class:`CentroidLayer`.
    """

    # Fill value for residues whose descriptors are masked
    _INVALID_STATE = 2
    _CENTROIDS = numpy.array(
        [
            [-1.0729, -0.3600],
            [-0.1356, -1.8914],
            [0.4948, -0.4205],
            [-0.9874, 0.8128],
            [-1.6621, -0.4259],
            [2.1394, 0.0486],
            [1.5558, -0.1503],
            [2.9179, 1.1437],
            [-2.8814, 0.9956],
            [-1.1400, -2.0068],
            [3.2025, 1.7356],
            [1.7769, -1.3037],
            [0.6901, -1.2554],
            [-1.1061, -1.3397],
            [2.1495, -0.8030],
            [2.3060, -1.4988],
            [2.5522, 0.6046],
            [0.7786, -2.1660],
            [-2.3030, 0.3813],
            [1.0290, 0.8772],
        ]
    )

    def __init__(self):
        self.feature_encoder = FeatureEncoder()
        # The weights are shipped as a resource file next to this module
        weights_file = resource_files(__package__).joinpath(
            "encoder_weights_3di.kerasify"
        )
        network_layers = load_kerasify(weights_file)
        centroid_layer = CentroidLayer(self._CENTROIDS)
        self.vae_encoder = Model(network_layers + (centroid_layer,))

    def encode(self, ca, cb, n, c):
        """
        Compute the 3di state for each residue.

        Parameters
        ----------
        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
            Per-residue coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms.

        Returns
        -------
        encoded : MaskedArray
            The state of each residue.
            Residues with masked descriptors are masked as well.
        """
        descriptors = self.feature_encoder.encode(ca, cb, n, c)
        # The model runs on the raw data, the mask is reapplied afterwards
        states = self.vae_encoder(descriptors.data)
        residue_mask = descriptors.mask[:, 0]
        return numpy.ma.masked_array(
            states, mask=residue_mask, fill_value=self._INVALID_STATE
        )
328
+
329
+
330
def _normalize(x, *, inplace=False):
    """
    Scale the vectors along the last axis of `x` to unit length.

    Vectors of length zero are not divided and hence stay zero vectors.

    Parameters
    ----------
    x : ndarray, shape=(..., k)
        The vectors to normalize.
    inplace : bool, optional
        If true, the result is written into `x` and `x` itself is returned.
        Otherwise `x` is left untouched and a new array is returned.

    Returns
    -------
    normalized : ndarray, shape=(..., k)
        The normalized vectors.
    """
    norm = numpy.linalg.norm(x, axis=-1).reshape(*x.shape[:-1], 1)
    if inplace:
        out = x
    else:
        # Entries skipped via ``where`` keep whatever the output array already
        # contains: with ``out=None`` that would be uninitialized memory.
        # Hence, start from a copy of the input, so that zero vectors stay zero
        # (the same result as in the in-place case)
        out = x.astype(numpy.result_type(x, norm))
    return numpy.divide(x, norm, out=out, where=norm != 0)
@@ -0,0 +1,109 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ NumPy port of the ``foldseek`` code for encoding structures to 3di.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Martin Larralde"
11
+ __all__ = ["I3DSequence", "to_3di"]
12
+
13
+ import warnings
14
+ from biotite.sequence.alphabet import LetterAlphabet
15
+ from biotite.sequence.sequence import Sequence
16
+ from biotite.structure.alphabet.encoder import Encoder
17
+ from biotite.structure.chains import get_chain_starts
18
+ from biotite.structure.util import coord_for_atom_name_per_residue
19
+
20
+
21
class I3DSequence(Sequence):
    """
    Representation of a structure in the 3Di structural alphabet.
    :footcite:`VanKempen2024`

    Parameters
    ----------
    sequence : iterable object, optional
        The 3Di sequence.
        This may either be a list or a string.
        May take upper or lower case letters, which are converted to
        lower case.
        By default the sequence is empty.

    See Also
    --------
    to_3di : Create 3Di sequences from a structure.

    References
    ----------

    .. footbibliography::
    """

    alphabet = LetterAlphabet("acdefghiklmnpqrstvwy")
    undefined_symbol = "d"

    def __init__(self, sequence=""):
        # The alphabet contains only lower case letters, so every kind of
        # input must be converted to lower case before it is encoded
        if isinstance(sequence, str):
            sequence = sequence.lower()
        else:
            sequence = [symbol.lower() for symbol in sequence]
        super().__init__(sequence)

    def get_alphabet(self):
        return I3DSequence.alphabet

    def __repr__(self):
        return f'I3DSequence("{"".join(self.symbols)}")'
59
+
60
+
61
def to_3di(atoms):
    """
    Encode each chain in the given structure to the 3Di structure alphabet.
    :footcite:`VanKempen2024`

    Parameters
    ----------
    atoms : AtomArray
        The atom array to encode.
        May contain multiple chains.

    Returns
    -------
    sequences : list of Sequence, length=n
        The encoded 3Di sequence for each peptide chain in the structure.
        A chain without atoms raises a warning and gives an empty sequence.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequences, chain_starts = to_3di(atom_array)
    >>> print(sequences[0])
    dqqvvcvvcpnvvnvdhgdd
    """
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    # The encoder is created on first use and then shared by all chains,
    # instead of constructing a new one for every chain
    encoder = None
    sequences = []
    # Consecutive start indices delimit one chain each;
    # the last index is the exclusive stop of the final chain
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        chain = atoms[start:stop]
        sequence = I3DSequence()
        if chain.array_length() == 0:
            warnings.warn("Ignoring empty chain")
        else:
            if encoder is None:
                encoder = Encoder()
            backbone_coord = coord_for_atom_name_per_residue(
                chain, ["CA", "CB", "N", "C"]
            )
            sequence.code = encoder.encode(*backbone_coord).filled()
        sequences.append(sequence)
    return sequences, chain_start_indices[:-1]
@@ -0,0 +1,86 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Implementation of the neural network layers used in ``foldseek``.
7
+ """
8
+
9
+ __name__ = "biotite.structure.alphabet"
10
+ __author__ = "Martin Larralde"
11
+ __all__ = ["Layer", "DenseLayer", "CentroidLayer", "Model"]
12
+
13
+ import abc
14
+ import functools
15
+ import numpy
16
+
17
+
18
class Layer(abc.ABC):
    """
    Abstract base class of all layers.

    A concrete layer must implement ``__call__()``, which takes the
    layer input and returns the layer output.
    """

    @abc.abstractmethod
    def __call__(self, x):
        """
        Apply the layer to the input `x`.
        """
        raise NotImplementedError
22
+
23
+
24
class DenseLayer(Layer):
    """
    A layer that computes ``x @ weights + biases``, optionally followed
    by a ReLU activation.

    Parameters
    ----------
    weights : array-like
        The weight matrix.
    biases : array-like, optional
        The biases added to the matrix product.
        By default zeros with the length of the second `weights`
        dimension are used.
    activation : bool, optional
        If true, the ReLU activation is applied to the output.
    """

    def __init__(self, weights, biases=None, activation: bool = True):
        self.activation = activation
        self.weights = numpy.asarray(weights)
        self.biases = (
            numpy.zeros(self.weights.shape[1])
            if biases is None
            else numpy.asarray(biases)
        )

    def __call__(self, x):
        result = numpy.asarray(x) @ self.weights
        result += self.biases

        if not self.activation:
            return result
        # The activation is written into the already allocated array
        return _relu(result, out=result)
42
+
43
+
44
class CentroidLayer(Layer):
    """
    A layer that assigns each input row to the index of its closest
    centroid.

    Parameters
    ----------
    centroids : array-like
        The centroids, one per row.
    """

    def __init__(self, centroids) -> None:
        self.centroids = numpy.asarray(centroids)
        # Squared norm of each centroid as row vector
        centroid_sq_norm = numpy.sum(self.centroids**2, axis=1)
        self.r2 = centroid_sq_norm.reshape(-1, 1).T

    def __call__(self, x):
        # Squared distance via |x|^2 - 2 x.c + |c|^2 for each input/centroid pair
        input_sq_norm = numpy.sum(x**2, axis=1).reshape(-1, 1)
        cross_term = 2 * x @ self.centroids.T
        sq_dist = input_sq_norm - cross_term + self.r2
        # The index of the closest centroid is the state
        states = numpy.empty(sq_dist.shape[0], dtype=numpy.uint8)
        sq_dist.argmin(axis=1, out=states)
        return states
57
+
58
+
59
class Model:
    """
    A sequence of layers that are applied one after another.

    Parameters
    ----------
    layers : iterable of callable, optional
        The layers in the order of application.
        By default the model has no layers and returns its input.
    """

    def __init__(self, layers=()):
        self.layers = list(layers)

    def __call__(self, x):
        # Feed the output of each layer into the following one
        output = x
        for layer in self.layers:
            output = layer(output)
        return output
65
+
66
+
67
+ def _relu(
68
+ x,
69
+ out=None,
70
+ *,
71
+ where=True,
72
+ casting="same_kind",
73
+ order="K",
74
+ dtype=None,
75
+ subok=True,
76
+ ):
77
+ return numpy.maximum(
78
+ 0.0,
79
+ x,
80
+ out=out,
81
+ where=where,
82
+ casting=casting,
83
+ order=order,
84
+ dtype=dtype,
85
+ subok=subok,
86
+ )
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Poulain, A. G. de Brevern
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.