biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,581 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides functions for computing the TM-score between two structures and
7
+ for computing the superimposition to do so.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = [
13
+ "tm_score",
14
+ "superimpose_structural_homologs",
15
+ ]
16
+
17
+ import itertools
18
+ import numpy as np
19
+ from biotite.sequence.align.alignment import get_codes, remove_gaps
20
+ from biotite.sequence.align.matrix import SubstitutionMatrix
21
+ from biotite.sequence.align.pairwise import align_optimal
22
+ from biotite.sequence.seqtypes import PurePositionalSequence
23
+ from biotite.structure.filter import filter_amino_acids
24
+ from biotite.structure.geometry import distance
25
+ from biotite.structure.residues import get_residue_count
26
+ from biotite.structure.superimpose import superimpose
27
+ from biotite.structure.util import coord_for_atom_name_per_residue
28
+
29
+ # Minimum value for d0
30
+ # This is not part of the explanation in the paper, but it is implemented in TM-align
31
+ _D0_MIN = 0.5
32
+ # Gap open penalty for hybrid alignment
33
+ _HYBRID_PENALTY = -1
34
+ # Gap open penalty for pure TM-based alignment
35
+ _TM_GAP_PENALTY = -0.6
36
+ # Arbitrary scale factor to avoid rounding errors when converting scores to integer
37
+ _SCORE_SCALING = 100
38
+
39
+
40
+ def tm_score(
41
+ reference, subject, reference_indices, subject_indices, reference_length="shorter"
42
+ ):
43
+ """
44
+ Compute the *TM*-score for the given protein structures. :footcite:`Zhang2004`
45
+
46
+ Parameters
47
+ ----------
48
+ reference, subject : AtomArray or ndarray, dtype=float
49
+ The protein structures to be compared.
50
+ The number of their atoms may differ from each other.
51
+ Alternatively, coordinates can be provided directly as
52
+ :class:`ndarray`.
53
+ reference_indices, subject_indices : ndarray, dtype=int, shape=(n,)
54
+ The indices of the atoms in the reference and subject, respectively,
55
+ that correspond to each other.
56
+ In consequence, the length of both arrays must be equal.
57
+ reference_length : int or {"shorter", "longer", "reference"}
58
+ The reference length used to normalize the TM-score.
59
+ If "shorter", the number of residues in the smaller structure is used.
60
+ If "longer", the number of residues in the larger structure is used.
61
+ If "reference", the number of residues in the reference structure is used.
62
+ The length can also be provided directly as an integer.
63
+
64
+ Returns
65
+ -------
66
+ tm_score : float
67
+ The *TM*-score for the given structures.
68
+
69
+ See Also
70
+ --------
71
+ superimpose_structural_homologs :
72
+ Aims to maximize the *TM*-score between two structures.
73
+ It also returns the corresponding atom indices that can be passed to
74
+ :func:`tm_score()`.
75
+
76
+ Notes
77
+ -----
78
+ This function takes the coordinates as they are.
79
+ It is recommended to superimpose them using
80
+ :func:`superimpose_structural_homologs()` before, as that function aims to find a
81
+ superimposition that maximizes the *TM*-score.
82
+
83
+ References
84
+ ----------
85
+
86
+ .. footbibliography::
87
+
88
+ Examples
89
+ --------
90
+
91
+ >>> reference = atom_array_stack[0]
92
+ >>> subject = atom_array_stack[1]
93
+ >>> superimposed, _, ref_indices, sub_indices = superimpose_structural_homologs(
94
+ ... reference, subject, max_iterations=1
95
+ ... )
96
+ >>> print(tm_score(reference, superimposed, ref_indices, sub_indices))
97
+ 0.69...
98
+ """
99
+ if not np.all(filter_amino_acids(reference)):
100
+ raise ValueError("Reference structure must be peptide only")
101
+ if not np.all(filter_amino_acids(subject)):
102
+ raise ValueError("Subject structure must be peptide only")
103
+ ref_length = _get_reference_length(
104
+ reference_length, get_residue_count(reference), get_residue_count(subject)
105
+ )
106
+ distances = distance(reference[reference_indices], subject[subject_indices])
107
+ return np.sum(_tm_score(distances, ref_length)).item() / ref_length
108
+
109
+
110
+ def superimpose_structural_homologs(
111
+ fixed,
112
+ mobile,
113
+ structural_alphabet="3di",
114
+ substitution_matrix=None,
115
+ max_iterations=float("inf"),
116
+ reference_length="shorter",
117
+ ):
118
+ """
119
+ Superimpose two remotely homologous protein structures.
120
+
121
+ This method relies on structural similarity between the two given structures,
122
+ inspired by the *TM-align* algorithm :footcite:`Zhang2005`.
123
+ Thus, this method is better suited for structurally homologous pairs in the
124
+ *twilight zone*, i.e. with low amino acid sequence similarity.
125
+
126
+ Parameters
127
+ ----------
128
+ fixed : AtomArray, shape(n,)
129
+ The fixed structure.
130
+ Must contain only peptide chains.
131
+ mobile : AtomArray, shape(n,)
132
+ The structure which is superimposed on the `fixed` structure.
133
+ Must contain only peptide chains.
134
+ Must contain the same number of chains as `fixed`.
135
+ structural_alphabet : {"3di", "pb"}, optional
136
+ The structural alphabet to use for finding corresponding residues using sequence
137
+ alignment.
138
+ Either *3Di* or *Protein Blocks*.
139
+ substitution_matrix : SubstitutionMatrix, optional
140
+ The substitution matrix to use for finding corresponding residues using sequence
141
+ alignment.
142
+ max_iterations : int, optional
143
+ The maximum number of iterations to perform in the last step.
144
+ reference_length : int or {"shorter", "longer", "reference"}
145
+ The reference length used to normalize the TM-score and to compute :math:`d_0`.
146
+ If "shorter", the number of residues in the smaller structure is used.
147
+ If "longer", the number of residues in the larger structure is used.
148
+ If "reference", the number of residues in the fixed structure is used.
149
+ The length can also be provided directly as an integer.
150
+
151
+ Returns
152
+ -------
153
+ fitted : AtomArray or AtomArrayStack
154
+ A copy of the `mobile` structure, superimposed on the fixed structure.
155
+ transform : AffineTransformation
156
+ This object contains the affine transformation(s) that were
157
+ applied on `mobile`.
158
+ :meth:`AffineTransformation.apply()` can be used to transform
159
+ another AtomArray in the same way.
160
+ fixed_indices, mobile_indices : ndarray, shape(k,), dtype=int
161
+ The indices of the corresponding ``CA`` atoms in the fixed and mobile structure,
162
+ respectively.
163
+ These atoms were used for the superimposition, if their pairwise distance is
164
+ below the :math:`d_0` threshold :footcite:`Zhang2004`.
165
+
166
+ See Also
167
+ --------
168
+ superimpose_homologs : Analogous functionality for structures with high sequence similarity.
169
+
170
+ Notes
171
+ -----
172
+ The challenge of aligning two structures with different number of residues is
173
+ finding the corresponding residues between them.
174
+ This algorithm, inspired by *TM-align* :footcite:`Zhang2005`, uses a 3-step heuristic:
175
+
176
+ 1. Find corresponding residues using a structural alphabet alignment and superimpose
177
+ the chains based on them.
178
+ 2. Refine the corresponding residues using a sequence alignment based on a hybrid
179
+ positional substitution matrix:
180
+ The scores are a 50/50 combination of the structural alphabet substitution score
181
+ and the distance-based TM-score between two residues.
182
+ The superimposition is updated based on the new corresponding residues.
183
+ 3. Refine the corresponding residues using a sequence alignment with a pure
184
+ TM-score based positional substitution matrix.
185
+ Update the superimposition based on the new corresponding residues.
186
+ Repeat this step until the correspondences are stable.
187
+
188
+ References
189
+ ----------
190
+
191
+ .. footbibliography::
192
+
193
+ Examples
194
+ --------
195
+
196
+ >>> fixed = atom_array_stack[0]
197
+ >>> mobile = atom_array_stack[1]
198
+ >>> superimposed, _, fix_indices, mob_indices = superimpose_structural_homologs(
199
+ ... fixed, mobile, max_iterations=1
200
+ ... )
201
+ >>> print(tm_score(fixed, superimposed, fix_indices, mob_indices))
202
+ 0.69...
203
+ >>> print(rmsd(fixed[fix_indices], superimposed[mob_indices]))
204
+ 0.83...
205
+ """
206
+ # Avoid circular imports
207
+ from biotite.structure.alphabet.i3d import to_3di
208
+ from biotite.structure.alphabet.pb import to_protein_blocks
209
+
210
+ match structural_alphabet.lower():
211
+ case "3di":
212
+ conversion_function = to_3di
213
+ if substitution_matrix is None:
214
+ substitution_matrix = SubstitutionMatrix.std_3di_matrix()
215
+ case "pb":
216
+ conversion_function = to_protein_blocks
217
+ if substitution_matrix is None:
218
+ substitution_matrix = SubstitutionMatrix.std_protein_blocks_matrix()
219
+ case _:
220
+ raise ValueError(
221
+ f"Unsupported structural alphabet: '{structural_alphabet}'"
222
+ )
223
+
224
+ # Concatenate the structural sequences for simplicity
225
+ # In the sequence alignment, this will make barely a difference compared
226
+ # to separate alignments, as there is no gap extension penalty
227
+ fixed_seq = _concatenate_sequences(conversion_function(fixed)[0])
228
+ mobile_seq = _concatenate_sequences(conversion_function(mobile)[0])
229
+ fixed_ca_coord = coord_for_atom_name_per_residue(fixed, ["CA"])[0]
230
+ mobile_ca_coord = coord_for_atom_name_per_residue(mobile, ["CA"])[0]
231
+ # NaN values (i.e. residues without CA atom) would let the superimposition fail
232
+ fixed_not_nan_mask = ~np.isnan(fixed_ca_coord).any(axis=-1)
233
+ mobile_not_nan_mask = ~np.isnan(mobile_ca_coord).any(axis=-1)
234
+ fixed_seq = fixed_seq[fixed_not_nan_mask]
235
+ fixed_ca_coord = fixed_ca_coord[fixed_not_nan_mask]
236
+ mobile_seq = mobile_seq[mobile_not_nan_mask]
237
+ mobile_ca_coord = mobile_ca_coord[mobile_not_nan_mask]
238
+ reference_length = _get_reference_length(
239
+ reference_length, len(fixed_seq), len(mobile_seq)
240
+ )
241
+
242
+ # 1. step
243
+ anchors = _find_anchors_structure_based(fixed_seq, mobile_seq, substitution_matrix)
244
+ _, transform = superimpose(
245
+ *_filter_by_anchors(fixed_ca_coord, mobile_ca_coord, anchors)
246
+ )
247
+ superimposed_ca_coord = transform.apply(mobile_ca_coord)
248
+
249
+ # 2. step
250
+ anchors = _find_anchors_hybrid(
251
+ fixed_seq,
252
+ mobile_seq,
253
+ fixed_ca_coord,
254
+ superimposed_ca_coord,
255
+ substitution_matrix,
256
+ reference_length,
257
+ )
258
+ _, transform = superimpose(
259
+ *_filter_by_anchors(
260
+ fixed_ca_coord,
261
+ mobile_ca_coord,
262
+ anchors,
263
+ )
264
+ )
265
+ superimposed_ca_coord = transform.apply(mobile_ca_coord)
266
+
267
+ # 3. step
268
+ for n_iterations in itertools.count(1):
269
+ previous_anchors = anchors
270
+ anchors = _find_anchors_tm_based(
271
+ fixed_ca_coord, superimposed_ca_coord, reference_length
272
+ )
273
+ _, transform = superimpose(
274
+ *_filter_by_anchors(
275
+ fixed_ca_coord,
276
+ mobile_ca_coord,
277
+ anchors,
278
+ superimposed_ca_coord,
279
+ reference_length,
280
+ )
281
+ )
282
+ superimposed_ca_coord = transform.apply(mobile_ca_coord)
283
+ if n_iterations >= max_iterations or np.array_equal(previous_anchors, anchors):
284
+ break
285
+
286
+ # The anchors currently refer to the CA atoms only
287
+ # -> map anchors back to all-atom indices
288
+ fixed_anchors = np.where(fixed.atom_name == "CA")[0][anchors[:, 0]]
289
+ mobile_anchors = np.where(mobile.atom_name == "CA")[0][anchors[:, 1]]
290
+ return transform.apply(mobile), transform, fixed_anchors, mobile_anchors
291
+
292
+
293
def _concatenate_sequences(sequences):
    """
    Join multiple sequences end to end.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to join, in the order they should appear in the result.
        Must contain at least one element, as the type of the first element
        determines the type of the result.

    Returns
    -------
    sequence : Sequence
        A single sequence comprising the symbols of all input sequences.
    """
    # An empty instance of the input type is the neutral starting point
    concatenated = type(sequences[0])()
    for sequence in sequences:
        concatenated = concatenated + sequence
    return concatenated
309
+
310
+
311
def _filter_by_anchors(
    fixed_ca_coord,
    mobile_ca_coord,
    anchors,
    superimposed_ca_coord=None,
    reference_length=None,
):
    """
    Select the coordinates of the anchored residues.

    Parameters
    ----------
    fixed_ca_coord, mobile_ca_coord : ndarray, shape=(n,3)
        The coordinates of the CA atoms of the fixed and mobile structure,
        respectively.
    anchors : ndarray, shape=(k,2)
        The anchor indices.
        The first column indexes `fixed_ca_coord`, the second column indexes
        `mobile_ca_coord` (and `superimposed_ca_coord`).
    superimposed_ca_coord : ndarray, shape=(m,3), optional
        The coordinates of the CA atoms of the superimposed mobile structure.
        If given together with `reference_length`, only those anchors are kept,
        whose distance between the fixed and superimposed structure is lower
        than :math:`d_0`.
    reference_length : int, optional
        The reference length used to compute :math:`d_0`.
        The distance filter is only applied, if both `superimposed_ca_coord` and
        `reference_length` are given.

    Returns
    -------
    anchor_fixed_coord, anchor_mobile_coord : ndarray, shape=(k,3)
        The anchor coordinates of the fixed and mobile structure.
        If the distance filter is applied, the arrays only contain the anchors
        that passed the filter.

    Raises
    ------
    ValueError
        If the distance filter is applied and no anchor passes it.
    """
    fixed_indices = anchors[:, 0]
    mobile_indices = anchors[:, 1]
    selected_fixed = fixed_ca_coord[fixed_indices]
    selected_mobile = mobile_ca_coord[mobile_indices]

    # Without both optional inputs there is nothing to measure distances against
    if superimposed_ca_coord is None or reference_length is None:
        return selected_fixed, selected_mobile

    selected_superimposed = superimposed_ca_coord[mobile_indices]
    within_d0 = _mask_by_d0_threshold(
        selected_fixed, selected_superimposed, reference_length
    )
    return selected_fixed[within_d0], selected_mobile[within_d0]
351
+
352
+
353
def _find_anchors_structure_based(fixed_seq, mobile_seq, substitution_matrix):
    """
    Find anchors by aligning the two sequences with the substitution matrix.

    Parameters
    ----------
    fixed_seq, mobile_seq : Sequence
        The sequences of the fixed and mobile structure to align.
    substitution_matrix : SubstitutionMatrix
        The substitution matrix used for the alignment and for deciding which
        aligned positions are similar enough to become anchors.

    Returns
    -------
    anchors : ndarray, shape=(k,2)
        The rows of the alignment trace, that contain no gap and whose aligned
        symbols have a positive substitution score.
    """
    # The gap opening penalty is the negated median match score,
    # the second penalty value is zero
    gap_open_penalty = -_get_median_match_score(substitution_matrix)
    alignments = align_optimal(
        fixed_seq,
        mobile_seq,
        substitution_matrix,
        gap_penalty=(gap_open_penalty, 0),
        terminal_penalty=False,
        max_number=1,
    )
    # Gapped positions cannot serve as anchors
    ungapped_alignment = remove_gaps(alignments[0])
    # Only aligned symbols with a positive score count as structurally similar
    codes = get_codes(ungapped_alignment)
    scores = substitution_matrix.score_matrix()
    is_similar = scores[codes[0], codes[1]] > 0
    return ungapped_alignment.trace[is_similar]
370
+
371
+
372
def _find_anchors_hybrid(
    fixed_seq,
    mobile_seq,
    fixed_ca_coord,
    mobile_ca_coord,
    substitution_matrix,
    reference_length,
):
    """
    Find anchors by aligning with a score combining substitution and TM scores.

    Parameters
    ----------
    fixed_seq, mobile_seq : Sequence
        The structural alphabet sequences of the fixed and mobile structure.
    fixed_ca_coord, mobile_ca_coord : ndarray, shape=(n,3)
        The coordinates of the CA atoms of the fixed and mobile structure,
        respectively.
        The pairwise TM scores are computed from these coordinates.
    substitution_matrix : SubstitutionMatrix
        The substitution matrix for the structural alphabet.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    anchors : ndarray, shape=(k,2)
        The gap-free rows of the trace of the hybrid alignment.
    """
    # Dividing by the median match score brings the substitution scores
    # into the range of pairwise TM scores
    median_match_score = _get_median_match_score(substitution_matrix)
    # The TM score belongs to a particular residue pair and not to a general pair
    # of symbols from the structural alphabet
    # -> A positional substitution matrix is required, whose shape reflects the
    # number of residues instead of the number of symbols
    positional_matrix, fixed_seq, mobile_seq = substitution_matrix.as_positional(
        fixed_seq,
        mobile_seq,
    )

    tm_scores = _pairwise_tm_score(fixed_ca_coord, mobile_ca_coord, reference_length)
    sa_scores = positional_matrix.score_matrix()
    # Scores and gap penalty are scaled up before the conversion to integers,
    # to limit the precision lost in that conversion
    combined_scores = sa_scores / median_match_score + tm_scores
    scaled_scores = _SCORE_SCALING * combined_scores
    scaled_gap_penalty = _SCORE_SCALING * _HYBRID_PENALTY
    hybrid_matrix = SubstitutionMatrix(
        positional_matrix.get_alphabet1(),
        positional_matrix.get_alphabet2(),
        scaled_scores.astype(np.int32),
    )
    alignments = align_optimal(
        fixed_seq,
        mobile_seq,
        hybrid_matrix,
        (scaled_gap_penalty, 0),
        terminal_penalty=False,
        max_number=1,
    )
    # Gapped positions cannot serve as anchors
    return remove_gaps(alignments[0]).trace
418
+
419
+
420
def _find_anchors_tm_based(fixed_ca_coord, mobile_ca_coord, reference_length):
    """
    Find anchors by aligning with the pairwise TM scores as the only score.

    Parameters
    ----------
    fixed_ca_coord, mobile_ca_coord : ndarray, shape=(n,3)
        The coordinates of the CA atoms of the fixed and mobile structure,
        respectively.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    anchors : ndarray, shape=(k,2)
        The gap-free rows of the trace of the TM score based alignment.
    """
    # The scores depend only on the position -> purely positional sequences,
    # one position per CA atom, are sufficient
    fixed_positions = PurePositionalSequence(len(fixed_ca_coord))
    mobile_positions = PurePositionalSequence(len(mobile_ca_coord))

    # Scores and gap penalty are scaled by the same factor
    # before the scores are converted to integers
    scaled_scores = _SCORE_SCALING * _pairwise_tm_score(
        fixed_ca_coord, mobile_ca_coord, reference_length
    )
    scaled_gap_penalty = _SCORE_SCALING * _TM_GAP_PENALTY
    tm_matrix = SubstitutionMatrix(
        fixed_positions.alphabet,
        mobile_positions.alphabet,
        scaled_scores.astype(np.int32),
    )
    alignments = align_optimal(
        fixed_positions,
        mobile_positions,
        tm_matrix,
        (scaled_gap_penalty, 0),
        terminal_penalty=False,
        max_number=1,
    )
    # Gapped positions cannot serve as anchors
    return remove_gaps(alignments[0]).trace
444
+
445
+
446
def _get_median_match_score(substitution_matrix):
    """
    Determine the median of the scores assigned to matching symbols.

    Parameters
    ----------
    substitution_matrix : SubstitutionMatrix
        The substitution matrix whose match scores, i.e. the diagonal of its
        score matrix, are evaluated.
        Must be symmetric.

    Returns
    -------
    score : float
        The median match score, as returned by :func:`numpy.median`.

    Notes
    -----
    The median is preferred over the mean, because the range of scores may be
    large, especially when the matrix assigns an arbitrary score to the
    *undefined symbol*.
    The callers also use the value as gap penalty.
    NOTE(review): :func:`numpy.median` averages the two middle values for an even
    number of symbols, so an integral value is only guaranteed for an odd number
    of symbols - confirm this holds for the matrices in use.
    """
    match_scores = np.diagonal(substitution_matrix.score_matrix())
    return np.median(match_scores)
469
+
470
+
471
def _mask_by_d0_threshold(fixed_ca_coord, mobile_ca_coord, reference_length):
    """
    Mark the coordinate pairs that are closer to each other than :math:`d_0`.

    Parameters
    ----------
    fixed_ca_coord, mobile_ca_coord : ndarray, shape=(n,3)
        The coordinates of the CA atoms of the fixed and mobile structure.
        The distance is measured between these coordinates.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    mask : ndarray, shape=(n,), dtype=bool
        A boolean mask that is true for each distance below the :math:`d_0`
        threshold.

    Raises
    ------
    ValueError
        If not a single distance is below the :math:`d_0` threshold.
    """
    pair_distances = distance(fixed_ca_coord, mobile_ca_coord)
    below_threshold = pair_distances < _d0(reference_length)
    if below_threshold.any():
        return below_threshold
    # An empty selection would leave nothing to superimpose
    raise ValueError("No anchors found, the structures are too dissimilar")
493
+
494
+
495
def _pairwise_tm_score(reference_coord, subject_coord, reference_length):
    """
    Compute the TM score for each combination of reference and subject coordinate.

    Parameters
    ----------
    reference_coord, subject_coord : ndarray, shape=(p,3) or shape=(q,3), dtype=float
        The coordinates of the CA atoms.
        The distance is computed for each pair of a reference and a subject
        coordinate.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    tm_score : ndarray, shape=(p,q), dtype=float
        The TM score for the Cartesian product of the two coordinate arrays.
    """
    # Inserting a length-1 axis at different positions lets the distance
    # computation pair each reference coordinate with each subject coordinate
    reference_column = reference_coord[:, None, :]
    subject_row = subject_coord[None, :, :]
    all_distances = distance(reference_column, subject_row)
    return _tm_score(all_distances, reference_length)
516
+
517
+
518
def _tm_score(distance, reference_length):
    """
    Convert distances into TM scores.

    Parameters
    ----------
    distance : float or ndarray
        The distance(s) between the CA atoms of two residues.
        Note that this parameter shadows the `distance()` function within this
        function.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    tm_score : float or ndarray
        The TM score for each given distance, i.e.
        :math:`1 / (1 + (d / d_0)^2)`.
    """
    # Express the distance in units of d0
    scaled_distance = distance / _d0(reference_length)
    return 1 / (1 + scaled_distance**2)
535
+
536
+
537
def _d0(reference_length):
    """
    Compute the distance threshold :math:`d_0` for a reference length.

    Parameters
    ----------
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    d0 : float
        The :math:`d_0` threshold.
        It is never smaller than ``_D0_MIN``.
    """
    # A negative base would give a complex cube root -> clip to zero
    # For such short lengths _D0_MIN takes precedence anyway
    clipped_length = max(reference_length - 15, 0)
    # Constants taken from Zhang2004
    length_dependent_d0 = 1.24 * clipped_length ** (1 / 3) - 1.8
    return max(length_dependent_d0, _D0_MIN)
558
+
559
+
560
+ def _get_reference_length(user_parameter, reference_length, subject_length):
561
+ """
562
+ Get the reference length to normalize the TM-score and compute :math:`d_0`.
563
+
564
+ Parameters
565
+ ----------
566
+ user_parameter : int or {"shorter", "longer", "reference"}
567
+ The value given by the user via the `reference_length` parameter.
568
+ reference_length, subject_length : int
569
+ The lengths of the reference and subject structure, respectively.
570
+ """
571
+ match user_parameter:
572
+ case "shorter":
573
+ return min(reference_length, subject_length)
574
+ case "longer":
575
+ return max(reference_length, subject_length)
576
+ case "reference":
577
+ return reference_length
578
+ case int(number):
579
+ return number
580
+ case _:
581
+ raise ValueError(f"Unsupported reference length: '{user_parameter}'")