biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,631 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __all__ = ["SubstitutionMatrix"]
6
+ __name__ = "biotite.sequence.align"
7
+ __author__ = "Patrick Kunzmann"
8
+
9
+ import functools
10
+ from pathlib import Path
11
+ import numpy as np
12
+ from biotite.sequence.seqtypes import (
13
+ NucleotideSequence,
14
+ PositionalSequence,
15
+ ProteinSequence,
16
+ )
17
+
18
+ # Directory of matrix files
19
+ _DB_DIR = Path(__file__).parent / "matrix_data"
20
+
21
+
22
+ class SubstitutionMatrix(object):
23
+ """
24
+ A :class:`SubstitutionMatrix` is the foundation for scoring in
25
+ sequence alignments.
26
+ A :class:`SubstitutionMatrix` maps each possible pairing of a symbol
27
+ of a first alphabet with a symbol of a second alphabet to a score
28
+ (integer).
29
+
30
+ The class uses a 2-D (m x n) :class:`ndarray`
31
+ (dtype=:attr:`numpy.int32`),
32
+ where each element stores the score for a symbol pairing, indexed
33
+ by the symbol codes of the respective symbols in an *m*-length
34
+ alphabet 1 and an *n*-length alphabet 2.
35
+
36
+ There are 3 ways to create instances:
37
+
38
+ At first a 2-D :class:`ndarray` containing the scores can be
39
+ directly provided.
40
+
41
+ Secondly a dictionary can be provided, where the keys are pairing
42
+ tuples and values are the corresponding scores.
43
+ The pairing tuples consist of a symbol of alphabet 1 as first
44
+ element and a symbol of alphabet 2 as second element. Pairings have
45
+ to be provided for each possible combination.
46
+
47
+ At last a valid matrix name can be given, which is loaded from the
48
+ internal matrix database. The following matrices are available:
49
+
50
+ - Nucleotide substitution matrices from NCBI database
51
+ - **NUC** - Also usable with ambiguous alphabet
52
+
53
+ - Protein substitution matrices from NCBI database
54
+
55
+ - **PAM<n>**
56
+ - **BLOSUM<n>**
57
+ - **MATCH** - Only differentiates between match and mismatch
58
+ - **IDENTITY** - Strongly penalizes mismatches
59
+ - **GONNET** - Not usable with default protein alphabet
60
+ - **DAYHOFF**
61
+
62
+ - Corrected protein substitution matrices :footcite:`Hess2016`,
63
+ **<BLOCKS>** is the BLOCKS version, the matrix is based on
64
+
65
+ - **BLOSUM<n>_<BLOCKS>**
66
+ - **RBLOSUM<n>_<BLOCKS>**
67
+ - **CorBLOSUM<n>_<BLOCKS>**
68
+
69
+ - Structural alphabet substitution matrices
70
+
71
+ - **3Di** - For 3Di alphabet from ``foldseek`` :footcite:`VanKempen2024`
72
+ - **PB** - For Protein Blocks alphabet from *PBexplore* :footcite:`Barnoud2017`
73
+
74
+ A list of all available matrix names is returned by
75
+ :meth:`list_db()`.
76
+
77
+ Since this class can handle two different alphabets, it is possible
78
+ to align two different types of sequences.
79
+
80
+ Objects of this class are immutable.
81
+
82
+ Parameters
83
+ ----------
84
+ alphabet1 : Alphabet, length=m
85
+ The first alphabet of the substitution matrix.
86
+ alphabet2 : Alphabet, length=n
87
+ The second alphabet of the substitution matrix.
88
+ score_matrix : ndarray, shape=(m,n) or dict or str
89
+ Either a symbol code indexed :class:`ndarray` containing the scores,
90
+ or a dictionary mapping the symbol pairing to scores,
91
+ or a string referencing a matrix in the internal database.
92
+
93
+ Attributes
94
+ ----------
95
+ shape : tuple
96
+ The shape of the substitution matrix.
97
+
98
+ Raises
99
+ ------
100
+ KeyError
101
+ If the matrix dictionary misses a symbol given in the alphabet.
102
+
103
+ References
104
+ ----------
105
+
106
+ .. footbibliography::
107
+
108
+ Examples
109
+ --------
110
+
111
+ Creating a matrix for two different (nonsense) alphabets
112
+ via a matrix dictionary:
113
+
114
+ >>> alph1 = Alphabet(["foo","bar"])
115
+ >>> alph2 = Alphabet([1,2,3])
116
+ >>> matrix_dict = {("foo",1):5, ("foo",2):10, ("foo",3):15,
117
+ ... ("bar",1):42, ("bar",2):42, ("bar",3):42}
118
+ >>> matrix = SubstitutionMatrix(alph1, alph2, matrix_dict)
119
+ >>> print(matrix.score_matrix())
120
+ [[ 5 10 15]
121
+ [42 42 42]]
122
+ >>> print(matrix.get_score("foo", 2))
123
+ 10
124
+ >>> print(matrix.get_score_by_code(0, 1))
125
+ 10
126
+
127
+ Creating an identity substitution matrix via the score matrix:
128
+
129
+ >>> alph = NucleotideSequence.alphabet_unamb
130
+ >>> matrix = SubstitutionMatrix(alph, alph, np.identity(len(alph), dtype=int))
131
+ >>> print(matrix)
132
+ A C G T
133
+ A 1 0 0 0
134
+ C 0 1 0 0
135
+ G 0 0 1 0
136
+ T 0 0 0 1
137
+
138
+ Creating a matrix via database name:
139
+
140
+ >>> alph = ProteinSequence.alphabet
141
+ >>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
142
+ """
143
+
144
def __init__(self, alphabet1, alphabet2, score_matrix):
    """
    Set up the substitution matrix from one of the supported score
    sources.

    Parameters
    ----------
    alphabet1, alphabet2 : Alphabet
        The alphabets the rows and columns of the matrix refer to.
    score_matrix : ndarray or dict or str
        The scores: either an integer array of shape
        ``(len(alphabet1), len(alphabet2))``, a dictionary that is
        passed to ``_fill_with_matrix_dict()``, or a matrix name that
        is resolved via ``dict_from_db()``.

    Raises
    ------
    ValueError
        If the array shape does not match the alphabet lengths or if
        any score is not strictly inside the ``int32`` value range.
    TypeError
        If the array does not have an integer dtype or if
        `score_matrix` is neither an array, a dictionary nor a string.
    """
    self._alph1 = alphabet1
    self._alph2 = alphabet2
    if isinstance(score_matrix, dict):
        self._fill_with_matrix_dict(score_matrix)
    elif isinstance(score_matrix, np.ndarray):
        alph_shape = (len(alphabet1), len(alphabet2))
        if score_matrix.shape != alph_shape:
            raise ValueError(
                f"Matrix has shape {score_matrix.shape}, "
                f"but {alph_shape} is required"
            )
        if not np.issubdtype(score_matrix.dtype, np.integer):
            raise TypeError("Score matrix must be an integer ndarray")
        # The scores are stored as int32.
        # The value range is validated *before* the narrowing cast:
        # 'astype()' silently wraps out-of-range values of wider integer
        # types, so inspecting only the casted array would miss them.
        # The int32 limits themselves are rejected as well:
        # They are what inf values become, if the score matrix was
        # converted from a float matrix, which is probably undesired
        # and gives overflow issues in the alignment functions
        int32_limits = np.iinfo(np.int32)
        # An empty array has no minimum/maximum -> skip the range check
        if score_matrix.size > 0 and (
            int(score_matrix.max()) >= int32_limits.max
            or int(score_matrix.min()) <= int32_limits.min
        ):
            raise ValueError(
                "Score values are too large. "
                "Maybe it was converted from a float matrix containing inf values?"
            )
        self._matrix = score_matrix.astype(np.int32)
    elif isinstance(score_matrix, str):
        matrix_dict = SubstitutionMatrix.dict_from_db(score_matrix)
        self._fill_with_matrix_dict(matrix_dict)
    else:
        raise TypeError(
            "Matrix must be either a dictionary, an 2-D ndarray or a string"
        )
    # This class is immutable and has a getter function for the
    # score matrix -> make the score matrix read-only
    self._matrix.setflags(write=False)
181
+
182
@property
def shape(self):
    """
    The dimensions of the substitution matrix.

    They are given by the length of the first alphabet followed by the
    length of the second alphabet.

    Returns
    -------
    shape : tuple
        Matrix shape.
    """
    n_rows = len(self._alph1)
    n_columns = len(self._alph2)
    return (n_rows, n_columns)
194
+
195
def get_alphabet1(self):
    """
    Return the alphabet the first dimension of the matrix refers to.

    Returns
    -------
    alphabet : Alphabet
        The first alphabet.
    """
    first_alphabet = self._alph1
    return first_alphabet
205
+
206
def get_alphabet2(self):
    """
    Return the alphabet the second dimension of the matrix refers to.

    Returns
    -------
    alphabet : Alphabet
        The second alphabet.
    """
    second_alphabet = self._alph2
    return second_alphabet
216
+
217
def score_matrix(self):
    """
    Return the array that holds the substitution scores.

    The internal array itself is returned, not a copy.

    Returns
    -------
    matrix : ndarray, shape=(m,n), dtype=np.int32
        The symbol code indexed score matrix.
        The array is read-only.
    """
    scores = self._matrix
    return scores
228
+
229
def transpose(self):
    """
    Create a new substitution matrix with interchanged alphabets.

    The second alphabet of this matrix becomes the first alphabet of
    the new matrix and vice versa.
    The scores are transposed accordingly.

    Returns
    -------
    transposed : SubstitutionMatrix
        The transposed substitution matrix.
    """
    swapped_scores = np.transpose(self._matrix)
    return SubstitutionMatrix(self._alph2, self._alph1, swapped_scores)
243
+
244
def is_symmetric(self):
    """
    Check whether this substitution matrix is symmetric.

    This requires that both alphabets are equal and that the score
    matrix is equal to its own transpose.

    Returns
    -------
    is_symmetric : bool
        True, if both alphabets are identical and the score matrix
        is symmetric, false otherwise.
    """
    same_alphabets = self._alph1 == self._alph2
    # The scores are only compared if the alphabets already match
    return same_alphabets and np.array_equal(
        self._matrix, np.transpose(self._matrix)
    )
259
+
260
def get_score_by_code(self, code1, code2):
    """
    Look up the substitution score for a pair of symbol codes.

    Parameters
    ----------
    code1, code2 : int
        Symbol codes of the two symbols to be aligned.
        `code1` selects the row and `code2` the column of the score
        matrix.

    Returns
    -------
    score : int
        The substitution / alignment score.
    """
    index = (code1, code2)
    return self._matrix[index]
276
+
277
def get_score(self, symbol1, symbol2):
    """
    Look up the substitution score for a pair of symbols.

    Each symbol is encoded with its respective alphabet and the
    resulting symbol codes are used to index the score matrix.

    Parameters
    ----------
    symbol1, symbol2 : object
        Symbols to be aligned.
        `symbol1` is encoded with the first alphabet, `symbol2` with
        the second alphabet.

    Returns
    -------
    score : int
        The substitution / alignment score.
    """
    index = (self._alph1.encode(symbol1), self._alph2.encode(symbol2))
    return self._matrix[index]
294
+
295
def as_positional(self, sequence1, sequence2):
    """
    Convert this substitution matrix and two sequences into their
    positional equivalents.

    The returned substitution matrix is position-specific:
    Its dimensions are the lengths of the two sequences instead of the
    lengths of the alphabets.
    Its scores are the scores of this matrix, mapped onto the positions
    of the sequences.

    Parameters
    ----------
    sequence1 : seq.Sequence, length=m
        The sequence whose positions index the first dimension of the
        positional matrix.
    sequence2 : seq.Sequence, length=n
        The sequence whose positions index the second dimension of the
        positional matrix.

    Returns
    -------
    pos_matrix : align.SubstitutionMatrix, shape=(m, n)
        The position-specific substitution matrix.
    pos_sequence1 : PositionalSequence, length=m
        The positional equivalent of `sequence1`.
    pos_sequence2 : PositionalSequence, length=n
        The positional equivalent of `sequence2`.

    Notes
    -----
    After the transformation the substitution scores remain the same, i.e.
    `substitution_matrix.get_score(sequence1[i], sequence2[j])` is equal to
    `pos_matrix.get_score(pos_sequence1[i], pos_sequence2[j])`.

    Examples
    --------

    Run an alignment with the usual substitution matrix:

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("IQLITE")
    >>> matrix = SubstitutionMatrix.std_protein_matrix()
    >>> alignment = align_optimal(seq1, seq2, matrix, gap_penalty=-10)[0]
    >>> print(alignment)
    BIQTITE
    -IQLITE

    Running the alignment with positional equivalents gives the same result:

    >>> pos_matrix, pos_seq1, pos_seq2 = matrix.as_positional(seq1, seq2)
    >>> pos_alignment = align_optimal(pos_seq1, pos_seq2, pos_matrix, gap_penalty=-10)[0]
    >>> print(pos_alignment)
    BIQTITE
    -IQLITE

    Increase the substitution score for the first symbols in both sequences to align
    to each other:

    >>> score_matrix = pos_matrix.score_matrix().copy()
    >>> score_matrix[0, 0] = 100
    >>> biased_matrix = SubstitutionMatrix(
    ...     pos_matrix.get_alphabet1(), pos_matrix.get_alphabet2(), score_matrix
    ... )
    >>> biased_alignment = align_optimal(pos_seq1, pos_seq2, biased_matrix, gap_penalty=-10)[0]
    >>> print(biased_alignment)
    BIQTITE
    I-QLITE
    """
    pos_sequence1 = PositionalSequence(sequence1)
    pos_sequence2 = PositionalSequence(sequence2)

    # Look up the scores for the combinations of symbol codes from both
    # sequences and arrange them as a
    # (length of sequence 1) x (length of sequence 2) matrix
    code_combinations = _cartesian_product(sequence1.code, sequence2.code)
    flat_scores = self._matrix[tuple(code_combinations.T)]
    target_shape = (len(sequence1), len(sequence2))
    pos_score_matrix = flat_scores.reshape(target_shape)

    # The positional matrix is indexed by the alphabets of the
    # positional sequences
    pos_matrix = SubstitutionMatrix(
        pos_sequence1.get_alphabet(),
        pos_sequence2.get_alphabet(),
        pos_score_matrix,
    )
    return pos_matrix, pos_sequence1, pos_sequence2
414
+
415
def __repr__(self):
    """
    Represent the matrix as a constructor-like string for debugging.

    Returns
    -------
    representation : str
        The representations of both alphabets followed by the score
        matrix as formatted by :func:`numpy.array_repr()`, prefixed
        with ``np.``.
    """
    alph1_repr = self._alph1.__repr__()
    alph2_repr = self._alph2.__repr__()
    matrix_repr = np.array_repr(self._matrix)
    return f"SubstitutionMatrix({alph1_repr}, {alph2_repr}, np.{matrix_repr})"
421
+
422
def __eq__(self, item):
    """
    Check whether this matrix is equal to another object.

    Parameters
    ----------
    item : object
        The object to compare this matrix with.

    Returns
    -------
    equal : bool
        True, if `item` is a :class:`SubstitutionMatrix` whose two
        alphabets and score matrix are equal to the ones of this
        matrix, false otherwise.
    """
    if not isinstance(item, SubstitutionMatrix):
        return False
    # The second alphabet is only compared if the first one matches
    alphabets_differ = (
        self._alph1 != item.get_alphabet1() or self._alph2 != item.get_alphabet2()
    )
    if alphabets_differ:
        return False
    return bool(np.array_equal(self.score_matrix(), item.score_matrix()))
432
+
433
def __ne__(self, item):
    """
    Check whether this matrix is unequal to another object.

    This is the negation of the ``==`` comparison.
    """
    is_equal = self == item
    return not is_equal
435
+
436
def __str__(self):
    """
    Format the matrix as a table in NCBI format.

    The header line lists the symbols of the second alphabet.
    Each following line starts with a symbol of the first alphabet,
    followed by the integer scores of that row.
    The string does not end with a line break.
    """
    # Each column is right-aligned to a width of three characters
    header = " " + "".join(f" {str(symbol):>3}" for symbol in self._alph2)
    table_lines = [header]
    for i, symbol in enumerate(self._alph1):
        row_scores = "".join(
            f" {int(self._matrix[i, j]):>3d}" for j in range(len(self._alph2))
        )
        table_lines.append(f"{str(symbol):>1}{row_scores}")
    return "\n".join(table_lines)
450
+
451
@staticmethod
def dict_from_str(string):
    """
    Create a matrix dictionary from a string in NCBI matrix format.

    Symbols of the first alphabet are taken from the left column,
    symbols of the second alphabet are taken from the top row.

    The keys of the dictionary consist of tuples containing the
    aligned symbols and the values are the corresponding scores.

    Parameters
    ----------
    string : str
        The string containing the substitution matrix in NCBI format.
        Empty lines and lines starting with ``#`` are ignored.

    Returns
    -------
    matrix_dict : dict
        A dictionary representing the substitution matrix.
        The key ``(symbol1, symbol2)`` maps to the score found in the
        row of `symbol1` and the column of `symbol2`.
    """
    lines = [line.strip() for line in string.split("\n")]
    # Skip empty lines and comment lines
    lines = [line for line in lines if len(line) != 0 and line[0] != "#"]
    # The first remaining line is the header containing the column symbols
    symbols2 = lines[0].split()
    rows = [line.split() for line in lines[1:]]
    symbols1 = [row[0] for row in rows]
    # Axis 0 follows the rows (first alphabet) and axis 1 the columns
    # (second alphabet): the table must not be transposed, otherwise
    # asymmetric matrices get swapped scores and non-square matrices
    # cannot be indexed at all
    scores = np.array([row[1:] for row in rows]).astype(int)

    matrix_dict = {}
    for i, symbol1 in enumerate(symbols1):
        for j, symbol2 in enumerate(symbols2):
            matrix_dict[(symbol1, symbol2)] = scores[i, j]
    return matrix_dict
484
+
485
@staticmethod
def dict_from_db(matrix_name):
    """
    Create a matrix dictionary from a valid matrix name in the
    internal matrix database.

    The file ``<matrix_name>.mat`` in the database directory is read
    and parsed via :meth:`dict_from_str()`.

    The keys of the dictionary consist of tuples containing the
    aligned symbols and the values are the corresponding scores.

    Parameters
    ----------
    matrix_name : str
        The name of the matrix in the internal database.

    Returns
    -------
    matrix_dict : dict
        A dictionary representing the substitution matrix.
    """
    matrix_path = _DB_DIR / f"{matrix_name}.mat"
    with open(matrix_path, "r") as matrix_file:
        content = matrix_file.read()
    return SubstitutionMatrix.dict_from_str(content)
507
+
508
@staticmethod
def list_db():
    """
    List all matrix names in the internal database.

    The names are the file names of all ``.mat`` files in the database
    directory without their suffix.

    Returns
    -------
    db_list : list
        List of matrix names in the internal database.
    """
    matrix_files = _DB_DIR.glob("*.mat")
    return [matrix_file.stem for matrix_file in matrix_files]
519
+
520
@staticmethod
@functools.cache
def std_protein_matrix():
    """
    Get the default :class:`SubstitutionMatrix` for protein sequence
    alignments, which is BLOSUM62.

    The result is cached, so repeated calls return the same object.

    Returns
    -------
    matrix : SubstitutionMatrix
        Default matrix.
    """
    # The same alphabet is used for both aligned sequences
    alphabet = ProteinSequence.alphabet
    return SubstitutionMatrix(alphabet, alphabet, "BLOSUM62")
535
+
536
@staticmethod
@functools.cache
def std_nucleotide_matrix():
    """
    Get the default :class:`SubstitutionMatrix` for DNA sequence
    alignments.

    The matrix is created with the matrix name ``"NUC"`` and uses
    ``NucleotideSequence.alphabet_amb`` for both sequences.
    The result is cached, so repeated calls return the same object.

    Returns
    -------
    matrix : SubstitutionMatrix
        Default matrix.
    """
    # The same alphabet is used for both aligned sequences
    alphabet = NucleotideSequence.alphabet_amb
    return SubstitutionMatrix(alphabet, alphabet, "NUC")
551
+
552
@staticmethod
@functools.cache
def std_3di_matrix():
    """
    Get the default :class:`SubstitutionMatrix` for 3Di sequence
    alignments.
    :footcite:`VanKempen2024`

    The result is cached, so repeated calls return the same object.

    Returns
    -------
    matrix : SubstitutionMatrix
        Default matrix.

    References
    ----------

    .. footbibliography::
    """
    # Import inside function to avoid circular import
    from biotite.structure.alphabet.i3d import I3DSequence

    # The same alphabet is used for both aligned sequences
    alphabet = I3DSequence.alphabet
    return SubstitutionMatrix(alphabet, alphabet, "3Di")
569
+
570
@staticmethod
@functools.cache
def std_protein_blocks_matrix(undefined_match=200, undefined_mismatch=-200):
    """
    Get the default :class:`SubstitutionMatrix` for Protein Blocks sequences.

    The matrix is adapted from *PBxplore* :footcite:`Barnoud2017`.
    The scores are loaded from the ``PB`` entry of the internal matrix
    database and extended by scores for the undefined symbol.
    The result is cached for each combination of parameters.

    Parameters
    ----------
    undefined_match, undefined_mismatch : int, optional
        The match and mismatch score for undefined symbols.
        The default values were chosen arbitrarily, but are in the order of
        magnitude of the other score values.

    Returns
    -------
    matrix : SubstitutionMatrix
        Default matrix.

    References
    ----------

    .. footbibliography::
    """
    # Imported locally instead of at module level
    # (presumably to avoid a circular import)
    from biotite.structure.alphabet.pb import ProteinBlocksSequence

    pb_alphabet = ProteinBlocksSequence.alphabet
    undefined = ProteinBlocksSequence.undefined_symbol
    scores = SubstitutionMatrix.dict_from_db("PB")
    # Pairing any defined symbol with the undefined symbol is a mismatch,
    # irrespective of the order of the two symbols
    for symbol in pb_alphabet:
        if symbol != undefined:
            scores[symbol, undefined] = undefined_mismatch
            scores[undefined, symbol] = undefined_mismatch
    # Only the undefined symbol paired with itself counts as match
    scores[undefined, undefined] = undefined_match
    return SubstitutionMatrix(pb_alphabet, pb_alphabet, scores)
612
+
613
def _fill_with_matrix_dict(self, matrix_dict):
    """
    Create the ``int32`` score matrix from a matrix dictionary.

    Parameters
    ----------
    matrix_dict : dict
        Maps ``(symbol1, symbol2)`` tuples to scores.
        An entry is looked up for every combination of a symbol from
        the first alphabet with a symbol from the second alphabet.
    """
    n_rows = len(self._alph1)
    n_cols = len(self._alph2)
    self._matrix = np.zeros((n_rows, n_cols), dtype=np.int32)
    # Visit all (row, column) code pairs in row-major order
    for i, j in np.ndindex(n_rows, n_cols):
        symbol_pair = (self._alph1.decode(i), self._alph2.decode(j))
        self._matrix[i, j] = int(matrix_dict[symbol_pair])
620
+
621
+
622
def _cartesian_product(array1, array2):
    """
    Create all combinations of elements from two arrays.

    Each row of the returned array holds one combination: the first
    column contains the element from `array1`, the second column the
    element from `array2`.
    The elements of `array2` vary fastest.
    """
    # Each element of the first array is repeated for every element
    # of the second array...
    first_column = np.repeat(array1, len(array2))
    # ...while the second array as a whole is cycled through
    second_column = np.tile(array2, len(array1))
    return np.array([first_column, second_column]).T
@@ -0,0 +1,24 @@
1
+ # 3Di bit/2
2
+ # Background (precomputed optional): 0.0489372 0.0306991 0.101049 0.0329671 0.0276149 0.0416262 0.0452521 0.030876 0.0297251 0.0607036 0.0150238 0.0215826 0.0783843 0.0512926 0.0264886 0.0610702 0.0201311 0.215998 0.0310265 0.0295417 0.00001
3
+ # Lambda (precomputed optional): 0.351568
4
+ a c d e f g h i k l m n p q r s t v w y
5
+ a 6 -3 1 2 3 -2 -2 -7 -3 -3 -10 -5 -1 1 -4 -7 -5 -6 0 -2
6
+ c -3 6 -2 -8 -5 -4 -4 -12 -13 1 -14 0 0 1 -1 0 -8 1 -7 -9
7
+ d 1 -2 4 -3 0 1 1 -3 -5 -4 -5 -2 1 -1 -1 -4 -2 -3 -2 -2
8
+ e 2 -8 -3 9 -2 -7 -4 -12 -10 -7 -17 -8 -6 -3 -8 -10 -10 -13 -6 -3
9
+ f 3 -5 0 -2 7 -3 -3 -5 1 -3 -9 -5 -2 2 -5 -8 -3 -7 4 -4
10
+ g -2 -4 1 -7 -3 6 3 0 -7 -7 -1 -2 -2 -4 3 -3 4 -6 -4 -2
11
+ h -2 -4 1 -4 -3 3 6 -4 -7 -6 -6 0 -1 -3 1 -3 -1 -5 -5 3
12
+ i -7 -12 -3 -12 -5 0 -4 8 -5 -11 7 -7 -6 -6 -3 -9 6 -12 -5 -8
13
+ k -3 -13 -5 -10 1 -7 -7 -5 9 -11 -8 -12 -6 -5 -9 -14 -5 -15 5 -8
14
+ l -3 1 -4 -7 -3 -7 -6 -11 -11 6 -16 -3 -2 2 -4 -4 -9 0 -8 -9
15
+ m -10 -14 -5 -17 -9 -1 -6 7 -8 -16 10 -9 -9 -10 -5 -10 3 -16 -6 -9
16
+ n -5 0 -2 -8 -5 -2 0 -7 -12 -3 -9 7 0 -2 2 3 -4 0 -8 -5
17
+ p -1 0 1 -6 -2 -2 -1 -6 -6 -2 -9 0 4 0 0 -2 -4 0 -4 -5
18
+ q 1 1 -1 -3 2 -4 -3 -6 -5 2 -10 -2 0 5 -2 -4 -5 -1 -2 -5
19
+ r -4 -1 -1 -8 -5 3 1 -3 -9 -4 -5 2 0 -2 6 2 0 -1 -6 -3
20
+ s -7 0 -4 -10 -8 -3 -3 -9 -14 -4 -10 3 -2 -4 2 6 -6 0 -11 -9
21
+ t -5 -8 -2 -10 -3 4 -1 6 -5 -9 3 -4 -4 -5 0 -6 8 -9 -5 -5
22
+ v -6 1 -3 -13 -7 -6 -5 -12 -15 0 -16 0 0 -1 -1 0 -9 3 -10 -11
23
+ w 0 -7 -2 -6 4 -4 -5 -5 5 -8 -6 -8 -4 -2 -6 -11 -5 -10 8 -6
24
+ y -2 -9 -2 -3 -4 -2 3 -8 -8 -9 -9 -5 -5 -5 -3 -9 -5 -11 -6 9
@@ -0,0 +1,31 @@
1
+ # Matrix made by matblas from blosum100_3.iij
2
+ # * column uses minimum score
3
+ # BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
4
+ # Blocks Database = /data/blocks_5.0/blocks.dat
5
+ # Cluster Percentage: >= 100
6
+ # Entropy = 1.4516, Expected = -1.0948
7
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
8
+ A 8 -3 -4 -5 -2 -2 -3 -1 -4 -4 -4 -2 -3 -5 -2 1 -1 -6 -5 -2 -4 -2 -2 -10
9
+ R -3 10 -2 -5 -8 0 -2 -6 -1 -7 -6 3 -4 -6 -5 -3 -3 -7 -5 -6 -4 -1 -3 -10
10
+ N -4 -2 11 1 -5 -1 -2 -2 0 -7 -7 -1 -5 -7 -5 0 -1 -8 -5 -7 5 -2 -3 -10
11
+ D -5 -5 1 10 -8 -2 2 -4 -3 -8 -8 -3 -8 -8 -5 -2 -4 -10 -7 -8 6 0 -4 -10
12
+ C -2 -8 -5 -8 14 -7 -9 -7 -8 -3 -5 -8 -4 -4 -8 -3 -3 -7 -6 -3 -7 -8 -5 -10
13
+ Q -2 0 -1 -2 -7 11 2 -5 1 -6 -5 2 -2 -6 -4 -2 -3 -5 -4 -5 -2 5 -2 -10
14
+ E -3 -2 -2 2 -9 2 10 -6 -2 -7 -7 0 -5 -8 -4 -2 -3 -8 -7 -5 0 7 -3 -10
15
+ G -1 -6 -2 -4 -7 -5 -6 9 -6 -9 -8 -5 -7 -8 -6 -2 -5 -7 -8 -8 -3 -5 -4 -10
16
+ H -4 -1 0 -3 -8 1 -2 -6 13 -7 -6 -3 -5 -4 -5 -3 -4 -5 1 -7 -2 -1 -4 -10
17
+ I -4 -7 -7 -8 -3 -6 -7 -9 -7 8 2 -6 1 -2 -7 -5 -3 -6 -4 4 -8 -7 -3 -10
18
+ L -4 -6 -7 -8 -5 -5 -7 -8 -6 2 8 -6 3 0 -7 -6 -4 -5 -4 0 -8 -6 -3 -10
19
+ K -2 3 -1 -3 -8 2 0 -5 -3 -6 -6 10 -4 -6 -3 -2 -3 -8 -5 -5 -2 0 -3 -10
20
+ M -3 -4 -5 -8 -4 -2 -5 -7 -5 1 3 -4 12 -1 -5 -4 -2 -4 -5 0 -7 -4 -3 -10
21
+ F -5 -6 -7 -8 -4 -6 -8 -8 -4 -2 0 -6 -1 11 -7 -5 -5 0 4 -3 -7 -7 -4 -10
22
+ P -2 -5 -5 -5 -8 -4 -4 -6 -5 -7 -7 -3 -5 -7 12 -3 -4 -8 -7 -6 -5 -4 -4 -10
23
+ S 1 -3 0 -2 -3 -2 -2 -2 -3 -5 -6 -2 -4 -5 -3 9 2 -7 -5 -4 -1 -2 -2 -10
24
+ T -1 -3 -1 -4 -3 -3 -3 -5 -4 -3 -4 -3 -2 -5 -4 2 9 -7 -5 -1 -2 -3 -2 -10
25
+ W -6 -7 -8 -10 -7 -5 -8 -7 -5 -6 -5 -8 -4 0 -8 -7 -7 17 2 -5 -9 -7 -6 -10
26
+ Y -5 -5 -5 -7 -6 -4 -7 -8 1 -4 -4 -5 -5 4 -7 -5 -5 2 12 -5 -6 -6 -4 -10
27
+ V -2 -6 -7 -8 -3 -5 -5 -8 -7 4 0 -5 0 -3 -6 -4 -1 -5 -5 8 -7 -5 -3 -10
28
+ B -4 -4 5 6 -7 -2 0 -3 -2 -8 -8 -2 -7 -7 -5 -1 -2 -9 -6 -7 6 0 -4 -10
29
+ Z -2 -1 -2 0 -8 5 7 -5 -1 -7 -6 0 -4 -7 -4 -2 -3 -7 -6 -5 0 6 -2 -10
30
+ X -2 -3 -3 -4 -5 -2 -3 -4 -4 -3 -3 -3 -3 -4 -4 -2 -2 -6 -4 -3 -4 -2 -3 -10
31
+ * -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 1