biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,591 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides utility functions for creating filters on atom
7
+ arrays and atom array stacks.
8
+ """
9
+
10
# Override the module name, so that objects defined below report
# ``biotite.structure`` as their module instead of this submodule.
__name__ = "biotite.structure"
__author__ = "Patrick Kunzmann, Tom David Müller"
# Public names exported by a star-import of this module.
__all__ = [
    "filter_solvent",
    "filter_monoatomic_ions",
    "filter_nucleotides",
    "filter_canonical_nucleotides",
    "filter_amino_acids",
    "filter_canonical_amino_acids",
    "filter_carbohydrates",
    "filter_intersection",
    "filter_first_altloc",
    "filter_highest_occupancy_altloc",
    "filter_peptide_backbone",
    "filter_phosphate_backbone",
    "filter_linear_bond_continuity",
    "filter_polymer",
]
28
+
29
+
30
+ from functools import partial
31
+ import numpy as np
32
+ from biotite.structure.atoms import array as atom_array
33
+ from biotite.structure.info.groups import (
34
+ amino_acid_names,
35
+ carbohydrate_names,
36
+ nucleotide_names,
37
+ )
38
+ from biotite.structure.residues import get_residue_count, get_residue_starts
39
+
40
# Residue names accepted by `filter_canonical_amino_acids()`
_canonical_aa_list = [
    "ALA",
    "ARG",
    "ASN",
    "ASP",
    "CYS",
    "GLN",
    "GLU",
    "GLY",
    "HIS",
    "ILE",
    "LEU",
    "LYS",
    "MET",
    "PHE",
    "PRO",
    "PYL",
    "SER",
    "THR",
    "TRP",
    "TYR",
    "VAL",
    "SEC",
]
# Residue names accepted by `filter_canonical_nucleotides()`
_canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]

# Residue names of solvent molecules not only in CCD, but also from modeling software
_solvent_list = ["HOH", "DOD", "SOL", "WAT", "H2O", "TIP3", "TIP4", "TIP5"]

# Atom names selected by `filter_peptide_backbone()`
_peptide_backbone_atoms = ["N", "CA", "C"]
# Atom names selected by `filter_phosphate_backbone()`
_phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
71
+
72
+
73
def filter_monoatomic_ions(array):
    """
    Create a mask that selects the monoatomic ions of a structure
    (e.g. sodium or chloride ions).

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom is a monoatomic
        ion.
    """
    # A monoatomic ion is recognized by its residue name
    # being identical to its element symbol
    residue_names = array.res_name
    elements = array.element
    is_ion = residue_names == elements
    return is_ion
92
+
93
+
94
def filter_solvent(array):
    """
    Create a mask that selects the solvent atoms of a structure.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to the
        solvent.
    """
    # Solvent is identified purely by residue name
    residue_names = array.res_name
    is_solvent = np.isin(residue_names, _solvent_list)
    return is_solvent
110
+
111
+
112
def filter_canonical_nucleotides(array):
    """
    Create a mask that selects the atoms of canonical nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to a
        canonical nucleotide.
    """
    residue_names = array.res_name
    is_canonical = np.isin(residue_names, _canonical_nucleotide_list)
    return is_canonical
128
+
129
+
130
def filter_nucleotides(array):
    """
    Create a mask that selects the atoms of nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to a
        nucleotide.

    Notes
    -----
    Nucleotides are identified according to the PDB chemical component
    dictionary. A residue is considered a nucleotide if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
    ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
    ``RNA OH 5 PRIME TERMINUS``
    """
    known_names = nucleotide_names()
    is_nucleotide = np.isin(array.res_name, known_names)
    return is_nucleotide
158
+
159
+
160
def filter_canonical_amino_acids(array):
    """
    Create a mask that selects the atoms of canonical amino acid
    residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to a
        canonical amino acid residue.
    """
    residue_names = array.res_name
    is_canonical = np.isin(residue_names, _canonical_aa_list)
    return is_canonical
177
+
178
+
179
def filter_amino_acids(array):
    """
    Create a mask that selects the atoms of amino acid residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to an
        amino acid residue.

    Notes
    -----
    Amino acids are identified according to the PDB chemical component
    dictionary. A residue is considered an amino acid if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``D-GAMMA-PEPTIDE, C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
    ``D-PEPTIDE NH3 AMINO TERMINUS``,
    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
    ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
    """
    known_names = amino_acid_names()
    is_amino_acid = np.isin(array.res_name, known_names)
    return is_amino_acid
210
+
211
+
212
def filter_carbohydrates(array):
    """
    Create a mask that selects the atoms of carbohydrates.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to a
        carbohydrate.

    Notes
    -----
    Carbohydrates are identified according to the PDB chemical component
    dictionary. A residue is considered a carbohydrate if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-SACCHARIDE``, ``D-SACCHARIDE, ALPHA LINKING``,
    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
    ``SACCHARIDE``
    """
    known_names = carbohydrate_names()
    is_carbohydrate = np.isin(array.res_name, known_names)
    return is_carbohydrate
240
+
241
+
242
def _filter_atom_names(array, atom_names):
    # Mask that is True where the atom name is one of `atom_names`
    name_matches = np.isin(array.atom_name, atom_names)
    return name_matches
244
+
245
+
246
def filter_peptide_backbone(array):
    """
    Create a mask that selects the peptide backbone atoms of a
    structure.

    This includes the "N", "CA" and "C" atoms of amino acids.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom is a part of the
        peptide backbone.
    """
    # An atom qualifies only if both its name and its residue match
    has_backbone_name = _filter_atom_names(array, _peptide_backbone_atoms)
    in_amino_acid = filter_amino_acids(array)
    return has_backbone_name & in_amino_acid
267
+
268
+
269
def filter_phosphate_backbone(array):
    """
    Create a mask that selects the phosphate backbone atoms of a
    structure.

    This includes the P, O5', C5', C4', C3', and O3' atoms.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom is a part of the
        phosphate backbone.
    """
    # An atom qualifies only if both its name and its residue match
    has_backbone_name = _filter_atom_names(array, _phosphate_backbone_atoms)
    in_nucleotide = filter_nucleotides(array)
    return has_backbone_name & in_nucleotide
290
+
291
+
292
def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
    """
    Filter for atoms such that their bond length with the next atom
    lies within the provided boundaries.

    The result will depend on the atoms' order.
    For instance, consider a molecule:

    .. code-block:: none

           C3
           |
        C1-C2-C4

    If the order corresponds to ``[C1, C2, C4, C3]``, the output will be
    ``[True, True, False, True]``.
    Note that the trailing atom will always evaluate to ``True``.

    Parameters
    ----------
    array : AtomArray
        The array to filter.
    min_len : float
        Minimum bond length.
    max_len : float
        Maximum bond length.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where an atom
        has a bond length with the next atom within [`min_len`, `max_len`]
        boundaries.
        For an `array` without atoms, the filter is empty as well.

    Notes
    -----
    Note that this function purely uses distances between consecutive atoms.
    A potential ``BondList`` is not considered here.
    """
    coord = array.coord
    if coord.shape[0] == 0:
        # Without this guard the trailing ``True`` appended below would
        # yield a mask of length 1 for an array of length 0
        return np.zeros(0, dtype=bool)
    # Distance of each atom to its successor in the array
    dist = np.linalg.norm(np.diff(coord, axis=0), axis=1)
    mask = (dist >= min_len) & (dist <= max_len)
    # The last atom has no successor and is accepted by definition
    return np.append(mask, True)
334
+
335
+
336
def _is_polymer(array, min_size, pol_type):
    # Decide whether `array` contains at least `min_size` residues
    # of the monomer class selected by the first letter of `pol_type`
    dispatch = (
        ("p", filter_amino_acids),
        ("n", filter_nucleotides),
        ("c", filter_carbohydrates),
    )
    for prefix, candidate in dispatch:
        if pol_type.startswith(prefix):
            residue_filter = candidate
            break
    else:
        raise ValueError(f"Unsupported polymer type {pol_type}")

    monomers = array[residue_filter(array)]
    return get_residue_count(monomers) >= min_size
348
+
349
+
350
def filter_polymer(array, min_size=2, pol_type="peptide"):
    """
    Create a mask that selects atoms belonging to a consecutive standard
    macromolecular polymer entity.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to filter.
    min_size : int
        The minimum number of monomers.
    pol_type : str
        The polymer type, either ``"peptide"``, ``"nucleotide"``, or ``"carbohydrate"``.
        Abbreviations are supported: ``"p"``, ``"pep"``, ``"n"``, etc.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom belongs to a
        consecutive polymer entity having at least `min_size` monomers.
    """
    # Import `check_res_id_continuity` here to avoid circular imports
    from biotite.structure.integrity import check_res_id_continuity

    # Cut the structure at each index reported by the continuity check
    break_indices = check_res_id_continuity(array)

    # Every atom of a segment shares the verdict obtained for that segment
    segment_masks = [
        np.full(
            len(segment),
            _is_polymer(atom_array(segment), min_size=min_size, pol_type=pol_type),
            dtype=bool,
        )
        for segment in np.split(array, break_indices)
    ]
    return np.concatenate(segment_masks)
382
+
383
+
384
def filter_intersection(array, intersect):
    """
    Create a mask that selects the atoms of one array that are also
    present in another array.

    An atom counts as present in the second array, if the second array
    contains an atom with equal annotation values in every category
    that exists in both arrays.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.
    intersect : AtomArray
        Atoms in `array` that also exist in `intersect` are selected.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at each index of `array` whose atom also exists in
        `intersect`.

    Examples
    --------

    Creating an atom array from atoms:

    >>> array1 = AtomArray(length=5)
    >>> array1.chain_id = np.array(["A","B","C","D","E"])
    >>> array2 = AtomArray(length=3)
    >>> array2.chain_id = np.array(["D","B","C"])
    >>> array1 = array1[filter_intersection(array1, array2)]
    >>> print(array1.chain_id)
    ['B' 'C' 'D']
    """
    atom_count = array.array_length()
    mask = np.full(atom_count, True, dtype=bool)
    other_categories = intersect.get_annotation_categories()
    # Only categories present in both arrays take part in the comparison
    shared = [
        name
        for name in array.get_annotation_categories()
        if name in other_categories
    ]
    # Bind the annotation arrays once instead of on every iteration
    own_annots = [array.get_annotation(name) for name in shared]
    other_annots = [intersect.get_annotation(name) for name in shared]
    other_count = intersect.array_length()
    for index in range(atom_count):
        # Candidates in `intersect` that still agree with the current atom
        candidates = np.full(other_count, True, dtype=bool)
        for own, other in zip(own_annots, other_annots):
            candidates &= other == own[index]
        mask[index] = candidates.any()
    return mask
435
+
436
+
437
def filter_first_altloc(atoms, altloc_ids):
    """
    Filter all atoms, that have the first *altloc* ID appearing in a
    residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain ``'.'``, ``'?'``, ``' '``, ``''`` or a letter at each
        position.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID (``'.'``, ``'?'``, ``' '``, ``''``).
        - The atom has the same altloc ID (e.g. ``'A'``, ``'B'``, etc.)
          as the first atom in the residue that has an altloc ID.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with ``altloc`` set to
    ``True``.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> filtered = atoms[filter_first_altloc(atoms, altloc_ids)]
    >>> print(filtered)
    1 CA 1.000 2.000 3.000
    1 CB 4.000 5.000 6.000
    """
    # Atoms without an altloc code always pass the filter
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Additionally, for each residue, keep the atoms that carry the first
    # letter altloc ID encountered in that residue
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        ids_in_res = altloc_ids[start:stop]
        first_id = next((loc for loc in ids_in_res if loc.isalpha()), None)
        if first_id is None:
            # No altloc ID in this residue -> Nothing to do
            continue
        keep[start:stop] |= ids_in_res == first_id

    return keep
504
+
505
+
506
def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
    """
    For each residue, filter all atoms, that have the *altloc* ID
    with the highest occupancy for this residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain ``'.'``, ``'?'``, ``' '``, ``''`` or a letter at each
        position.
    occupancies : ndarray, shape=(n,), dtype=float
        An array containing the occupancy values for each atom in
        `atoms`.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID
          (``'.'``, ``'?'``, ``' '``, ``''``).
        - The atom has the altloc ID (e.g. ``'A'``, ``'B'``, etc.),
          of which the corresponding occupancy values are highest
          for the **entire** residue.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with ``altloc`` set to
    ``True``.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> occupancies = np.array([1.0, 0.1, 0.9])
    >>> filtered = atoms[filter_highest_occupancy_altloc(
    ...     atoms, altloc_ids, occupancies
    ... )]
    >>> print(filtered)
    1 CA 1.000 2.000 3.000
    1 CB 6.000 5.000 4.000
    """
    # Atoms without an altloc code always pass the filter
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Additionally, for each residue, keep the atoms of the altloc ID
    # whose occupancies sum up to the highest value
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        ids_in_res = altloc_ids[start:stop]
        occupancies_in_res = occupancies[start:stop]

        candidate_ids = sorted({loc for loc in ids_in_res if loc.isalpha()})
        if not candidate_ids:
            # No altloc ID in this residue -> Nothing to do
            continue

        # The candidates are visited in sorted order and only a strictly
        # larger sum replaces the current best one,
        # so the first ID in sorted order wins in case of a tie
        best_id = None
        best_sum = -1.0
        for candidate in candidate_ids:
            candidate_sum = np.sum(occupancies_in_res[ids_in_res == candidate])
            if candidate_sum > best_sum:
                best_id, best_sum = candidate, candidate_sum
        keep[start:stop] |= ids_in_res == best_id

    return keep