biotite 1.6.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +426 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +202 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +66 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +224 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +259 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +191 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +127 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +491 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +763 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +462 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1596 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +521 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +646 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +426 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2122 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +452 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.6.0.dist-info/METADATA +162 -0
  352. biotite-1.6.0.dist-info/RECORD +354 -0
  353. biotite-1.6.0.dist-info/WHEEL +4 -0
  354. biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,646 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides utility functions for creating filters on atom
7
+ arrays and atom array stacks.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann, Tom David Müller"
12
+ __all__ = [
13
+ "filter_solvent",
14
+ "filter_monoatomic_ions",
15
+ "filter_heavy",
16
+ "filter_nucleotides",
17
+ "filter_canonical_nucleotides",
18
+ "filter_amino_acids",
19
+ "filter_canonical_amino_acids",
20
+ "filter_carbohydrates",
21
+ "filter_intersection",
22
+ "filter_first_altloc",
23
+ "filter_highest_occupancy_altloc",
24
+ "filter_peptide_backbone",
25
+ "filter_phosphate_backbone",
26
+ "filter_linear_bond_continuity",
27
+ "filter_polymer",
28
+ ]
29
+
30
+
31
+ from functools import partial
32
+ import numpy as np
33
+ from biotite.structure.atoms import array as atom_array
34
+ from biotite.structure.info.groups import (
35
+ amino_acid_names,
36
+ carbohydrate_names,
37
+ nucleotide_names,
38
+ )
39
+ from biotite.structure.residues import get_residue_count, get_residue_starts
40
+
41
# Residue names regarded as canonical amino acids:
# the 20 standard ones plus pyrrolysine (PYL) and selenocysteine (SEC)
_canonical_aa_list = [
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE", "LEU",
    "LYS", "MET", "PHE", "PRO", "PYL", "SER", "THR", "TRP", "TYR", "VAL", "SEC",
]
# Residue names regarded as canonical nucleotides (RNA and DNA variants)
_canonical_nucleotide_list = [
    "A", "DA",
    "G", "DG",
    "C", "DC",
    "U", "DT",
]

# Residue names of solvent molecules, not only from the CCD,
# but also those written by modeling software
_solvent_list = [
    "HOH", "DOD", "SOL", "WAT", "H2O",
    "TIP3", "TIP4", "TIP5",
]

# Atom names that make up the respective polymer backbone
_peptide_backbone_atoms = ["N", "CA", "C"]
_phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
72
+
73
+
74
def filter_monoatomic_ions(array):
    """
    Filter all atoms of an atom array, that are monoatomic ions
    (e.g. sodium or chloride ions).

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        is a monoatomic ion.

    Notes
    -----
    An atom is reported as monoatomic ion, if its element symbol is
    equal to its residue name and the same holds for every other atom
    of its residue.
    The second condition is required, because the name comparison alone
    is also fulfilled by atoms of larger residues whose name happens to
    be an element symbol, e.g. the carbon atoms of a residue named
    ``C``.
    """
    # In monoatomic ions the element name is equal to the residue name
    name_equals_element = array.res_name == array.element
    if not np.any(name_equals_element):
        # Nothing to refine (this also covers empty arrays)
        return name_equals_element

    # The name comparison alone is not exclusive to ions:
    # accept a residue only if all of its atoms fulfill the comparison
    starts = get_residue_starts(array)
    residue_is_ion = np.logical_and.reduceat(name_equals_element, starts)
    residue_sizes = np.diff(np.append(starts, len(name_equals_element)))
    # Spread the residue-wise decision back to the atoms
    return np.repeat(residue_is_ion, residue_sizes)
93
+
94
+
95
def filter_heavy(array):
    """
    Select every atom that is not a hydrogen atom.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the
        element of the atom is neither ``"H"`` nor ``"D"``.
    """
    elements = array.element
    not_protium = elements != "H"
    not_deuterium = elements != "D"
    return not_protium & not_deuterium
111
+
112
+
113
def filter_solvent(array):
    """
    Select every atom whose residue is a solvent molecule.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the
        residue name of the atom is one of the known solvent names.
    """
    residue_names = array.res_name
    return np.isin(residue_names, _solvent_list)
129
+
130
+
131
def filter_canonical_nucleotides(array):
    """
    Select every atom whose residue is a canonical nucleotide.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the
        residue name of the atom is one of the canonical nucleotide
        names.
    """
    residue_names = array.res_name
    return np.isin(residue_names, _canonical_nucleotide_list)
147
+
148
+
149
def filter_nucleotides(array):
    """
    Select every atom whose residue is a nucleotide.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to a nucleotide.

    Notes
    -----
    Nucleotides are identified according to the PDB chemical component
    dictionary. A residue is considered a nucleotide if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
    ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
    ``RNA OH 5 PRIME TERMINUS``
    """
    known_names = nucleotide_names()
    return np.isin(array.res_name, known_names)
177
+
178
+
179
def filter_canonical_amino_acids(array):
    """
    Select every atom whose residue is a canonical amino acid.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the
        residue name of the atom is one of the canonical amino acid
        names.
    """
    residue_names = array.res_name
    return np.isin(residue_names, _canonical_aa_list)
196
+
197
+
198
def filter_amino_acids(array):
    """
    Select every atom whose residue is an amino acid.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to an amino acid residue.

    Notes
    -----
    Amino acids are identified according to the PDB chemical component
    dictionary. A residue is considered an amino acid if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``D-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``D-PEPTIDE LINKING``,
    ``D-PEPTIDE NH3 AMINO TERMINUS``,
    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
    ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
    """
    known_names = amino_acid_names()
    return np.isin(array.res_name, known_names)
229
+
230
+
231
def filter_carbohydrates(array):
    """
    Select every atom whose residue is a carbohydrate.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to a carbohydrate.

    Notes
    -----
    Carbohydrates are identified according to the PDB chemical component
    dictionary. A residue is considered a carbohydrate if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-SACCHARIDE``, ``D-SACCHARIDE, ALPHA LINKING``,
    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
    ``SACCHARIDE``
    """
    known_names = carbohydrate_names()
    return np.isin(array.res_name, known_names)
259
+
260
+
261
def _filter_atom_names(array, atom_names):
    # Boolean mask that is `True` for each atom,
    # whose atom name is contained in `atom_names`
    names_in_array = array.atom_name
    return np.isin(names_in_array, atom_names)
263
+
264
+
265
def filter_peptide_backbone(array):
    """
    Select the peptide backbone atoms of an array.

    These are the atoms named "N", "CA" and "C" that belong to amino
    acid residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where an atom
        is a part of the peptide backbone.
    """
    has_backbone_name = _filter_atom_names(array, _peptide_backbone_atoms)
    in_amino_acid = filter_amino_acids(array)
    return has_backbone_name & in_amino_acid
286
+
287
+
288
def filter_phosphate_backbone(array):
    """
    Select the phosphate backbone atoms of an array.

    These are the atoms named P, O5', C5', C4', C3' and O3' that belong
    to nucleotide residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is ``True`` for all indices in `array`, where an atom
        is a part of the phosphate backbone.
    """
    has_backbone_name = _filter_atom_names(array, _phosphate_backbone_atoms)
    in_nucleotide = filter_nucleotides(array)
    return has_backbone_name & in_nucleotide
309
+
310
+
311
def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
    """
    Filter for atoms such that their bond length with the next atom
    lies within the provided boundaries.

    The result will depend on the atoms' order.
    For instance, consider a molecule:

    .. code-block:: none

        C3
        |
        C1-C2-C4

    If the order corresponds to ``[C1, C2, C4, C3]``, the output will be
    ``[True, True, False, True]``.
    Note that the trailing atom will always evaluate to ``True``.

    Parameters
    ----------
    array : AtomArray
        The array to filter.
    min_len : float
        Minimum bond length.
    max_len : float
        Maximum bond length.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where an atom
        has a bond length with the next atom within [`min_len`, `max_len`]
        boundaries.
        It has the same length as `array`, i.e. it is empty for an
        empty `array`.

    Notes
    -----
    Note that this function purely uses distances between consecutive atoms.
    A potential ``BondList`` is not considered here.
    """
    coord = array.coord
    if len(coord) == 0:
        # Without any atom there is also no trailing atom:
        # appending `True` below would give a mask longer than `array`
        return np.zeros(0, dtype=bool)

    # Distance of each atom to its successor in the array
    dist = np.linalg.norm(np.diff(coord, axis=0), axis=1)
    mask = (dist >= min_len) & (dist <= max_len)
    # The last atom has no successor and is accepted by definition
    return np.append(mask, True)
353
+
354
+
355
def _is_polymer(array, min_size, pol_type):
    # Check whether `array` contains at least `min_size` residues of the
    # monomer kind selected by the first letter of `pol_type`
    # ("p": amino acids, "n": nucleotides, "c": carbohydrates)
    if not pol_type.startswith(("p", "n", "c")):
        raise ValueError(f"Unsupported polymer type {pol_type}")

    if pol_type.startswith("p"):
        monomer_filter = filter_amino_acids
    elif pol_type.startswith("n"):
        monomer_filter = filter_nucleotides
    else:
        monomer_filter = filter_carbohydrates

    monomer_mask = monomer_filter(array)
    n_monomers = get_residue_count(array[monomer_mask])
    return n_monomers >= min_size
367
+
368
+
369
def filter_polymer(array, min_size=2, pol_type="peptide"):
    """
    Filter for atoms that are a part of a consecutive standard macromolecular
    polymer entity.

    The array is divided at the positions reported by
    ``check_res_id_continuity()`` and each resulting segment is accepted
    or rejected as a whole.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to filter.
    min_size : int
        The minimum number of monomers.
    pol_type : str
        The polymer type, either ``"peptide"``, ``"nucleotide"``, or ``"carbohydrate"``.
        Abbreviations are supported: ``"p"``, ``"pep"``, ``"n"``, etc.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where atoms belong to
        consecutive polymer entity having at least `min_size` monomers.
    """
    # Import `check_res_id_continuity` here to avoid circular imports
    from biotite.structure.integrity import check_res_id_continuity

    discontinuity_idx = check_res_id_continuity(array)

    # NOTE(review): `np.split()` is applied to `array` itself and each
    # segment is rebuilt via `atom_array()` - confirm that this behaves
    # as intended for `AtomArrayStack` input
    segment_masks = []
    for segment in np.split(array, discontinuity_idx):
        n_atoms = len(segment)
        is_polymer = _is_polymer(
            atom_array(segment), min_size=min_size, pol_type=pol_type
        )
        # All atoms of a segment share the same decision
        segment_masks.append(np.full(n_atoms, is_polymer, dtype=bool))
    return np.concatenate(segment_masks)
401
+
402
+
403
def filter_intersection(array, intersect, categories=None):
    """
    Filter all atoms of one array that exist also in another array.

    An atom is defined as existent in the second array, if there is an
    atom in the second array that has the same annotation values in all
    categories that exists in both arrays.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.
    intersect : AtomArray
        Atoms in `array` that also exists in `intersect` are filtered.
    categories : iterable of str
        If specified, the given annotation categories are checked for equality in both
        arrays.
        By default, all common annotation categories are checked.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        exists also in `intersect`.

    Raises
    ------
    ValueError
        If one of the given `categories` does not exist in `array` or
        `intersect`.

    Examples
    --------

    Keep only the atoms of one array that also appear in another array:

    >>> array1 = AtomArray(length=5)
    >>> array1.chain_id = np.array(["A","B","C","D","E"])
    >>> array2 = AtomArray(length=3)
    >>> array2.chain_id = np.array(["D","B","C"])
    >>> array1 = array1[filter_intersection(array1, array2)]
    >>> print(array1.chain_id)
    ['B' 'C' 'D']
    """
    array_categories = array.get_annotation_categories()
    intersect_categories = intersect.get_annotation_categories()
    if categories is None:
        # Check atom equality only for categories,
        # which exist in both arrays
        categories = [
            category
            for category in array_categories
            if category in intersect_categories
        ]
    else:
        # The categories are iterated more than once below:
        # materialize them, so one-shot iterables are not exhausted
        # by the validation
        categories = list(categories)
        for category in categories:
            if category not in array_categories:
                raise ValueError(f"Category {category} does not exist in 'array'")
            if category not in intersect_categories:
                raise ValueError(f"Category {category} does not exist in 'intersect'")

    # Implicitly expect that the annotation array dtypes are the same for both
    structured_dtype = np.dtype(
        [(name, array.get_annotation(name).dtype) for name in categories]
    )
    array_annotations = _annotations_to_structured(array, structured_dtype)
    intersect_annotations = _annotations_to_structured(intersect, structured_dtype)
    # Identify the intersection of the two annotation arrays
    return np.isin(array_annotations, intersect_annotations)
465
+
466
+
467
def filter_first_altloc(atoms, altloc_ids):
    """
    Select, for each residue, the atoms carrying the first *altloc* ID
    that appears in that residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID (`'.'`, `'?'`, `' '`, `''`).
        - The atom has the same altloc ID (e.g. `'A'`, `'B'`, etc.)
          as the first atom in the residue that has an altloc ID.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with `altloc` set to
    `True`.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> filtered = atoms[filter_first_altloc(atoms, altloc_ids)]
    >>> print(filtered)
    1 CA 1.000 2.000 3.000
    1 CB 4.000 5.000 6.000
    """
    # Atoms that carry no altloc code are always kept
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Residue by residue, additionally keep the atoms sharing the first
    # letter altloc ID of that residue
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for begin, end in zip(boundaries[:-1], boundaries[1:]):
        ids_in_residue = altloc_ids[begin:end]
        first_letter_id = next(
            (loc for loc in ids_in_residue if loc.isalpha()), None
        )
        if first_letter_id is None:
            # The residue has no letter altloc ID at all
            continue
        keep[begin:end] |= ids_in_residue == first_letter_id

    return keep
534
+
535
+
536
def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
    """
    Select, for each residue, the atoms carrying the *altloc* ID
    with the highest occupancy in that residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.
    occupancies : ndarray, shape=(n,), dtype=float
        An array containing the occupancy values for each atom in
        `atoms`.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID
          (``'.'``, ``'?'``, ``' '``, ``''``).
        - The atom has the altloc ID (e.g. ``'A'``, ``'B'``, etc.),
          of which the corresponding occupancy values are highest
          for the **entire** residue.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with ``altloc`` set to
    ``True``.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> occupancies = np.array([1.0, 0.1, 0.9])
    >>> filtered = atoms[filter_highest_occupancy_altloc(
    ...     atoms, altloc_ids, occupancies
    ... )]
    >>> print(filtered)
    1 CA 1.000 2.000 3.000
    1 CB 6.000 5.000 4.000
    """
    # Atoms that carry no altloc code are always kept
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Residue by residue, additionally keep the atoms of the altloc ID
    # whose occupancies sum up to the highest value
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for begin, end in zip(boundaries[:-1], boundaries[1:]):
        res_occupancies = occupancies[begin:end]
        res_altloc_ids = altloc_ids[begin:end]

        candidate_ids = {loc for loc in res_altloc_ids if loc.isalpha()}
        if not candidate_ids:
            # The residue has no letter altloc ID at all
            continue

        best_id = None
        best_sum = -1.0
        # Sorted traversal with a strict comparison:
        # on equal sums the alphabetically first ID is chosen
        for altloc_id in sorted(candidate_ids):
            total = np.sum(res_occupancies[res_altloc_ids == altloc_id])
            if total > best_sum:
                best_id, best_sum = altloc_id, total
        keep[begin:end] |= res_altloc_ids == best_id

    return keep
622
+
623
+
624
def _annotations_to_structured(atoms, structured_dtype):
    """
    Gather annotation arrays of a structure in one structured `ndarray`.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,)
        The structure providing the annotation arrays.
    structured_dtype : dtype
        The dtype of the structured array to be created.
        Each field of the dtype names an annotation that is taken from `atoms`.

    Returns
    -------
    structured : ndarray, shape=(n,), dtype=structured_dtype
        The structured array.

    Raises
    ------
    TypeError
        If `structured_dtype` has no fields, i.e. it is not a structured dtype.
    """
    dtype_fields = structured_dtype.fields
    if dtype_fields is None:
        raise TypeError("dtype must be structured")

    result = np.zeros(atoms.array_length(), dtype=structured_dtype)
    # Fill field by field; values are cast to the field dtype on assignment
    for name in dtype_fields:
        result[name] = atoms.get_annotation(name)
    return result