biotite 1.3.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +191 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +160 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1226 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  266. biotite/structure/bonds.pyx +1975 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +276 -0
  271. biotite/structure/charges.cpython-312-darwin.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +681 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +590 -0
  278. biotite/structure/geometry.py +655 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +90 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +388 -0
  311. biotite/structure/io/pdb/file.py +1356 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +671 -0
  319. biotite/structure/io/pdbx/cif.py +1088 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +358 -0
  322. biotite/structure/io/pdbx/convert.py +2097 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1047 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +544 -0
  337. biotite/structure/rings.py +335 -0
  338. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +292 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +168 -0
  349. biotite/version.py +21 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.3.0.dist-info/METADATA +162 -0
  352. biotite-1.3.0.dist-info/RECORD +354 -0
  353. biotite-1.3.0.dist-info/WHEEL +6 -0
  354. biotite-1.3.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,590 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides utility functions for creating filters on atom
7
+ arrays and atom array stacks.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann, Tom David Müller"
12
+ __all__ = [
13
+ "filter_solvent",
14
+ "filter_monoatomic_ions",
15
+ "filter_nucleotides",
16
+ "filter_canonical_nucleotides",
17
+ "filter_amino_acids",
18
+ "filter_canonical_amino_acids",
19
+ "filter_carbohydrates",
20
+ "filter_intersection",
21
+ "filter_first_altloc",
22
+ "filter_highest_occupancy_altloc",
23
+ "filter_peptide_backbone",
24
+ "filter_phosphate_backbone",
25
+ "filter_linear_bond_continuity",
26
+ "filter_polymer",
27
+ ]
28
+
29
+
30
+ from functools import partial
31
+ import numpy as np
32
+ from biotite.structure.atoms import array as atom_array
33
+ from biotite.structure.info.groups import (
34
+ amino_acid_names,
35
+ carbohydrate_names,
36
+ nucleotide_names,
37
+ )
38
+ from biotite.structure.residues import get_residue_count, get_residue_starts
39
+
40
# Residue names of the 20 standard amino acids,
# complemented by pyrrolysine (PYL) and selenocysteine (SEC)
_canonical_aa_list = [
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY",
    "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "PYL",
    "SER", "THR", "TRP", "TYR", "VAL", "SEC",
]  # fmt: skip
# Residue names of the canonical ribonucleotides and deoxyribonucleotides
_canonical_nucleotide_list = [
    "A", "DA",
    "G", "DG",
    "C", "DC",
    "U", "DT",
]  # fmt: skip

# Residue names, that are treated as solvent molecules
_solvent_list = ["HOH", "SOL"]

# Atom names, that make up the backbone of the respective polymer type
_peptide_backbone_atoms = ["N", "CA", "C"]
_phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
70
+
71
+
72
def filter_monoatomic_ions(array):
    """
    Create a mask that selects the monoatomic ions
    (e.g. sodium or chloride ions) of a structure.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        is a monoatomic ion.
    """
    residue_names = array.res_name
    elements = array.element
    # A monoatomic ion is recognized by its residue name
    # being identical to its element name
    is_ion = residue_names == elements
    return is_ion
91
+
92
+
93
def filter_solvent(array):
    """
    Create a mask that selects the solvent atoms of a structure.

    An atom is considered part of the solvent, if its residue name
    is listed in the module-level `_solvent_list`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to the solvent.
    """
    solvent_mask = np.isin(array.res_name, _solvent_list)
    return solvent_mask
109
+
110
+
111
def filter_canonical_nucleotides(array):
    """
    Create a mask that selects the atoms of canonical nucleotides.

    An atom is selected, if its residue name is listed in the
    module-level `_canonical_nucleotide_list`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to a canonical nucleotide.
    """
    nucleotide_mask = np.isin(array.res_name, _canonical_nucleotide_list)
    return nucleotide_mask
127
+
128
+
129
def filter_nucleotides(array):
    """
    Create a mask that selects the atoms of nucleotide residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to a nucleotide.

    Notes
    -----
    Nucleotides are identified according to the PDB chemical component
    dictionary. A residue is considered a nucleotide if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
    ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
    ``RNA OH 5 PRIME TERMINUS``
    """
    # The accepted residue names are provided by the `info` subpackage
    known_names = nucleotide_names()
    return np.isin(array.res_name, known_names)
157
+
158
+
159
def filter_canonical_amino_acids(array):
    """
    Create a mask that selects the atoms of canonical amino acid
    residues.

    An atom is selected, if its residue name is listed in the
    module-level `_canonical_aa_list`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to a canonical amino acid residue.
    """
    amino_acid_mask = np.isin(array.res_name, _canonical_aa_list)
    return amino_acid_mask
176
+
177
+
178
def filter_amino_acids(array):
    """
    Create a mask that selects the atoms of amino acid residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to an amino acid residue.

    Notes
    -----
    Amino acids are identified according to the PDB chemical component
    dictionary. A residue is considered an amino acid if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``D-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``D-PEPTIDE LINKING``,
    ``D-PEPTIDE NH3 AMINO TERMINUS``,
    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
    ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
    """
    # The accepted residue names are provided by the `info` subpackage
    known_names = amino_acid_names()
    return np.isin(array.res_name, known_names)
209
+
210
+
211
def filter_carbohydrates(array):
    """
    Create a mask that selects the atoms of carbohydrate residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        belongs to a carbohydrate.

    Notes
    -----
    Carbohydrates are identified according to the PDB chemical component
    dictionary. A residue is considered a carbohydrate if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-SACCHARIDE``, ``D-SACCHARIDE, ALPHA LINKING``,
    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
    ``SACCHARIDE``
    """
    # The accepted residue names are provided by the `info` subpackage
    known_names = carbohydrate_names()
    return np.isin(array.res_name, known_names)
239
+
240
+
241
def _filter_atom_names(array, atom_names):
    """
    Create a mask that is `True` for each atom in `array`,
    whose atom name is contained in `atom_names`.
    """
    name_mask = np.isin(array.atom_name, atom_names)
    return name_mask
243
+
244
+
245
def filter_peptide_backbone(array):
    """
    Create a mask that selects the peptide backbone atoms of a
    structure.

    These are the "N", "CA" and "C" atoms of amino acid residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where an atom
        is a part of the peptide backbone.
    """
    has_backbone_name = _filter_atom_names(array, _peptide_backbone_atoms)
    in_amino_acid = filter_amino_acids(array)
    # The atom name alone is not sufficient,
    # the atom must be located in an amino acid as well
    return has_backbone_name & in_amino_acid
266
+
267
+
268
def filter_phosphate_backbone(array):
    """
    Create a mask that selects the phosphate backbone atoms of a
    structure.

    These are the P, O5', C5', C4', C3', and O3' atoms of nucleotide
    residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is ``True`` for all indices in `array`, where an atom
        is a part of the phosphate backbone.
    """
    has_backbone_name = _filter_atom_names(array, _phosphate_backbone_atoms)
    in_nucleotide = filter_nucleotides(array)
    # The atom name alone is not sufficient,
    # the atom must be located in a nucleotide as well
    return has_backbone_name & in_nucleotide
289
+
290
+
291
def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
    """
    Filter for atoms such that their bond length with the next atom
    lies within the provided boundaries.

    The result will depend on the atoms' order.
    For instance, consider a molecule:

    .. code-block:: none

          C3
          |
       C1-C2-C4

    If the order corresponds to ``[C1, C2, C4, C3]``, the output will be
    ``[True, True, False, True]``.
    Note that the trailing atom will always evaluate to ``True``.

    Parameters
    ----------
    array : AtomArray
        The array to filter.
    min_len : float
        Minimum bond length.
    max_len : float
        Maximum bond length.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where an atom
        has a bond length with the next atom within [`min_len`, `max_len`]
        boundaries.
        The array has the same length as `array`, i.e. it is empty for
        an empty `array`.

    Notes
    -----
    Note that this function purely uses distances between consecutive atoms.
    A potential ``BondList`` is not considered here.
    """
    coord = array.coord
    # Without any atom there is also no trailing atom to be marked:
    # Unconditionally appending `True` would give a mask,
    # that is one element longer than the structure it should index
    if coord.shape[0] == 0:
        return np.zeros(0, dtype=bool)

    # Distance of each atom to its successor in the array
    dist = np.linalg.norm(np.diff(coord, axis=0), axis=1)
    mask = (dist >= min_len) & (dist <= max_len)
    # The last atom has no successor and is always accepted
    return np.append(mask, True)
333
+
334
+
335
def _is_polymer(array, min_size, pol_type):
    """
    Check whether `array` contains at least `min_size` residues of the
    given polymer type.

    The polymer type is chosen based on the leading character of
    `pol_type`: ``"p"`` selects amino acids, ``"n"`` nucleotides and
    ``"c"`` carbohydrates.
    Any other value raises a :class:`ValueError`.
    """
    residue_filter = None
    for prefix, candidate in (
        ("p", filter_amino_acids),
        ("n", filter_nucleotides),
        ("c", filter_carbohydrates),
    ):
        if pol_type.startswith(prefix):
            residue_filter = candidate
            break
    if residue_filter is None:
        raise ValueError(f"Unsupported polymer type {pol_type}")

    # Only count the residues, that are monomers of the requested type
    monomers = array[residue_filter(array)]
    return get_residue_count(monomers) >= min_size
347
+
348
+
349
def filter_polymer(array, min_size=2, pol_type="peptide"):
    """
    Filter for atoms that are a part of a consecutive standard macromolecular
    polymer entity.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to filter.
    min_size : int
        The minimum number of monomers.
    pol_type : str
        The polymer type, either ``"peptide"``, ``"nucleotide"``, or ``"carbohydrate"``.
        Abbreviations are supported: ``"p"``, ``"pep"``, ``"n"``, etc.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where atoms belong to
        consecutive polymer entity having at least `min_size` monomers.
    """
    # Import `check_res_id_continuity` here to avoid circular imports
    from biotite.structure.integrity import check_res_id_continuity

    # The structure is divided at the indices
    # returned by `check_res_id_continuity()`
    discontinuities = check_res_id_continuity(array)

    segment_masks = []
    for segment in np.split(array, discontinuities):
        # Each segment is accepted or rejected as a whole
        is_polymer = _is_polymer(
            atom_array(segment), min_size=min_size, pol_type=pol_type
        )
        segment_masks.append(np.full(len(segment), is_polymer, dtype=bool))
    return np.concatenate(segment_masks)
381
+
382
+
383
def filter_intersection(array, intersect):
    """
    Filter all atoms of one array that exist also in another array.

    An atom is defined as existent in the second array, if there is an
    atom in the second array that has the same annotation values in all
    categories that exists in both arrays.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.
    intersect : AtomArray
        Atoms in `array` that also exists in `intersect` are filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        exists also in `intersect`.

    Examples
    --------

    Creating an atom array from atoms:

    >>> array1 = AtomArray(length=5)
    >>> array1.chain_id = np.array(["A","B","C","D","E"])
    >>> array2 = AtomArray(length=3)
    >>> array2.chain_id = np.array(["D","B","C"])
    >>> array1 = array1[filter_intersection(array1, array2)]
    >>> print(array1.chain_id)
    ['B' 'C' 'D']
    """
    n_atoms = array.array_length()
    n_intersect = intersect.array_length()

    intersect_categories = intersect.get_annotation_categories()
    # Check atom equality only for categories,
    # which exist in both arrays
    categories = [
        category
        for category in array.get_annotation_categories()
        if category in intersect_categories
    ]
    # The annotation arrays do not change during the comparison:
    # Look them up once instead of once per atom and category
    annotation_pairs = [
        (array.get_annotation(category), intersect.get_annotation(category))
        for category in categories
    ]

    mask = np.zeros(n_atoms, dtype=bool)
    for i in range(n_atoms):
        # Atoms in `intersect`, that are still equal to atom `i`
        # after comparing each shared category
        candidates = np.ones(n_intersect, dtype=bool)
        for annotation, intersect_annotation in annotation_pairs:
            candidates &= intersect_annotation == annotation[i]
        mask[i] = candidates.any()
    return mask
434
+
435
+
436
def filter_first_altloc(atoms, altloc_ids):
    """
    Filter all atoms, that have the first *altloc* ID appearing in a
    residue.

    Structure files (PDB, PDBx) may contain duplicate atom records,
    when a residue is found in multiple alternate locations
    (*altloc*).
    This function removes such duplicates by keeping a single
    *altloc ID* per residue and discarding the other *altlocs*.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID (`'.'`, `'?'`, `' '`, `''`).
        - The atom has the same altloc ID (e.g. `'A'`, `'B'`, etc.)
          as the first atom in the residue that has an altloc ID.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with `altloc` set to
    `True`.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> filtered = atoms[filter_first_altloc(atoms, altloc_ids)]
    >>> print(filtered)
                1      CA               1.000    2.000    3.000
                1      CB               4.000    5.000    6.000
    """
    # Atoms without an altloc code always pass the filter
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Within each residue, additionally keep the atoms carrying the
    # first letter altloc ID that appears in that residue
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for res_start, res_stop in zip(boundaries[:-1], boundaries[1:]):
        altlocs_in_res = altloc_ids[res_start:res_stop]
        first_id = next((loc for loc in altlocs_in_res if loc.isalpha()), None)
        if first_id is None:
            # No altloc ID in this residue -> Nothing to do
            continue
        keep[res_start:res_stop] |= altlocs_in_res == first_id

    return keep
503
+
504
+
505
def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
    """
    For each residue, filter all atoms, that have the *altloc* ID
    with the highest occupancy for this residue.

    Structure files (PDB, PDBx) may contain duplicate atom records,
    when a residue is found in multiple alternate locations
    (*altloc*).
    This function removes such duplicates by keeping a single
    *altloc ID* per residue and discarding the other *altlocs*.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.
    occupancies : ndarray, shape=(n,), dtype=float
        An array containing the occupancy values for each atom in
        `atoms`.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID
          (``'.'``, ``'?'``, ``' '``, ``''``).
        - The atom has the altloc ID (e.g. ``'A'``, ``'B'``, etc.),
          of which the corresponding occupancy values are highest
          for the **entire** residue.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with ``altloc`` set to
    ``True``.

    Examples
    --------

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> occupancies = np.array([1.0, 0.1, 0.9])
    >>> filtered = atoms[filter_highest_occupancy_altloc(
    ...     atoms, altloc_ids, occupancies
    ... )]
    >>> print(filtered)
                1      CA               1.000    2.000    3.000
                1      CB               6.000    5.000    4.000
    """
    # Atoms without an altloc code always pass the filter
    keep = np.isin(altloc_ids, [".", "?", " ", ""])

    # Within each residue, additionally keep the atoms of the altloc ID
    # whose occupancies sum up to the highest value
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for res_start, res_stop in zip(boundaries[:-1], boundaries[1:]):
        occupancies_in_res = occupancies[res_start:res_stop]
        altlocs_in_res = altloc_ids[res_start:res_stop]

        # Sorted order makes the alphabetically first ID win a tie
        candidate_ids = sorted({loc for loc in altlocs_in_res if loc.isalpha()})
        if not candidate_ids:
            # No altloc ID in this residue -> Nothing to do
            continue

        best_sum = -1.0
        best_id = None
        for candidate in candidate_ids:
            candidate_sum = np.sum(occupancies_in_res[altlocs_in_res == candidate])
            if candidate_sum > best_sum:
                best_sum, best_id = candidate_sum, candidate
        keep[res_start:res_stop] |= altlocs_in_res == best_id

    return keep