biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,585 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides utility functions for creating filters on atom
7
+ arrays and atom array stacks.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann, Tom David Müller"
12
+ __all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
13
+ "filter_canonical_nucleotides", "filter_amino_acids",
14
+ "filter_canonical_amino_acids", "filter_carbohydrates",
15
+ "filter_backbone", "filter_intersection", "filter_first_altloc",
16
+ "filter_highest_occupancy_altloc", "filter_peptide_backbone",
17
+ "filter_phosphate_backbone", "filter_linear_bond_continuity",
18
+ "filter_polymer"]
19
+
20
+ import warnings
21
+
22
+ import numpy as np
23
+ from functools import partial
24
+ from .atoms import array as atom_array
25
+ from .residues import get_residue_starts, get_residue_count
26
+ from .info.groups import amino_acid_names, carbohydrate_names, nucleotide_names
27
+
28
+
29
# Residue names treated as canonical amino acids
# (the 20 standard ones plus PYL and SEC).
_canonical_aa_list = [
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS",
    "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SER", "THR",
    "TRP", "TYR", "VAL", "SEC",
]
# Residue names treated as canonical (deoxy)ribonucleotides.
_canonical_nucleotide_list = [
    "A", "DA",
    "G", "DG",
    "C", "DC",
    "U", "DT",
]

# Residue names treated as solvent.
_solvent_list = ["HOH", "SOL"]

# Atom names that make up the peptide backbone.
_peptide_backbone_atoms = ["N", "CA", "C"]
# Atom names that make up the phosphate backbone of nucleic acids.
_phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
38
+
39
+
40
def filter_monoatomic_ions(array):
    """
    Create a mask selecting the monoatomic ions of a structure
    (e.g. sodium or chloride ions).

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom is a monoatomic
        ion.
    """
    # A monoatomic ion is recognized by its residue name being
    # identical to its element symbol
    is_ion = array.res_name == array.element
    return is_ion
59
+
60
+
61
def filter_solvent(array):
    """
    Create a mask selecting the solvent atoms of a structure.

    An atom counts as solvent if its residue name is listed in the
    module-level `_solvent_list`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to the
        solvent.
    """
    residue_names = array.res_name
    return np.isin(residue_names, _solvent_list)
77
+
78
+
79
def filter_canonical_nucleotides(array):
    """
    Create a mask selecting the atoms of canonical nucleotides.

    An atom is selected if its residue name is listed in the
    module-level `_canonical_nucleotide_list`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to a
        canonical nucleotide.
    """
    residue_names = array.res_name
    return np.isin(residue_names, _canonical_nucleotide_list)
95
+
96
+
97
def filter_nucleotides(array):
    """
    Create a mask selecting the atoms that belong to nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to a
        nucleotide.

    Notes
    -----
    Nucleotides are identified according to the PDB chemical component
    dictionary. A residue is considered a nucleotide if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
    ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
    ``RNA OH 5 PRIME TERMINUS``
    """
    # The residue names to match come from `nucleotide_names()`
    known_names = nucleotide_names()
    return np.isin(array.res_name, known_names)
125
+
126
+
127
def filter_canonical_amino_acids(array):
    """
    Create a mask selecting the atoms of canonical amino acid
    residues.

    An atom is selected if its residue name is listed in the
    module-level `_canonical_aa_list`.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to a
        canonical amino acid residue.
    """
    residue_names = array.res_name
    return np.isin(residue_names, _canonical_aa_list)
144
+
145
+
146
def filter_amino_acids(array):
    """
    Create a mask selecting the atoms that belong to amino acid
    residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to an
        amino acid residue.

    Notes
    -----
    Amino acids are identified according to the PDB chemical component
    dictionary. A residue is considered an amino acid if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``D-GAMMA-PEPTIDE, C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
    ``D-PEPTIDE NH3 AMINO TERMINUS``,
    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
    ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
    """
    # The residue names to match come from `amino_acid_names()`
    known_names = amino_acid_names()
    return np.isin(array.res_name, known_names)
177
+
178
+
179
def filter_carbohydrates(array):
    """
    Create a mask selecting the atoms that belong to carbohydrates.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to a
        carbohydrate.

    Notes
    -----
    Carbohydrates are identified according to the PDB chemical component
    dictionary. A residue is considered a carbohydrate if its
    ``_chem_comp.type`` property has one of the following values (case
    insensitive):

    ``D-SACCHARIDE``, ``D-SACCHARIDE, ALPHA LINKING``,
    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
    ``SACCHARIDE``
    """
    # The residue names to match come from `carbohydrate_names()`
    known_names = carbohydrate_names()
    return np.isin(array.res_name, known_names)
207
+
208
+
209
def filter_backbone(array):
    """
    Filter all peptide backbone atoms of one array.

    This includes the "N", "CA" and "C" atoms of amino acids.

    DEPRECATED: Please use :func:`filter_peptide_backbone` to filter
    for protein backbone atoms.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        is a backbone atom.

    Warns
    -----
    DeprecationWarning
        Always emitted, as this function is superseded by
        :func:`filter_peptide_backbone`.
    """
    warnings.warn(
        "Please use `filter_peptide_backbone()` to filter "
        "for protein backbone atoms.",
        DeprecationWarning,
        # Attribute the warning to the calling code instead of this
        # module, so users see where the deprecated call is made
        stacklevel=2
    )
    # Delegate to the replacement function, which selects the same
    # "N", "CA" and "C" atoms of amino acid residues
    return filter_peptide_backbone(array)
238
+
239
+
240
def _filter_atom_names(array, atom_names):
    """
    Create a mask that is `True` for each atom of `array` whose
    ``atom_name`` is contained in `atom_names`.
    """
    has_listed_name = np.isin(array.atom_name, atom_names)
    return has_listed_name
242
+
243
+
244
def filter_peptide_backbone(array):
    """
    Create a mask selecting the peptide backbone atoms of a structure.

    These are the "N", "CA" and "C" atoms of amino acid residues.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom is part of the
        peptide backbone.
    """
    # Both conditions must hold: a backbone atom name
    # and membership in an amino acid residue
    name_matches = _filter_atom_names(array, _peptide_backbone_atoms)
    in_amino_acid = filter_amino_acids(array)
    return name_matches & in_amino_acid
264
+
265
+
266
def filter_phosphate_backbone(array):
    """
    Create a mask selecting the phosphate backbone atoms of a
    structure.

    These are the P, O5', C5', C4', C3', and O3' atoms of nucleotides.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The structure to create the mask for.

    Returns
    -------
    filter : ndarray, dtype=bool
        ``True`` at every index of `array` whose atom is part of the
        phosphate backbone.
    """
    # Both conditions must hold: a backbone atom name
    # and membership in a nucleotide residue
    name_matches = _filter_atom_names(array, _phosphate_backbone_atoms)
    in_nucleotide = filter_nucleotides(array)
    return name_matches & in_nucleotide
286
+
287
+
288
+ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
289
+ """
290
+ Filter for atoms such that their bond length with the next atom
291
+ lies within the provided boundaries.
292
+
293
+ The result will depend on the atoms' order.
294
+ For instance, consider a molecule::
295
+
296
+ C3
297
+ |
298
+ C1-C2-C4
299
+
300
+ If the order corresponds to ``[C1, C2, C4, C3]``, the output will be
301
+ ``[True, True, False, True]``.
302
+ Note that the trailing atom will always evaluate to ``True``.
303
+
304
+ Parameters
305
+ ----------
306
+ array: AtomArray
307
+ The array to filter.
308
+ min_len: float
309
+ Minmum bond length
310
+ max_len: float
311
+ Maximum bond length
312
+
313
+ Returns
314
+ -------
315
+ filter : ndarray, dtype=bool
316
+ This array is `True` for all indices in `array`, where an atom
317
+ has a bond length with the next atom within [`min_len`, `max_len`]
318
+ boundaries.
319
+
320
+ Notes
321
+ -----
322
+ Note that this function purely uses distances between consecutive atoms.
323
+ A potential ``BondList`` is not considered here.
324
+ """
325
+ dist = np.linalg.norm(np.diff(array.coord, axis=0), axis=1)
326
+ mask = (dist >= min_len) & (dist <= max_len)
327
+ return np.append(mask, True)
328
+
329
+
330
def _is_polymer(array, min_size, pol_type):
    """
    Check whether `array` contains at least `min_size` residues of the
    given polymer type.

    The polymer type is chosen by the leading character of `pol_type`:
    ``p`` selects amino acids, ``n`` nucleotides and ``c``
    carbohydrates.
    Any other value raises a :class:`ValueError`.
    """
    prefix_to_filter = (
        ('p', filter_amino_acids),
        ('n', filter_nucleotides),
        ('c', filter_carbohydrates),
    )
    for prefix, candidate in prefix_to_filter:
        if pol_type.startswith(prefix):
            select = candidate
            break
    else:
        raise ValueError(f'Unsupported polymer type {pol_type}')

    # Count only those residues that are of the requested monomer type
    n_monomers = get_residue_count(array[select(array)])
    return n_monomers >= min_size
343
+
344
+
345
def filter_polymer(array, min_size=2, pol_type='peptide'):
    """
    Create a mask selecting the atoms that are part of a consecutive
    standard macromolecular polymer entity.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to filter.
    min_size : int
        The minimum number of monomers.
    pol_type : str
        The polymer type, either ``"peptide"``, ``"nucleotide"``, or ``"carbohydrate"``.
        Abbreviations are supported: ``"p"``, ``"pep"``, ``"n"``, etc.

    Returns
    -------
    filter : ndarray, dtype=bool
        `True` at every index of `array` whose atom belongs to a
        consecutive polymer entity having at least `min_size` monomers.

    """
    # Imported locally to avoid circular imports
    from .integrity import check_res_id_continuity

    # The positions reported by the continuity check
    # are used as split points between segments
    break_points = check_res_id_continuity(array)

    segment_masks = []
    for segment in np.split(array, break_points):
        # The verdict is computed once per segment
        # and applies to all of its atoms
        verdict = _is_polymer(
            atom_array(segment), min_size=min_size, pol_type=pol_type
        )
        segment_masks.append(np.full(len(segment), verdict, dtype=bool))
    return np.concatenate(segment_masks)
377
+
378
+
379
def filter_intersection(array, intersect):
    """
    Filter all atoms of one array that exist also in another array.

    An atom is defined as existent in the second array, if there is an
    atom in the second array that has the same annotation values in all
    categories that exist in both arrays.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The array to be filtered.
    intersect : AtomArray
        Atoms in `array` that also exist in `intersect` are filtered.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is `True` for all indices in `array`, where the atom
        exists also in `intersect`.

    Examples
    --------

    Keeping only the atoms whose chain ID also appears in a second
    array:

    >>> array1 = AtomArray(length=5)
    >>> array1.chain_id = np.array(["A","B","C","D","E"])
    >>> array2 = AtomArray(length=3)
    >>> array2.chain_id = np.array(["D","B","C"])
    >>> array1 = array1[filter_intersection(array1, array2)]
    >>> print(array1.chain_id)
    ['B' 'C' 'D']

    """
    intersect_categories = set(intersect.get_annotation_categories())
    # Check atom equality only for categories which exist in both
    # arrays; the annotation arrays are looked up once, up front,
    # instead of once per atom and category
    shared_annotations = [
        (array.get_annotation(category), intersect.get_annotation(category))
        for category in array.get_annotation_categories()
        if category in intersect_categories
    ]

    n_atoms = array.array_length()
    n_candidates = intersect.array_length()
    mask = np.zeros(n_atoms, dtype=bool)
    for i in range(n_atoms):
        # Candidates in `intersect` that still agree with atom `i`
        # in all categories compared so far
        matches = np.ones(n_candidates, dtype=bool)
        for own_values, other_values in shared_annotations:
            matches &= (other_values == own_values[i])
        mask[i] = matches.any()
    return mask
427
+
428
+
429
def filter_first_altloc(atoms, altloc_ids):
    """
    Filter all atoms, that have the first *altloc* ID appearing in a
    residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID (`'.'`, `'?'`, `' '`, `''`).
        - The atom has the same altloc ID (e.g. `'A'`, `'B'`, etc.)
          as the first atom in the residue that has an altloc ID.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with `altloc` set to
    `True`.

    Examples
    --------

    The second ``CB`` record (altloc ``'B'``) is a duplicate and is
    therefore masked out:

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> print(filter_first_altloc(atoms, altloc_ids))
    [ True  True False]
    >>> filtered = atoms[filter_first_altloc(atoms, altloc_ids)]
    """
    # Atoms without altloc code are always kept.
    # `np.isin()` supersedes `np.in1d()`, which is deprecated since
    # NumPy 2.0; for the 1-D `altloc_ids` both give the same result
    altloc_filter = np.isin(altloc_ids, [".", "?", " ", ""])

    # For each residue additionally keep the atoms carrying the first
    # letter altloc ID that appears in that residue
    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(residue_starts[:-1], residue_starts[1:]):
        altloc_ids_in_res = altloc_ids[start:stop]
        # Stop scanning as soon as the first letter ID is found;
        # `None` means the residue has no altloc ID -> nothing to do
        first_id = next(
            (loc for loc in altloc_ids_in_res if loc.isalpha()), None
        )
        if first_id is not None:
            altloc_filter[start:stop] |= (altloc_ids_in_res == first_id)

    return altloc_filter
496
+
497
+
498
def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
    """
    For each residue, filter all atoms, that have the *altloc* ID
    with the highest occupancy for this residue.

    Structure files (PDB, PDBx) allow for duplicate atom records,
    in case a residue is found in multiple alternate locations
    (*altloc*).
    This function is used to remove such duplicate atoms by choosing a
    single *altloc ID* for an atom with other *altlocs* being removed.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The unfiltered structure to be filtered.
    altloc_ids : ndarray, shape=(n,), dtype='U1'
        An array containing the alternate location IDs for each
        atom in `atoms`.
        Can contain `'.'`, `'?'`, `' '`, `''` or a letter at each
        position.
    occupancies : ndarray, shape=(n,), dtype=float
        An array containing the occupancy values for each atom in
        `atoms`.

    Returns
    -------
    filter : ndarray, dtype=bool
        For each residue, this array is True in the following cases:

        - The atom has no altloc ID
          (``'.'``, ``'?'``, ``' '``, ``''``).
        - The atom has the altloc ID (e.g. ``'A'``, ``'B'``, etc.),
          of which the corresponding occupancy values are highest
          for the **entire** residue.
          If multiple altloc IDs share the highest occupancy sum,
          the one appearing first in the residue is chosen.

    Notes
    -----
    The function will be rarely used by the end user, since this kind
    of filtering is usually automatically performed, when the structure
    is loaded from a file.
    The exception are structures that were read with ``altloc`` set to
    ``True``.

    Examples
    --------

    The ``CB`` record with altloc ``'B'`` has the higher occupancy,
    hence the record with altloc ``'A'`` is masked out:

    >>> atoms = array([
    ...     Atom(coord=[1, 2, 3], res_id=1, atom_name="CA"),
    ...     Atom(coord=[4, 5, 6], res_id=1, atom_name="CB"),
    ...     Atom(coord=[6, 5, 4], res_id=1, atom_name="CB")
    ... ])
    >>> altloc_ids = np.array([".", "A", "B"])
    >>> occupancies = np.array([1.0, 0.1, 0.9])
    >>> print(filter_highest_occupancy_altloc(
    ...     atoms, altloc_ids, occupancies
    ... ))
    [ True False  True]
    >>> filtered = atoms[filter_highest_occupancy_altloc(
    ...     atoms, altloc_ids, occupancies
    ... )]
    """
    # Atoms without altloc code are always kept.
    # `np.isin()` supersedes `np.in1d()`, which is deprecated since
    # NumPy 2.0; for the 1-D `altloc_ids` both give the same result
    altloc_filter = np.isin(altloc_ids, [".", "?", " ", ""])

    # For each residue additionally keep the atoms of the altloc ID
    # with the highest sum of occupancies
    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(residue_starts[:-1], residue_starts[1:]):
        occupancies_in_res = occupancies[start:stop]
        altloc_ids_in_res = altloc_ids[start:stop]

        # Unique letter altloc IDs in order of first appearance:
        # in contrast to a `set` the iteration order is reproducible,
        # so ties are always resolved in favor of the first altloc ID
        candidate_ids = dict.fromkeys(
            loc for loc in altloc_ids_in_res if loc.isalpha()
        )
        if not candidate_ids:
            # No altloc ID in this residue -> Nothing to do
            continue

        highest = -1.0
        highest_id = None
        for altloc_id in candidate_ids:
            occupancy_sum = np.sum(
                occupancies_in_res[altloc_ids_in_res == altloc_id]
            )
            # Strict comparison: an equal sum does not replace the
            # altloc ID that appeared earlier
            if occupancy_sum > highest:
                highest = occupancy_sum
                highest_id = altloc_id
        # `highest_id` stays `None`, if no occupancy sum exceeds the
        # initial value (e.g. NaN occupancies) -> select nothing
        if highest_id is not None:
            altloc_filter[start:stop] |= (altloc_ids_in_res == highest_id)

    return altloc_filter