biotite 0.41.1__cp311-cp311-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-311-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1415 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides functions for base pair identification.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Tom David Müller"
11
+ __all__ = ["base_pairs", "map_nucleotide", "base_stacking", "base_pairs_edge",
12
+ "Edge", "base_pairs_glycosidic_bond", "GlycosidicBond"]
13
+
14
+ import numpy as np
15
+ import warnings
16
+ from enum import IntEnum
17
+ from .atoms import Atom, array
18
+ from .superimpose import superimpose
19
+ from .filter import filter_nucleotides
20
+ from .celllist import CellList
21
+ from .hbond import hbond
22
+ from .error import IncompleteStructureWarning, UnexpectedStructureWarning, \
23
+ BadStructureError
24
+ from .util import distance, norm_vector
25
+ from .residues import get_residue_starts_for, get_residue_masks
26
+ from .info.standardize import standardize_order
27
+ from .compare import rmsd
28
+
29
+
30
def _get_std_adenine():
    """
    Build the reference adenine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The adenine base atoms (``res_name="A"``) at their standard
        coordinates, named according to the PDB File Format V3
        nomenclature.
    coordinates : tuple (ndarray, ndarray, ndarray, dtype=float)
        In this order: the base center according to the SCHNaP paper
        referenced in the function ``base_pairs`` (midpoint between
        ``N1`` and ``C4``), the center of the pyrimidine ring and the
        center of the imidazole ring.
    """
    # Atom name -> standard coordinates; the insertion order defines
    # the atom order of the returned array
    std_coord = {
        "N9": [-1.291, 4.498, 0.000],
        "C8": [0.024, 4.897, 0.000],
        "N7": [0.877, 3.902, 0.000],
        "C5": [0.071, 2.771, 0.000],
        "C6": [0.369, 1.398, 0.000],
        "N6": [1.611, 0.909, 0.000],
        "N1": [-0.668, 0.532, 0.000],
        "C2": [-1.912, 1.023, 0.000],
        "N3": [-2.320, 2.290, 0.000],
        "C4": [-1.267, 3.124, 0.000],
    }
    atoms = {
        name: Atom(coord, atom_name=name, res_name="A")
        for name, coord in std_coord.items()
    }
    adenine = array(list(atoms.values()))

    def center_of(*names):
        # Mean position of the named atoms
        return np.mean([atoms[name].coord for name in names], axis=-2)

    # Midpoint between the N1 and C4 atoms
    midpoint = center_of("N1", "C4")
    # Centers of the two aromatic rings
    pyrimidine_center = center_of("C5", "C6", "N1", "C2", "N3", "C4")
    imidazole_center = center_of("N9", "C8", "N7", "C5", "C4")

    return adenine, (midpoint, pyrimidine_center, imidazole_center)
74
+
75
+
76
def _get_std_cytosine():
    """
    Build the reference cytosine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The cytosine base atoms (``res_name="C"``) at their standard
        coordinates, named according to the PDB File Format V3
        nomenclature.
    coordinates : tuple (ndarray, ndarray, dtype=float)
        In this order: the base center according to the SCHNaP paper
        referenced in the function ``base_pairs`` (midpoint between
        ``N3`` and ``C6``) and the center of the pyrimidine ring.
    """
    # Atom name -> standard coordinates; the insertion order defines
    # the atom order of the returned array
    std_coord = {
        "N1": [-1.285, 4.542, 0.000],
        "C2": [-1.472, 3.158, 0.000],
        "O2": [-2.628, 2.709, 0.000],
        "N3": [-0.391, 2.344, 0.000],
        "C4": [0.837, 2.868, 0.000],
        "N4": [1.875, 2.027, 0.000],
        "C5": [1.056, 4.275, 0.000],
        "C6": [-0.023, 5.068, 0.000],
    }
    atoms = {
        name: Atom(coord, atom_name=name, res_name="C")
        for name, coord in std_coord.items()
    }
    cytosine = array(list(atoms.values()))

    def center_of(*names):
        # Mean position of the named atoms
        return np.mean([atoms[name].coord for name in names], axis=-2)

    # Midpoint between the N3 and C6 atoms
    midpoint = center_of("N3", "C6")
    # Center of the aromatic ring
    pyrimidine_center = center_of("N1", "C2", "N3", "C4", "C5", "C6")

    return cytosine, (midpoint, pyrimidine_center)
112
+
113
+
114
def _get_std_guanine():
    """
    Build the reference guanine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The guanine base atoms (``res_name="G"``) at their standard
        coordinates, named according to the PDB File Format V3
        nomenclature.
    coordinates : tuple (ndarray, ndarray, ndarray, dtype=float)
        In this order: the base center according to the SCHNaP paper
        referenced in the function ``base_pairs`` (midpoint between
        ``N1`` and ``C4``), the center of the pyrimidine ring and the
        center of the imidazole ring.
    """
    # Atom name -> standard coordinates; the insertion order defines
    # the atom order of the returned array
    std_coord = {
        "N9": [-1.289, 4.551, 0.000],
        "C8": [0.023, 4.962, 0.000],
        "N7": [0.870, 3.969, 0.000],
        "C5": [0.071, 2.833, 0.000],
        "C6": [0.424, 1.460, 0.000],
        "O6": [1.554, 0.955, 0.000],
        "N1": [-0.700, 0.641, 0.000],
        "C2": [-1.999, 1.087, 0.000],
        "N2": [-2.949, 0.139, -0.001],
        "N3": [-2.342, 2.364, 0.001],
        "C4": [-1.265, 3.177, 0.000],
    }
    atoms = {
        name: Atom(coord, atom_name=name, res_name="G")
        for name, coord in std_coord.items()
    }
    guanine = array(list(atoms.values()))

    def center_of(*names):
        # Mean position of the named atoms
        return np.mean([atoms[name].coord for name in names], axis=-2)

    # Midpoint between the N1 and C4 atoms
    midpoint = center_of("N1", "C4")
    # Centers of the two aromatic rings
    pyrimidine_center = center_of("C5", "C6", "N1", "C2", "N3", "C4")
    imidazole_center = center_of("N9", "C8", "N7", "C5", "C4")

    return guanine, (midpoint, pyrimidine_center, imidazole_center)
159
+
160
+
161
def _get_std_thymine():
    """
    Build the reference thymine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The thymine base atoms (``res_name="T"``) at their standard
        coordinates, named according to the PDB File Format V3
        nomenclature.
    coordinates : tuple (ndarray, ndarray, dtype=float)
        In this order: the base center according to the SCHNaP paper
        referenced in the function ``base_pairs`` (midpoint between
        ``N3`` and ``C6``) and the center of the pyrimidine ring.
    """
    # Atom name -> standard coordinates; the insertion order defines
    # the atom order of the returned array
    std_coord = {
        "N1": [-1.284, 4.500, 0.000],
        "C2": [-1.462, 3.135, 0.000],
        "O2": [-2.562, 2.608, 0.000],
        "N3": [-0.298, 2.407, 0.000],
        "C4": [0.994, 2.897, 0.000],
        "O4": [1.944, 2.119, 0.000],
        "C5": [1.106, 4.338, 0.000],
        "C7": [2.466, 4.961, 0.001],
        "C6": [-0.024, 5.057, 0.000],
    }
    atoms = {
        name: Atom(coord, atom_name=name, res_name="T")
        for name, coord in std_coord.items()
    }
    thymine = array(list(atoms.values()))

    def center_of(*names):
        # Mean position of the named atoms
        return np.mean([atoms[name].coord for name in names], axis=-2)

    # Midpoint between the N3 and C6 atoms
    midpoint = center_of("N3", "C6")
    # Center of the aromatic ring
    pyrimidine_center = center_of("N1", "C2", "N3", "C4", "C5", "C6")

    return thymine, (midpoint, pyrimidine_center)
198
+
199
+
200
def _get_std_uracil():
    """
    Build the reference uracil base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The uracil base atoms (``res_name="U"``) at their standard
        coordinates, named according to the PDB File Format V3
        nomenclature.
    coordinates : tuple (ndarray, ndarray, dtype=float)
        In this order: the base center according to the SCHNaP paper
        referenced in the function ``base_pairs`` (midpoint between
        ``N3`` and ``C6``) and the center of the pyrimidine ring.
    """
    # Atom name -> standard coordinates; the insertion order defines
    # the atom order of the returned array
    std_coord = {
        "N1": [-1.284, 4.500, 0.000],
        "C2": [-1.462, 3.131, 0.000],
        "O2": [-2.563, 2.608, 0.000],
        "N3": [-0.302, 2.397, 0.000],
        "C4": [0.989, 2.884, 0.000],
        "O4": [1.935, 2.094, -0.001],
        "C5": [1.089, 4.311, 0.000],
        "C6": [-0.024, 5.053, 0.000],
    }
    atoms = {
        name: Atom(coord, atom_name=name, res_name="U")
        for name, coord in std_coord.items()
    }
    uracil = array(list(atoms.values()))

    def center_of(*names):
        # Mean position of the named atoms
        return np.mean([atoms[name].coord for name in names], axis=-2)

    # Midpoint between the N3 and C6 atoms
    midpoint = center_of("N3", "C6")
    # Center of the aromatic ring
    pyrimidine_center = center_of("N1", "C2", "N3", "C4", "C5", "C6")

    return uracil, (midpoint, pyrimidine_center)
236
+
237
+
238
# Reference bases and their ring centers, computed once at import time
_STD_ADENINE, _STD_ADENINE_RING_CENTERS = _get_std_adenine()
_STD_CYTOSINE, _STD_CYTOSINE_RING_CENTERS = _get_std_cytosine()
_STD_GUANINE, _STD_GUANINE_RING_CENTERS = _get_std_guanine()
_STD_THYMINE, _STD_THYMINE_RING_CENTERS = _get_std_thymine()
_STD_URACIL, _STD_URACIL_RING_CENTERS = _get_std_uracil()

# Residue names of the nucleotides that carry the respective base
_ADENINE_CONTAINING_NUCLEOTIDES = ["A", "DA"]
_THYMINE_CONTAINING_NUCLEOTIDES = ["T", "DT"]
_CYTOSINE_CONTAINING_NUCLEOTIDES = ["C", "DC"]
_GUANINE_CONTAINING_NUCLEOTIDES = ["G", "DG"]
_URACIL_CONTAINING_NUCLEOTIDES = ["U", "DU"]
# All residue names for which a reference base is available
_REFERENCE_NUCLEOTIDE_NAMES = [
    *_ADENINE_CONTAINING_NUCLEOTIDES,
    *_THYMINE_CONTAINING_NUCLEOTIDES,
    *_CYTOSINE_CONTAINING_NUCLEOTIDES,
    *_GUANINE_CONTAINING_NUCLEOTIDES,
    *_URACIL_CONTAINING_NUCLEOTIDES,
]

# Atoms that are part of respective base edges according to the
# Leontis-Westhof nomenclature, keyed by the one-letter base name
_WATSON_CRICK_EDGE = {
    "A": ["N6", "N1"],
    "G": ["O6", "N1", "N2"],
    "U": ["O4", "N3", "O2"],
    "T": ["O4", "N3", "O2"],
    "C": ["N4", "N3", "O2"],
}
_HOOGSTEEN_EDGE = {
    "A": ["N6", "N7"],
    "G": ["O6", "N7"],
    "U": ["O4"],
    "T": ["O4"],
    "C": ["N4"],
}
_SUGAR_EDGE = {
    "A": ["N3", "O2'"],
    "G": ["N2", "N3", "O2'"],
    "U": ["O2", "O2'"],
    "T": ["O2", "O2'"],
    "C": ["O2", "O2'"],
}
# The edge tables in the order Watson-Crick, Hoogsteen, Sugar
_EDGES = [_WATSON_CRICK_EDGE, _HOOGSTEEN_EDGE, _SUGAR_EDGE]
281
+
282
+
283
class Edge(IntEnum):
    """
    This enum type represents the interacting edge for a given base.

    The members are plain integers, so that edge types can be stored
    in integer arrays and converted back via ``Edge(value)``.
    """
    # NOTE: The values are written as plain integers on purpose.
    # A trailing comma (``INVALID = 0,``) would turn the value into a
    # one-element tuple that only ends up as an integer because the
    # enum machinery unpacks tuples as constructor arguments.

    # No valid edge could be assigned to the base
    INVALID = 0
    # The Watson-Crick edge of the base
    WATSON_CRICK = 1
    # The Hoogsteen edge of the base
    HOOGSTEEN = 2
    # The sugar edge of the base
    SUGAR = 3
291
+
292
+
293
class GlycosidicBond(IntEnum):
    """
    This enum type represents the relative glycosidic bond orientation
    for a given base pair.

    The members are plain integers, so that orientations can be stored
    in integer arrays and converted back via ``GlycosidicBond(value)``.
    """
    # NOTE: All values are written as plain integers.
    # A trailing comma (``CIS = 1,``) would turn the value into a
    # one-element tuple that only ends up as an integer because the
    # enum machinery unpacks tuples as constructor arguments.

    # No valid orientation could be assigned to the base pair
    INVALID = 0
    # The glycosidic bonds are in *cis* orientation
    CIS = 1
    # The glycosidic bonds are in *trans* orientation
    TRANS = 2
301
+
302
+
303
def base_pairs_edge(atom_array, base_pairs):
    """
    Get the interacting edges for given base pairs in an
    :class:`AtomArray` according to the Leontis-Westhof nomenclature.
    :footcite:`Leontis2001`

    The :class:`AtomArray` must contain hydrogens as it relies on
    :func:`hbond()`.

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` containing the bases.
    base_pairs : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base. The
        structure of the ``ndarray`` is the same as the output of
        :func:`base_pairs()`.

    Returns
    -------
    results : ndarray, dtype=uint8, shape=(n,2)
        The ``ndarray`` has the same dimensions as ``base_pairs``. Each
        cell corresponds to the interacting edge of the referenced base
        in ``base_pairs``. The edge type is stored as integer that is
        interpreted as member of the :class:`Edge` enum.

    See Also
    --------
    base_pairs
    base_pairs_glycosidic_bond

    Notes
    -----
    If a base is not a canonical base (``A``, ``C``, ``G``, ``T``,
    ``U``) or no hydrogen bonds are found between the bases that conform
    to the interacting edges described by Leontis and Westhof, 0 is
    returned (corresponding to ``Edge.INVALID``).

    The edge returned always corresponds to the edge with the most
    hydrogen bonding interactions.

    Examples
    --------
    Compute the interacting base edges for the dna helix with the PDB
    id 1QXB:

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1qxb.cif")
    ... )
    >>> basepairs = base_pairs(dna_helix)
    >>> interacting_edges = base_pairs_edge(dna_helix, basepairs)
    >>> print(interacting_edges)
    [[1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]]

    The resulting integers can be interpreted as :class:`Edge` ``Enum``:

    >>> for interaction in interacting_edges:
    ...     print(Edge(interaction[0]), Edge(interaction[1]))
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK
    Edge.WATSON_CRICK Edge.WATSON_CRICK

    References
    ----------

    .. footbibliography::
    """
    # One cell per referenced base, initialized to 0 (``Edge.INVALID``)
    results = np.zeros_like(base_pairs, dtype='uint8')

    # One residue mask per referenced base, arranged such that the
    # first axis runs over the pairs and the second over both partners
    n_pairs = base_pairs.shape[0]
    n_atoms = atom_array.shape[0]
    pair_masks = get_residue_masks(
        atom_array, base_pairs.flatten()
    ).reshape((n_pairs, 2, n_atoms))

    for pair_index, masks in enumerate(pair_masks):
        # Rows: both bases, columns: bonded atom count per edge type
        edge_counts = _get_edge_matrix(atom_array, masks)

        # The edge with the most hydrogen bonded atoms wins; a base
        # without any matching atom keeps the initial value of 0
        classified = edge_counts.max(axis=1) != 0
        results[pair_index, classified] = (
            edge_counts.argmax(axis=1)[classified] + 1
        )
    return results
414
+
415
+
416
def _get_edge_matrix(atom_array, base_masks):
    """
    Count, for both bases of a pair, how many hydrogen bonded atoms
    belong to each Leontis-Westhof edge.

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` containing the bases.
    base_masks : ndarray, dtype=bool, shape=(2,n)
        Boolean masks for the interacting bases

    Returns
    -------
    matrix : ndarray, dtype=int, shape=(2,3)
        The edge matrix: Each row corresponds to a base and each column
        to the number of Watson-Crick-, Hoogsteen- and Sugar-edge
        interactions, respectively.

    Raises
    ------
    BadStructureError
        If no hydrogen bond is found between the two bases.
    """
    # Hydrogen bonds formed between the two residues
    bonds = hbond(atom_array, base_masks[0], base_masks[1])
    if len(bonds) == 0:
        raise BadStructureError(
            f"No hydrogen bonds between nucleotides with residue start "
            f"indices {np.argmax(base_masks[0])} and "
            f"{np.argmax(base_masks[1])}"
        )
    # Keep only the first and third column of each bond (the
    # donor/acceptor heteroatoms, as opposed to the middle column) and
    # flatten the indices for iteration
    bonded_indices = bonds[:, (0,2)].flatten()

    # Tally of bonded edge atoms: one row per base, one column per
    # entry of ``_EDGES``
    edge_counts = np.zeros((2, 3), dtype='int32')

    for atom, atom_index in zip(atom_array[bonded_indices], bonded_indices):
        # Only the reference nucleotides have defined edges
        if atom.res_name not in _REFERENCE_NUCLEOTIDE_NAMES:
            continue

        for row, mask in enumerate(base_masks):
            # The atom only counts for the base it is part of
            if not mask[atom_index]:
                continue
            for column, edge_atoms in enumerate(_EDGES):
                # The last character of the residue name is the
                # one-letter code the edge dictionaries are keyed by
                if atom.atom_name in edge_atoms[atom.res_name[-1]]:
                    edge_counts[row, column] += 1
    return edge_counts
467
+
468
+
469
def base_pairs_glycosidic_bond(atom_array, base_pairs):
    """
    Calculate the glycosidic bond orientation for given base pairs in an
    :class:`AtomArray` according to the Leontis-Westhof nomenclature.
    :footcite:`Leontis2001`

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` containing the bases.
    base_pairs : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base. The
        structure of the ``ndarray`` is the same as the output of
        :func:`base_pairs()`.

    Returns
    -------
    results : ndarray, dtype=uint8, shape=(n,)
        Each element is equivalent to the respective base pair in
        ``base_pairs``. The glycosidic bond orientation is stored as
        integer that is interpreted as member of the
        :class:`GlycosidicBond` class.

    See Also
    --------
    base_pairs
    base_pairs_edge
    GlycosidicBond

    Notes
    -----
    The orientation is found using the geometric centers of the bases
    and the glycosidic bonds as described in :footcite:`Yang2003`.

    If one of the bases of a pair cannot be matched to a reference base
    or is not one of the known adenine, guanine, thymine, uracil or
    cytosine containing nucleotides, 0 is returned for this pair
    (corresponding to ``GlycosidicBond.INVALID``).

    Examples
    --------
    Compute the glycosidic bond orientations for the dna helix with the
    PDB ID 1QXB:

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1qxb.cif")
    ... )
    >>> basepairs = base_pairs(dna_helix)
    >>> orientations = base_pairs_glycosidic_bond(dna_helix, basepairs)
    >>> print(orientations)
    [1 1 1 1 1 1 1 1 1 1 1 1]

    The resulting integers can be interpreted as :class:`GlycosidicBond`
    ``Enum``:

    >>> for orientation in orientations:
    ...     print(GlycosidicBond(orientation))
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS
    GlycosidicBond.CIS

    References
    ----------

    .. footbibliography::
    """
    results = np.zeros(len(base_pairs), dtype='uint8')

    # Get the residue masks for each residue
    base_pairs_masks = get_residue_masks(atom_array, base_pairs.flatten())

    # Group every two masks together for easy iteration (each 'row' is
    # respective to a row in ``base_pairs``)
    base_pairs_masks = base_pairs_masks.reshape(
        (base_pairs.shape[0], 2, atom_array.shape[0])
    )

    for i, pair_masks in enumerate(base_pairs_masks):

        # position vectors of each bases geometric center
        geometric_centers = np.zeros((2, 3))
        # direction vectors of the glycosidic bonds
        glycosidic_bonds = np.zeros((2, 3))

        for base_index, base_mask in enumerate(pair_masks):
            base = atom_array[base_mask]

            # ``_match_base()`` returns ``None`` if the residue cannot
            # be mapped to a reference base: Such a pair is invalid
            # instead of being a reason to fail with a ``TypeError``
            base_vectors = _match_base(base, 3)
            if base_vectors is None:
                results[i] = GlycosidicBond.INVALID
                break
            ring_center = base_vectors[3:]

            # For Purines the glycosidic bond is between the C1' and the
            # N9 atoms, for pyrimidines it is between the C1' atom and
            # the N1 atom
            if (base.res_name[0] in _ADENINE_CONTAINING_NUCLEOTIDES or
                base.res_name[0] in _GUANINE_CONTAINING_NUCLEOTIDES):

                # Purines: center between the two aromatic rings
                geometric_centers[base_index] = (
                    (ring_center[0] + ring_center[1]) / 2
                )
                base_atom = base[base.atom_name == "N9"][0]

            elif (base.res_name[0] in _THYMINE_CONTAINING_NUCLEOTIDES or
                  base.res_name[0] in _URACIL_CONTAINING_NUCLEOTIDES or
                  base.res_name[0] in _CYTOSINE_CONTAINING_NUCLEOTIDES):

                geometric_centers[base_index] = ring_center[0]
                base_atom = base[base.atom_name == "N1"][0]

            else:

                results[i] = GlycosidicBond.INVALID
                break

            sugar_atom = base[base.atom_name == "C1'"][0]

            # Calculate the glycosidic bond direction vector
            glycosidic_bonds[base_index] = sugar_atom.coord - base_atom.coord

        # if the bond is not invalid compute the orientation
        # (the ``else`` branch is only entered if no ``break`` occurred)
        else:
            # Calculate the direction vector between the geometric centers
            geometric_centers_dir = geometric_centers[1] - geometric_centers[0]

            # Check the orientation of the glycosidic bonds: They are
            # in trans orientation if they point to opposite sides of
            # the line connecting the geometric centers
            if np.dot(
                np.cross(geometric_centers_dir, glycosidic_bonds[0]),
                np.cross(geometric_centers_dir, glycosidic_bonds[1])
            ) < 0:

                results[i] = GlycosidicBond.TRANS

            else:

                results[i] = GlycosidicBond.CIS

    return results
611
+
612
+
613
def base_stacking(atom_array, min_atoms_per_base=3):
    """
    Find pi-stacking interactions between aromatic rings
    in nucleic acids.

    The presence of base stacking is assumed if the following criteria
    are met :footcite:`Gabb1996`:

    (i) Distance between aromatic ring centers <=4.5 Å

    (ii) Angle between the ring normal vectors <=23°

    (iii) Angle between normalized distance vector between two ring
          centers and both bases' normal vectors <=40°

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` to find stacked bases in.
    min_atoms_per_base : integer, optional (default: 3)
        The number of atoms a nucleotides' base must have to be
        considered a candidate for a stacking interaction.

    Returns
    -------
    stacked_bases : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one pair of stacked bases and
        contains the indices to the first atom for each one of both
        paired residues.
        If no stacking interaction is found, an empty array with shape
        *(0,2)* is returned.

    Notes
    -----
    Please note that ring normal vectors are assumed to be equal to the
    base normal vectors.

    Candidate pairs, where at least one base cannot be matched to a
    reference base, are ignored.

    Examples
    --------
    Compute the stacking interactions for a DNA-double-helix (PDB ID
    1BNA):

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1bna.pdb")
    ... )
    >>> stacking_interactions = base_stacking(dna_helix)
    >>> print(dna_helix[stacking_interactions].res_id)
    [[ 1  2]
     [ 2  3]
     [ 3  4]
     [ 4  5]
     [ 5  6]
     [ 6  7]
     [ 7  8]
     [ 8  9]
     [ 9 10]
     [11 12]
     [14 15]
     [15 16]
     [16 17]
     [17 18]
     [18 19]
     [19 20]
     [20 21]
     [21 22]
     [22 23]
     [23 24]]

    References
    ----------

    .. footbibliography::
    """
    # Get the stacking candidates according to a cutoff distance, where
    # each base is identified as the first index of its respective
    # residue.
    # The diameter from the C1'-sugar-atom across a purine base is ~5Å
    # and the distance between the base centers can be at most 4.5Å.
    # Thus, accounting for buffer, a cutoff of 15Å between the
    # nucleotides' C1'-atoms was chosen.
    c1_mask = filter_nucleotides(atom_array) & (atom_array.atom_name == "C1'")
    stacking_candidates, _ = _get_proximate_residues(atom_array, c1_mask, 15)

    # Contains the plausible pairs of stacked bases
    stacked_bases = []

    # Get the residue masks for each residue
    base_masks = get_residue_masks(atom_array, stacking_candidates.flatten())

    # Group every two masks together for easy iteration (each 'row' is
    # respective to a row in ``stacking_candidates``)
    base_masks = base_masks.reshape(
        (stacking_candidates.shape[0], 2, atom_array.shape[0])
    )

    for (base1_index, base2_index), (base1_mask, base2_mask) in zip(
        stacking_candidates, base_masks
    ):
        bases = (atom_array[base1_mask], atom_array[base2_mask])

        # A list containing ndarray for each base with transformed
        # vectors from the standard base reference frame to the
        # structures' coordinates. The layout is as follows:
        #
        # [Origin coordinates]
        # [Base normal vector]
        # [SCHNAaP origin coordinates]
        # [Aromatic Ring Center coordinates]
        transformed_std_vectors = []

        # Generate the data necessary for analysis of each base.
        for base in bases:
            base_tuple = _match_base(base, min_atoms_per_base)
            if base_tuple is None:
                break
            transformed_std_vectors.append(base_tuple)

        # At least one base could not be matched to a reference base:
        # The candidate must be skipped, as the vectors required for
        # the stacking check are not available
        if len(transformed_std_vectors) < 2:
            continue

        normal_vectors = np.vstack((transformed_std_vectors[0][1],
                                    transformed_std_vectors[1][1]))
        aromatic_ring_centers = [transformed_std_vectors[0][3:],
                                 transformed_std_vectors[1][3:]]

        # Check if the base pairs are stacked.
        stacked = _check_base_stacking(aromatic_ring_centers, normal_vectors)

        # If a stacking interaction is found, append the first indices
        # of the bases' residues to the output.
        if stacked:
            stacked_bases.append((base1_index, base2_index))

    if not stacked_bases:
        # ``np.array([])`` would give a float array with shape (0,),
        # which cannot be used as index array and violates the
        # documented shape
        return np.empty((0, 2), dtype=int)
    return np.array(stacked_bases)
745
+
746
+
747
def base_pairs(atom_array, min_atoms_per_base = 3, unique = True):
    """
    Use DSSR criteria to find the base pairs in an :class:`AtomArray`.

    The algorithm is able to identify canonical and non-canonical
    base pairs between the 5 common bases Adenine, Guanine, Thymine,
    Cytosine, and Uracil bound to Deoxyribose and Ribose.
    Each Base is mapped to the 5 common bases Adenine, Guanine, Thymine,
    Cytosine, and Uracil in a standard reference frame described in
    :footcite:`Olson2001` using :func:`map_nucleotide()`.

    The DSSR Criteria are as follows :footcite:`Lu2015`:

    (i) Distance between base origins <=15 Å

    (ii) Vertical separation between the base planes <=2.5 Å

    (iii) Angle between the base normal vectors <=65°

    (iv) Absence of stacking between the two bases

    (v) Presence of at least one hydrogen bond involving a base atom

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` to find base pairs in.
    min_atoms_per_base : integer, optional (default: 3)
        The number of atoms a nucleotides' base must have to be
        considered a candidate for a base pair.
    unique : bool, optional (default: True)
        If ``True``, each base is assumed to be only paired with one
        other base. If multiple pairings are plausible, the pairing with
        the most hydrogen bonds is selected.

    Returns
    -------
    basepairs : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base.
        If no base pair is found, an empty array with shape *(0,2)* is
        returned.

    Notes
    -----
    The bases from the standard reference frame described in
    :footcite:`Olson2001` were modified such that only the base atoms
    are implemented.
    Sugar atoms (specifically C1') were disregarded, as nucleosides such
    as PSU do not possess the usual N-glycosidic linkage, thus leading
    to inaccurate results.

    The vertical separation is implemented as the scalar
    projection of the distance vectors between the base origins
    according to :footcite:`Lu1997` onto the averaged base normal
    vectors.

    The presence of base stacking is assumed if the following criteria
    are met :footcite:`Gabb1996`:

    (i) Distance between aromatic ring centers <=4.5 Å

    (ii) Angle between the ring normal vectors <=23°

    (iii) Angle between normalized distance vector between two ring
          centers and both bases' normal vectors <=40°

    Please note that ring normal vectors are assumed to be equal to the
    base normal vectors.

    For structures without hydrogens the accuracy of the algorithm is
    limited as the hydrogen bonds can only be checked for
    plausibility.
    A hydrogen bond is considered as plausible if a cutoff of 3.6 Å
    between N/O atom pairs is met. 3.6Å was chosen as hydrogen bonds are
    typically 1.5-2.5Å in length. N-H and O-H bonds have a length of
    1.00Å and 0.96Å respectively. Thus, including some buffer, a 3.6Å
    cutoff should cover all hydrogen bonds.

    Examples
    --------
    Compute the base pairs for the structure with the PDB ID 1QXB:

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1qxb.cif")
    ... )
    >>> basepairs = base_pairs(dna_helix)
    >>> print(dna_helix[basepairs].res_name)
    [['DC' 'DG']
     ['DG' 'DC']
     ['DC' 'DG']
     ['DG' 'DC']
     ['DA' 'DT']
     ['DA' 'DT']
     ['DT' 'DA']
     ['DT' 'DA']
     ['DC' 'DG']
     ['DG' 'DC']
     ['DC' 'DG']
     ['DG' 'DC']]

    References
    ----------

    .. footbibliography::
    """

    # Get the nucleotides for the given atom_array
    nucleotides_boolean = filter_nucleotides(atom_array)

    # Disregard the phosphate-backbone
    non_phosphate_boolean = (
        ~ np.isin(
            atom_array.atom_name,
            ["O5'", "P", "OP1", "OP2", "OP3", "HOP2", "HOP3"]
        )
    )

    # Combine the two boolean masks
    boolean_mask = nucleotides_boolean & non_phosphate_boolean

    # Get only nucleosides
    nucleosides = atom_array[boolean_mask]


    # Get the base pair candidates according to a N/O cutoff distance,
    # where each base is identified as the first index of its respective
    # residue
    n_o_mask = np.isin(nucleosides.element, ["N", "O"])
    basepair_candidates, n_o_matches = _get_proximate_residues(
        nucleosides, n_o_mask, 3.6
    )

    # Contains the plausible base pairs
    basepairs = []
    # Contains the number of hydrogen bonds for each plausible base pair
    basepairs_hbonds = []

    # Get the residue masks for each residue
    base_masks = get_residue_masks(nucleosides, basepair_candidates.flatten())

    # Group every two masks together for easy iteration (each 'row' is
    # respective to a row in ``basepair_candidates``)
    base_masks = base_masks.reshape(
        (basepair_candidates.shape[0], 2, nucleosides.shape[0])
    )

    for (base1_index, base2_index), (base1_mask, base2_mask), n_o_pairs in zip(
        basepair_candidates, base_masks, n_o_matches
    ):
        base1 = nucleosides[base1_mask]
        base2 = nucleosides[base2_mask]

        hbonds = _check_dssr_criteria(
            (base1, base2), min_atoms_per_base, unique
        )

        # If no hydrogens are present use the number N/O pairs to
        # decide between multiple pairing possibilities.
        if hbonds is None:
            # Each N/O-pair is detected twice. Thus, the number of
            # matches must be divided by two.
            hbonds = n_o_pairs/2
        # -1 indicates that the DSSR criteria are not fulfilled
        if hbonds != -1:
            basepairs.append((base1_index, base2_index))
            if unique:
                basepairs_hbonds.append(hbonds)

    if not basepairs:
        # ``np.array([])`` would give a float array with shape (0,),
        # which cannot be used as index array and violates the
        # documented shape
        return np.empty((0, 2), dtype=int)

    basepair_array = np.array(basepairs)

    if unique:
        # Contains all non-unique base pairs that are flagged to be
        # removed
        to_remove = []

        # Get all bases that have non-unique pairing interactions
        base_indices, occurrences = np.unique(basepairs, return_counts=True)
        for base_index, occurrence in zip(base_indices, occurrences):
            if occurrence > 1:
                # Write the non-unique base pairs to a dictionary as
                # 'index: number of hydrogen bonds'
                remove_candidates = {}
                for i, row in enumerate(
                    np.asarray(basepair_array == base_index)
                ):
                    if np.any(row):
                        remove_candidates[i] = basepairs_hbonds[i]
                # Flag all non-unique base pairs for removal except the
                # one that has the most hydrogen bonds
                del remove_candidates[
                    max(remove_candidates, key=remove_candidates.get)
                ]
                to_remove += list(remove_candidates.keys())
        # Remove all flagged base pairs from the output `ndarray`
        basepair_array = np.delete(basepair_array, to_remove, axis=0)

    # Remap values to original atom array
    if len(basepair_array) > 0:
        # The indices found so far point into ``nucleosides``
        basepair_array = np.where(boolean_mask)[0][basepair_array]
        for i, row in enumerate(basepair_array):
            basepair_array[i] = get_residue_starts_for(atom_array, row)
    return basepair_array
949
+
950
+
951
def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
    """
    Check the DSSR criteria of a potential base pair.

    Parameters
    ----------
    basepair : tuple (AtomArray, AtomArray)
        The two bases to check the criteria for as :class:`AtomArray`.
    min_atoms_per_base : int
        The number of atoms a nucleotides' base must have to be
        considered a candidate for a base pair.
    unique : bool
        Currently not evaluated by this function: The number of
        hydrogen bonds is always determined, if both bases contain
        hydrogen atoms.

    Returns
    -------
    satisfied : int or None
        ``-1``, if the base pair does not satisfy the criteria.
        The number of hydrogen bonds (``> 0``), if the base pair
        satisfies the criteria and both bases contain hydrogen atoms.
        ``None``, if the geometric criteria are satisfied, but at least
        one base contains no hydrogen atoms, so that the hydrogen bonds
        cannot be checked here.
    """

    # A list containing ndarray for each base with transformed
    # vectors from the standard base reference frame to the structures'
    # coordinates. The layout is as follows:
    #
    # [Origin coordinates]
    # [Base normal vector]
    # [SCHNAaP origin coordinates]
    # [Aromatic Ring Center coordinates]
    transformed_std_vectors = [None] * 2

    # Generate the data necessary for analysis of each base.
    for i in range(2):
        transformed_std_vectors[i] = _match_base(
            basepair[i], min_atoms_per_base
        )

        # The base could not be matched to a reference base
        if transformed_std_vectors[i] is None:
            return -1

    origins = np.vstack((transformed_std_vectors[0][0],
                         transformed_std_vectors[1][0]))
    normal_vectors = np.vstack((transformed_std_vectors[0][1],
                                transformed_std_vectors[1][1]))
    schnaap_origins = np.vstack((transformed_std_vectors[0][2],
                                 transformed_std_vectors[1][2]))
    aromatic_ring_centers = [transformed_std_vectors[0][3:],
                             transformed_std_vectors[1][3:]]

    # Criterion 1: Distance between origins <=15 Å
    if not (distance(origins[0], origins[1]) <= 15):
        return -1

    # Criterion 2: Vertical separation <=2.5 Å
    #
    # Average the base normal vectors. If the angle between the vectors
    # is >=90°, flip one vector before averaging
    mean_normal_vector = (
        normal_vectors[0] + (normal_vectors[1] * np.sign(np.dot(
            normal_vectors[0], normal_vectors[1]
        )))
    ) / 2
    norm_vector(mean_normal_vector)
    # Calculate the distance vector between the two SCHNAaP origins
    origin_distance_vector = schnaap_origins[1] - schnaap_origins[0]

    # The scalar projection of the distance vector between the two
    # origins onto the averaged normal vectors is the vertical
    # separation
    if not abs(np.dot(origin_distance_vector, mean_normal_vector)) <= 2.5:
        return -1

    # Criterion 3: Angle between normal vectors <=65°
    #
    # The criterion is evaluated as an angle of >=115° between the
    # normal vectors, i.e. the supplement of the angle must be <=65°.
    # NOTE(review): presumably the normal vectors of paired bases point
    # into opposite directions in the used reference frame - confirm
    #
    # Due to rounding errors the dot product of two unit vectors may be
    # slightly outside of [-1, 1]. Without clipping ``arccos()`` would
    # give NaN in this case and a perfectly antiparallel pair would be
    # rejected.
    normal_vectors_cosine = np.clip(
        np.dot(normal_vectors[0], normal_vectors[1]), -1.0, 1.0
    )
    if not (np.arccos(normal_vectors_cosine) >= ((115*np.pi)/180)):
        return -1

    # Criterion 4: Absence of stacking
    if _check_base_stacking(aromatic_ring_centers, normal_vectors):
        return -1

    # Criterion 5: Presence of at least one hydrogen bond
    #
    # Check if both bases came with hydrogens.
    if (("H" in basepair[0].element)
        and ("H" in basepair[1].element)):
        # For Structures that contain hydrogens, check for their
        # presence directly.
        #
        # Generate input atom array for ``hbond``
        potential_basepair = basepair[0] + basepair[1]

        # Get the number of hydrogen bonds
        bonds = len(hbond(
            potential_basepair,
            np.ones_like(potential_basepair, dtype=bool),
            np.ones_like(potential_basepair, dtype=bool)
        ))

        if bonds > 0:
            return bonds
        return -1

    else:
        # If the structure does not contain hydrogens return None
        return None
1060
+
1061
+
1062
def _check_base_stacking(aromatic_ring_centers, normal_vectors):
    """
    Decide whether two bases stack onto each other.

    Parameters
    ----------
    aromatic_ring_centers : list [ndarray, ndarray]
        A list with the aromatic ring center coordinates as
        :class:`ndarray`. Each row represents a ring center.
    normal_vectors : ndarray shape=(2, 3)
        The normal vectors of the bases.

    Returns
    -------
    base_stacking : bool
        ``True`` if base stacking is detected and ``False`` if not
    """
    centers_1, centers_2 = aromatic_ring_centers[0], aromatic_ring_centers[1]

    # Criterion 1: Distance between aromatic ring centers <=4.5 Å
    #
    # Collect the normalized connection vector for each combination of
    # ring centers that is close enough
    center_directions = []
    for center_1 in centers_1:
        for center_2 in centers_2:
            if not (distance(center_1, center_2) <= 4.5):
                continue
            direction = center_2 - center_1
            # Normalization is performed in-place
            norm_vector(direction)
            center_directions.append(direction)
    # No pair of ring centers is within the cutoff distance
    if not center_directions:
        return False

    # Criterion 2: Angle between normal vectors or its supplement <=23°
    normals_angle = np.rad2deg(
        np.arccos(np.dot(normal_vectors[0], normal_vectors[1]))
    )
    if 23 <= normals_angle <= 157:
        return False

    # Criterion 3: Angle between one normalized distance vector and
    # each of the bases' normal vector or supplement <=40°
    #
    # A single combination violating the angle range is sufficient to
    # exclude stacking
    for normal in normal_vectors:
        for direction in center_directions:
            direction_angle = np.rad2deg(
                np.arccos(np.dot(normal, direction))
            )
            if 40 <= direction_angle <= 140:
                return False

    return True
1114
+
1115
+
1116
def _match_base(nucleotide, min_atoms_per_base):
    """
    Match the nucleotide to a corresponding standard base reference
    frame.

    Parameters
    ----------
    nucleotide : AtomArray
        The nucleotide to be matched to a standard base.
    min_atoms_per_base : integer
        The number of atoms a base must have to be considered a
        candidate for a base pair.

    Returns
    -------
    vectors : ndarray, dtype=float, shape=(n,3)
        The standard vectors transformed into the coordinate system of
        the nucleotide: The first row is the origin, the second row is
        the normalized base normal vector and the remaining rows are
        the transformed rows of the ring center array of the matched
        standard base.
        The callers in this module interpret the third row as SCHNAaP
        origin and all following rows as aromatic ring centers
        (presumably this is the layout of the ``_STD_*_RING_CENTERS``
        arrays - confirm against their definition).
        ``None`` is returned, if the nucleotide cannot be mapped to a
        standard base or if less than `min_atoms_per_base` atoms of the
        nucleotide match the standard base.

    Warns
    -----
    IncompleteStructureWarning
        If less than `min_atoms_per_base` atoms of the nucleotide match
        the standard base.
    """

    # Standard vectors containing the origin and the base normal vectors
    vectors = np.array([[0, 0, 0], [0, 0, 1]], dtype=float)

    # Map the nucleotide to a reference base
    one_letter_code, _ = map_nucleotide(nucleotide, min_atoms_per_base)

    if one_letter_code is None:
        return None

    # The standard base and its ring centers for each one-letter code
    std_references = {
        'A': (_STD_ADENINE, _STD_ADENINE_RING_CENTERS),
        'T': (_STD_THYMINE, _STD_THYMINE_RING_CENTERS),
        'C': (_STD_CYTOSINE, _STD_CYTOSINE_RING_CENTERS),
        'G': (_STD_GUANINE, _STD_GUANINE_RING_CENTERS),
        'U': (_STD_URACIL, _STD_URACIL_RING_CENTERS),
    }
    std_base, std_ring_centers = std_references[one_letter_code]

    # Add the ring centers to the array of vectors to be transformed.
    vectors = np.vstack((vectors, std_ring_centers))

    # Select the matching atoms of the nucleotide and the standard base
    nucleotide_matched = nucleotide[
        np.isin(nucleotide.atom_name, std_base.atom_name)
    ]
    std_base_matched = std_base[
        np.isin(std_base.atom_name, nucleotide.atom_name)
    ]
    # Ensure the nucleotide does not contain duplicate atom names
    _, unique_indices = np.unique(
        nucleotide_matched.atom_name, return_index=True
    )
    nucleotide_matched = nucleotide_matched[unique_indices]
    # Only continue if minimum number of matching atoms is reached
    if len(nucleotide_matched) < min_atoms_per_base:
        # Report the threshold that is actually applied instead of a
        # hard-coded number
        warnings.warn(
            f"Nucleotide with res_id {nucleotide.res_id[0]} and "
            f"chain_id {nucleotide.chain_id[0]} has less than "
            f"{min_atoms_per_base} base atoms, unable to check for "
            f"base pair.",
            IncompleteStructureWarning
        )
        return None
    # Reorder the atoms of the nucleotide to obtain the standard RCSB
    # PDB atom order.
    nucleotide_matched = nucleotide_matched[
        standardize_order(nucleotide_matched)
    ]

    # Match the selected std_base to the base.
    _, transformation = superimpose(nucleotide_matched, std_base_matched)
    vectors = transformation.apply(vectors)
    # The transformed normal vector is still a position relative to the
    # coordinate origin: Convert it into a direction relative to the
    # transformed base origin and normalize it (in-place)
    vectors[1,:] = vectors[1,:]-vectors[0,:]
    norm_vector(vectors[1,:])

    return vectors
1199
+
1200
+
1201
def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
    """
    Map a nucleotide to one of the 5 common bases Adenine, Guanine,
    Thymine, Cytosine, and Uracil. If one of those bases bound to
    Deoxyribose and Ribose is detected as input, the corresponding one-
    letter-code (``A``, ``G``, ``T``, ``C``, ``U``) is returned.

    If a different nucleotide is given, it is mapped to the best
    fitting base using the algorithm described below.

    (i) The number of matching atom names with the reference bases is
        counted. If the number of matching atoms with all reference
        bases is less than the specified `min_atoms_per_base`
        (default 3) the nucleotide cannot be mapped and ``None`` is
        returned.

    (ii) The bases with maximum number of matching atoms are selected
        and superimposed with each reference. The base with lowest RMSD
        is chosen. If the RMSD is more than the specified
        `rmsd_cutoff` (default 0.28) the nucleotide cannot be mapped
        and ``None`` is returned.

    Parameters
    ----------
    residue : AtomArray
        The nucleotide to be mapped.
    min_atoms_per_base : int, optional (default: 3)
        The number of atoms the residue must have in common with the
        reference.
    rmsd_cutoff : float, optional (default: 0.28)
        The maximum RMSD that is allowed for a mapping to occur.

    Returns
    -------
    one_letter_code : str
        The one-letter-code of the mapped base. ``None`` if no base can
        be mapped.
    exact_match : bool
        Whether or not the residue name exactly matches one of the
        common bases, i.e. the ``res_name`` of the input `residue` is
        one of ``A``, ``G``, ``T``, ``C``, ``U``, ``DA``, ``DG``,
        ``DT``, ``DC`` or ``DU``.
        Both values are always returned together, also if no base can
        be mapped; in that case the result is ``(None, False)``.

    Notes
    -----
    The default RMSD cutoff was chosen according to :footcite:`Lu2015`,
    where the same cutoff is used to detect if a given base is a
    nucleotide, by superimposing the base ring atoms onto a reference
    structure.

    References
    ----------

    .. footbibliography::
    """
    # Check if the residue is a 'standard' nucleotide
    res_name = residue.res_name[0]
    if res_name in _REFERENCE_NUCLEOTIDE_NAMES:
        # Only the last character is the base letter (e.g. 'DA' -> 'A')
        return res_name[-1], True

    # List of the standard bases for easy iteration
    std_base_list = [
        _STD_ADENINE, _STD_THYMINE, _STD_CYTOSINE, _STD_GUANINE,
        _STD_URACIL
    ]

    # The number of matched atoms for each 'standard' base
    matched_atom_no = [
        np.sum(np.isin(ref_base.atom_name, residue.atom_name))
        for ref_base in std_base_list
    ]
    max_matched = np.max(matched_atom_no)

    if max_matched < min_atoms_per_base:
        warnings.warn(
            f"Base with res_id {residue.res_id[0]} and chain_id "
            f"{residue.chain_id[0]} has an overlap with the reference "
            f"bases which is less than {min_atoms_per_base} atoms. "
            "Unable to map nucleotide.",
            IncompleteStructureWarning
        )
        return None, False

    # The reference bases with the maximum number of matching atoms.
    # A plain list is used instead of a NumPy object array, so the
    # selection never depends on how NumPy treats sequence-like objects
    candidates = [
        ref_base
        for ref_base, n_matched in zip(std_base_list, matched_atom_no)
        if n_matched == max_matched
    ]

    # The one letter code of the best matching reference base
    best_base = None
    # The lowest RMSD found so far; a candidate must be below the
    # cutoff to be accepted at all
    best_rmsd = rmsd_cutoff

    for ref_base in candidates:
        # Copy the residue as the res_name property of the ``AtomArray``
        # has to be modified for later function calls.
        nuc = residue.copy()

        # Select the matching atoms of the nucleotide and the reference
        # base
        nuc = nuc[np.isin(nuc.atom_name, ref_base.atom_name)]
        ref_base_matched = ref_base[
            np.isin(ref_base.atom_name, nuc.atom_name)
        ]

        # Set the res_name property to the same as the reference base.
        # This is a requirement for ``standardize_order``
        nuc.res_name = ref_base_matched.res_name
        # Reorder the atoms of the nucleotide to obtain the standard
        # RCSB PDB atom order. If a residue contains multiple atoms with
        # the same ``atom_name`` an exception is thrown by
        # ``standardize_order``. The exception is caught and the
        # selected reference is deliberately disregarded
        try:
            nuc = nuc[standardize_order(nuc)]
        except Exception:
            continue

        # Superimpose the nucleotide to the reference base
        fitted, _ = superimpose(ref_base_matched, nuc)
        current_rmsd = rmsd(fitted, ref_base_matched)

        # If the RMSD is lower than the specified cutoff or better than
        # a previous found reference, the current reference is selected
        # as best base
        if current_rmsd < best_rmsd:
            best_rmsd = current_rmsd
            best_base = ref_base_matched.res_name[0][-1]

    if best_base is None:
        warnings.warn(
            f"Base Type {residue.res_name[0]} not supported. ",
            UnexpectedStructureWarning
        )
        # Return a tuple like every other exit path, so callers that
        # unpack the result do not fail
        return None, False

    return best_base, False
1334
+
1335
+
1336
def _get_proximate_residues(atom_array, boolean_mask, cutoff):
    """
    Filter for residue pairs based on the distance between selected
    atoms.

    Parameters
    ----------
    atom_array : AtomArray, shape=(n,)
        The :class:`AtomArray` to find basepair candidates in.
    boolean_mask : ndarray, dtype=bool, shape=(n,)
        The selection of atoms.
    cutoff : integer
        The maximum distance between the atoms of the two residues.

    Returns
    -------
    pairs : ndarray, dtype=int, shape=(m,2)
        Contains the basepair candidates. Each row is equivalent to one
        potential basepair. Bases are represented as the first indices
        of their corresponding residues. Within each row the indices
        are sorted in ascending order and each pair is listed only
        once.
    count : ndarray, dtype=int, shape=(m,)
        The number of atom pairs between the residues within the
        specified cutoff.
    """
    # Get the indices of the atoms that are within the maximum cutoff
    # of each other
    indices = CellList(
        atom_array, cutoff, selection=boolean_mask
    ).get_atoms(atom_array.coord[boolean_mask], cutoff)

    # Loop through the indices of potential partners.
    # Entries of -1 are skipped (presumably padding values of the index
    # matrix returned by ``get_atoms()`` -- confirm against its docs)
    pairs = []
    for candidate, partners in zip(np.argwhere(boolean_mask)[:, 0], indices):
        for partner in partners:
            if partner != -1:
                pairs.append((candidate, partner))

    # Without any candidate the array below would be one-dimensional
    # and the column indexing would fail -> return empty results
    if not pairs:
        return np.empty((0, 2), dtype=int), np.empty(0, dtype=int)

    # Get the residue starts for the indices of the candidate/partner
    # indices.
    pairs = np.array(pairs)
    pairs = get_residue_starts_for(
        atom_array, pairs.flatten()
    ).reshape(pairs.shape)

    # Remove candidates where the pairs are from the same residue
    pairs = pairs[pairs[:, 0] != pairs[:, 1]]
    # Sort the residue starts for each pair, so that (a, b) and (b, a)
    # are recognized as the same pair
    pairs = np.sort(pairs, axis=1)
    # Make sure each pair is only listed once, count the occurrences
    pairs, count = np.unique(pairs, axis=0, return_counts=True)

    return pairs, count
1395
+
1396
+
1397
+ def _filter_atom_type(atom_array, atom_names):
1398
+ """
1399
+ Get all atoms with specified atom names.
1400
+
1401
+ Parameters
1402
+ ----------
1403
+ atom_array : AtomArray
1404
+ The :class:`AtomArray` to filter.
1405
+ atom_names : array_like
1406
+ The desired atom names.
1407
+
1408
+ Returns
1409
+ -------
1410
+ filter : ndarray, dtype=bool
1411
+ This array is ``True`` for all indices in the :class:`AtomArray`
1412
+ , where the atom has the desired atom names.
1413
+ """
1414
+ return (np.isin(atom_array.atom_name, atom_names)
1415
+ & (atom_array.res_id != -1))