biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,2036 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides the :class:`BondList` and :class:`BondType` classes
7
+ for representing chemical bonds between atoms, along with functions for finding bonds and connected atoms.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["BondList", "BondType",
13
+ "connect_via_distances", "connect_via_residue_names",
14
+ "find_connected", "find_rotatable_bonds"]
15
+
16
+ cimport cython
17
+ cimport numpy as np
18
+ from libc.stdlib cimport free, realloc
19
+
20
+ from collections.abc import Sequence
21
+ import itertools
22
+ import numbers
23
+ from enum import IntEnum
24
+ import networkx as nx
25
+ import numpy as np
26
+ from .error import BadStructureError
27
+ from ..copyable import Copyable
28
+
29
# Short aliases for the fixed-width NumPy integer C types.
# NOTE(review): `ptr` is presumably used to carry raw pointer addresses
# as 64-bit unsigned integers - confirm against its usage in this file.
ctypedef np.uint64_t ptr
ctypedef np.uint8_t uint8
ctypedef np.uint16_t uint16
ctypedef np.uint32_t uint32
ctypedef np.uint64_t uint64
ctypedef np.int8_t int8
ctypedef np.int16_t int16
ctypedef np.int32_t int32
ctypedef np.int64_t int64


# Fused type that covers every signed and unsigned integer width
# declared above.
# NOTE(review): presumably this lets functions accept index arrays of
# any integer dtype - confirm against the functions that use it.
ctypedef fused IndexType:
    uint8
    uint16
    uint32
    uint64
    int8
    int16
    int32
    int64
49
+
50
+
51
class BondType(IntEnum):
    """
    Enumeration of the types a chemical bond can have.

    - `ANY` - Used if the actual type is unknown
    - `SINGLE` - Single bond
    - `DOUBLE` - Double bond
    - `TRIPLE` - Triple bond
    - `QUADRUPLE` - A quadruple bond
    - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
    - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
    - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
    - `AROMATIC` - Aromatic bond without specification of the formal bond
    - `COORDINATION` - Coordination complex involving a metal atom
    """
    ANY = 0
    SINGLE = 1
    DOUBLE = 2
    TRIPLE = 3
    QUADRUPLE = 4
    AROMATIC_SINGLE = 5
    AROMATIC_DOUBLE = 6
    AROMATIC_TRIPLE = 7
    COORDINATION = 8
    AROMATIC = 9


    def without_aromaticity(self):
        """
        Get the non-aromatic counterpart of this bond type.

        :attr:`BondType.AROMATIC_{ORDER}` is converted into
        :attr:`BondType.{ORDER}` and :attr:`BondType.AROMATIC` is
        converted into :attr:`BondType.ANY`.
        Every other bond type is returned as it is.

        Returns
        -------
        new_bond_type : BondType
            The :class:`BondType` without aromaticity.

        Examples
        --------

        >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
        DOUBLE
        """
        # The table is built inside the method, because a dictionary
        # assigned in the class body would be turned into an enum member
        non_aromatic_counterparts = {
            BondType.AROMATIC_SINGLE: BondType.SINGLE,
            BondType.AROMATIC_DOUBLE: BondType.DOUBLE,
            BondType.AROMATIC_TRIPLE: BondType.TRIPLE,
            BondType.AROMATIC: BondType.ANY,
        }
        # Bond types without an entry are not aromatic -> keep them
        return non_aromatic_counterparts.get(self, self)
106
+
107
+
108
+ @cython.boundscheck(False)
109
+ @cython.wraparound(False)
110
+ class BondList(Copyable):
111
+ """
112
+ __init__(atom_count, bonds=None)
113
+
114
+ A bond list stores indices of atoms
115
+ (usually of an :class:`AtomArray` or :class:`AtomArrayStack`)
116
+ that form chemical bonds together with the type (or order) of the
117
+ bond.
118
+
119
+ Internally the bonds are stored as *n x 3* :class:`ndarray`.
120
+ For each row, the first column specifies the index of the first
121
+ atom, the second column the index of the second atom involved in the
122
+ bond.
123
+ The third column stores an integer that is interpreted as member
124
+ of the :class:`BondType` enum, that specifies the order of the
125
+ bond.
126
+
127
+ When indexing a :class:`BondList`, the index is not forwarded to the
128
+ internal :class:`ndarray`. Instead the indexing behavior is
129
+ consistent with indexing an :class:`AtomArray` or
130
+ :class:`AtomArrayStack`:
131
+ Bonds with at least one atom index that is not covered by the index
132
+ are removed, atom indices that occur after an uncovered atom index
133
+ move up.
134
+ Effectively, this means that after indexing an :class:`AtomArray`
135
+ and a :class:`BondList` with the same index, the atom indices in the
136
+ :class:`BondList` will still point to the same atoms in the
137
+ :class:`AtomArray`.
138
+ Indexing a :class:`BondList` with a single integer is equivalent
139
+ to calling :func:`get_bonds()`.
140
+
141
+ The same consistency applies to adding :class:`BondList` instances
142
+ via the '+' operator:
143
+ The atom indices of the second :class:`BondList` are increased by
144
+ the atom count of the first :class:`BondList` and then both
145
+ :class:`BondList` objects are merged.
146
+
147
+ Parameters
148
+ ----------
149
+ atom_count : int
150
+ A positive integer, that specifies the number of atoms the
151
+ :class:`BondList` refers to
152
+ (usually the length of an atom array (stack)).
153
+ Effectively, this value is the exclusive maximum for the indices
154
+ stored in the :class:`BondList`.
155
+ bonds : ndarray, shape=(n,2) or shape=(n,3), dtype=int, optional
156
+ This array contains the indices of atoms which are bonded:
157
+ For each row, the first column specifies the first atom,
158
+ the second column the second atom involved in a chemical bond.
159
+ If an *n x 3* array is provided, the additional column
160
+ specifies a :class:`BondType` instead of :attr:`BondType.ANY`.
161
+ By default, the created :class:`BondList` is empty.
162
+
163
+ Notes
164
+ -----
165
+ When initially providing the bonds as :class:`ndarray`, the input is
166
+ sanitized: Redundant bonds are removed, and each bond entry is
167
+ sorted so that the lower one of the two atom indices is in the first
168
+ column.
169
+ If a bond appears multiple times with different bond types, the
170
+ first bond takes precedence.
171
+
172
+ Examples
173
+ --------
174
+
175
+ Construct a :class:`BondList`, where a central atom (index 1) is
176
+ connected to three other atoms (index 0, 3 and 4):
177
+
178
+ >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
179
+ >>> print(bond_list)
180
+ [[0 1 0]
181
+ [1 3 0]
182
+ [1 4 0]]
183
+
184
+ Remove the first atom (index 0) via indexing:
185
+ The bond containing index 0 is removed, since the corresponding atom
186
+ does not exist anymore. Since all other atoms move up in their
187
+ position, the indices in the bond list are decreased by one:
188
+
189
+ >>> bond_list = bond_list[1:]
190
+ >>> print(bond_list)
191
+ [[0 2 0]
192
+ [0 3 0]]
193
+
194
+ :class:`BondList` objects can be associated to an :class:`AtomArray`
195
+ or :class:`AtomArrayStack`.
196
+ The following snippet shows this for a benzene molecule:
197
+
198
+ >>> benzene = AtomArray(12)
199
+ >>> # Omit filling most required annotation categories for brevity
200
+ >>> benzene.atom_name = np.array(
201
+ ... ["C1", "C2", "C3", "C4", "C5", "C6", "H1", "H2", "H3", "H4", "H5", "H6"]
202
+ ... )
203
+ >>> benzene.bonds = BondList(
204
+ ... benzene.array_length(),
205
+ ... np.array([
206
+ ... # Bonds between carbon atoms in the ring
207
+ ... (0, 1, BondType.AROMATIC_SINGLE),
208
+ ... (1, 2, BondType.AROMATIC_DOUBLE),
209
+ ... (2, 3, BondType.AROMATIC_SINGLE),
210
+ ... (3, 4, BondType.AROMATIC_DOUBLE),
211
+ ... (4, 5, BondType.AROMATIC_SINGLE),
212
+ ... (5, 0, BondType.AROMATIC_DOUBLE),
213
+ ... # Bonds between carbon and hydrogen
214
+ ... (0, 6, BondType.SINGLE),
215
+ ... (1, 7, BondType.SINGLE),
216
+ ... (2, 8, BondType.SINGLE),
217
+ ... (3, 9, BondType.SINGLE),
218
+ ... (4, 10, BondType.SINGLE),
219
+ ... (5, 11, BondType.SINGLE),
220
+ ... ])
221
+ ... )
222
+ >>> for i, j, bond_type in benzene.bonds.as_array():
223
+ ... print(
224
+ ... f"{BondType(bond_type).name} bond between "
225
+ ... f"{benzene.atom_name[i]} and {benzene.atom_name[j]}"
226
+ ... )
227
+ AROMATIC_SINGLE bond between C1 and C2
228
+ AROMATIC_DOUBLE bond between C2 and C3
229
+ AROMATIC_SINGLE bond between C3 and C4
230
+ AROMATIC_DOUBLE bond between C4 and C5
231
+ AROMATIC_SINGLE bond between C5 and C6
232
+ AROMATIC_DOUBLE bond between C1 and C6
233
+ SINGLE bond between C1 and H1
234
+ SINGLE bond between C2 and H2
235
+ SINGLE bond between C3 and H3
236
+ SINGLE bond between C4 and H4
237
+ SINGLE bond between C5 and H5
238
+ SINGLE bond between C6 and H6
239
+
240
+ Obtain the bonded atoms for the :math:`C_1`:
241
+
242
+ >>> bonds, types = benzene.bonds.get_bonds(0)
243
+ >>> print(bonds)
244
+ [1 5 6]
245
+ >>> print(types)
246
+ [5 6 1]
247
+ >>> print(f"C1 is bonded to {', '.join(benzene.atom_name[bonds])}")
248
+ C1 is bonded to C2, C6, H1
249
+
250
+ Cut the benzene molecule in half.
251
+ Although the first half of the atoms are missing the indices of
252
+ the cropped :class:`BondList` still represents the bonds of the
253
+ remaining atoms:
254
+
255
+ >>> half_benzene = benzene[
256
+ ... np.isin(benzene.atom_name, ["C4", "C5", "C6", "H4", "H5", "H6"])
257
+ ... ]
258
+ >>> for i, j, bond_type in half_benzene.bonds.as_array():
259
+ ... print(
260
+ ... f"{BondType(bond_type).name} bond between "
261
+ ... f"{half_benzene.atom_name[i]} and {half_benzene.atom_name[j]}"
262
+ ... )
263
+ AROMATIC_DOUBLE bond between C4 and C5
264
+ AROMATIC_SINGLE bond between C5 and C6
265
+ SINGLE bond between C4 and H4
266
+ SINGLE bond between C5 and H5
267
+ SINGLE bond between C6 and H6
268
+ """
269
+
270
def __init__(self, uint32 atom_count, np.ndarray bonds=None):
    # Set up the bond list from an atom count and an optional
    # (n,2) or (n,3) array of bonds
    self._atom_count = atom_count

    if bonds is None or len(bonds) == 0:
        # No bonds given -> create empty bond list
        self._bonds = np.zeros((0, 3), dtype=np.uint32)
        self._max_bonds_per_atom = 0
        return

    if bonds.ndim != 2:
        raise ValueError("Expected a 2D-ndarray for input bonds")

    # The third column is the bond type, that defaults to ANY (0)
    self._bonds = np.zeros((bonds.shape[0], 3), dtype=np.uint32)
    column_count = bonds.shape[1]
    if column_count not in (2, 3):
        raise ValueError(
            "Input array containing bonds must be either of shape "
            "(n,2) or (n,3)"
        )

    # Bond indices (index 0 and 1):
    # Indices are sorted per bond
    # so that the lower index is at the first position
    self._bonds[:,:2] = np.sort(
        _to_positive_index_array(bonds[:,:2], atom_count), axis=1
    )

    if column_count == 3:
        # Input also contains the bond type value (index 2)
        bond_types = bonds[:, 2]
        if (bond_types >= len(BondType)).any():
            raise ValueError(
                f"BondType {np.max(bond_types)} is invalid"
            )
        self._bonds[:,2] = bond_types

    # Sanitize the input and cache the maximum number of bonds
    # a single atom has
    self._remove_redundant_bonds()
    self._max_bonds_per_atom = self._get_max_bonds_per_atom()
316
+
317
@staticmethod
def concatenate(bonds_lists):
    """
    Merge multiple :class:`BondList` objects into a single
    :class:`BondList`.

    The atom indices of each bond list are increased by the summed
    atom count of all bond lists preceding it.

    Parameters
    ----------
    bonds_lists : iterable object of BondList
        The bond lists to be concatenated.

    Returns
    -------
    concatenated_bonds : BondList
        The concatenated bond lists.

    Examples
    --------

    >>> bonds1 = BondList(2, np.array([(0, 1)]))
    >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
    >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
    >>> print(merged_bonds.get_atom_count())
    5
    >>> print(merged_bonds.as_array()[:, :2])
    [[0 1]
     [2 3]
     [2 4]]
    """
    # The input is traversed several times below
    # -> materialize it, if it is not a sequence already
    if not isinstance(bonds_lists, Sequence):
        bonds_lists = list(bonds_lists)

    cdef np.ndarray merged_bonds = np.concatenate(
        [bond_list._bonds for bond_list in bonds_lists]
    )
    # Shift the atom indices of each appended bond list by the number
    # of atoms preceding it
    # (consistent with addition of AtomArray)
    cdef int row_start = 0, row_stop = 0
    cdef int atom_offset = 0
    for bond_list in bonds_lists:
        row_stop = row_start + bond_list._bonds.shape[0]
        merged_bonds[row_start : row_stop, :2] += atom_offset
        atom_offset += bond_list._atom_count
        row_start = row_stop

    # After the loop the offset equals the total atom count
    cdef merged_bond_list = BondList(atom_offset)
    # The array is assigned afterwards instead of being passed to the
    # constructor to avoid the unnecessary removal of redundant bonds
    # and calculation of the maximum bonds per atom
    merged_bond_list._bonds = merged_bonds
    merged_bond_list._max_bonds_per_atom = max(
        [bond_list._max_bonds_per_atom for bond_list in bonds_lists]
    )
    return merged_bond_list
371
+
372
def __copy_create__(self):
    # Only the atom count is passed to the constructor:
    # This gives an empty bond list and thereby avoids
    # the removal of redundant bonds
    # and the calculation of maximum bonds per atom
    empty_clone = BondList(self._atom_count)
    return empty_clone
377
+
378
def __copy_fill__(self, clone):
    # Transfer the state into the clone:
    # the cached maximum is taken over as is,
    # the bond array is copied, so the clone does not share memory
    clone._max_bonds_per_atom = self._max_bonds_per_atom
    clone._bonds = self._bonds.copy()
382
+
383
def offset_indices(self, int offset):
    """
    offset_indices(offset)

    Increase all atom indices in the :class:`BondList` by the given
    offset.

    Implicitly this increases the atom count.

    Parameters
    ----------
    offset : int
        The atom indices are increased by this value.
        Must not be negative.

    Raises
    ------
    ValueError
        If `offset` is negative.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> print(bond_list)
    [[0 1 0]
     [1 3 0]
     [1 4 0]]
    >>> bond_list.offset_indices(2)
    >>> print(bond_list)
    [[2 3 0]
     [3 5 0]
     [3 6 0]]
    """
    if offset < 0:
        raise ValueError("Offset must not be negative")
    # Only the two atom index columns are shifted,
    # the bond type column stays untouched
    self._bonds[:,:2] += offset
    self._atom_count += offset
416
+
417
def as_array(self):
    """
    as_array()

    Obtain a copy of the internal :class:`ndarray`.

    Returns
    -------
    array : ndarray, shape=(n,3), dtype=np.uint32
        Copy of the internal :class:`ndarray`.
        Each row describes one bond:
        The first and second column contain the indices of the two
        atoms involved in the bond, the third column contains the
        :class:`BondType`.
    """
    # A copy is handed out, so changes made by the caller
    # do not affect this object
    bond_array = self._bonds.copy()
    return bond_array
433
+
434
def as_set(self):
    """
    as_set()

    Obtain a set representation of the :class:`BondList`.

    Returns
    -------
    bond_set : set of tuple(int, int, int)
        A set of tuples.
        Each tuple represents one bond as
        ``(first atom index, second atom index, bond type)``,
        where the bond type is the integer value of the
        :class:`BondType`.
    """
    cdef uint32[:,:] bonds_v = self._bonds
    cdef int row
    cdef set unique_bonds = set()
    for row in range(bonds_v.shape[0]):
        bond = (bonds_v[row,0], bonds_v[row,1], bonds_v[row,2])
        unique_bonds.add(bond)
    return unique_bonds
457
+
458
def as_graph(self):
    """
    as_graph()

    Obtain a graph representation of the :class:`BondList`.

    Returns
    -------
    graph : Graph
        A *NetworkX* :class:`Graph`.
        The atom indices are nodes, the bonds are edges.
        Each edge has a ``"bond_type"`` attribute containing the
        :class:`BondType`.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0,2), (1,3,1), (1,4,1)]))
    >>> graph = bond_list.as_graph()
    >>> print(graph.nodes)
    [0, 1, 3, 4]
    >>> print(graph.edges)
    [(0, 1), (1, 3), (1, 4)]
    >>> for i, j in graph.edges:
    ...     print(i, j, graph.get_edge_data(i, j))
    0 1 {'bond_type': <BondType.DOUBLE: 2>}
    1 3 {'bond_type': <BondType.SINGLE: 1>}
    1 4 {'bond_type': <BondType.SINGLE: 1>}
    """
    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds

    # Collect all edges first and add them to the graph in one call
    # Only atoms that take part in a bond become nodes this way
    cdef list edge_list = []
    for row in range(bonds_v.shape[0]):
        attributes = {"bond_type": BondType(bonds_v[row,2])}
        edge_list.append((bonds_v[row,0], bonds_v[row,1], attributes))

    graph = nx.Graph()
    graph.add_edges_from(edge_list)
    return graph
500
+
501
def remove_aromaticity(self):
    """
    Remove aromaticity from the bond types.

    :attr:`BondType.AROMATIC_{ORDER}` is converted into
    :attr:`BondType.{ORDER}`.
    :attr:`BondType.AROMATIC` is converted into :attr:`BondType.ANY`.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_aromaticity()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 SINGLE
    1 2 DOUBLE
    """
    replacements = (
        (BondType.AROMATIC_SINGLE, BondType.SINGLE),
        (BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
        (BondType.AROMATIC_TRIPLE, BondType.TRIPLE),
        (BondType.AROMATIC, BondType.ANY),
    )
    # Basic slicing gives a view on the bond type column,
    # so the assignments below change the bond array itself
    type_column = self._bonds[:, 2]
    for aromatic_type, non_aromatic_type in replacements:
        type_column[type_column == aromatic_type] = non_aromatic_type
528
+
529
def remove_kekulization(self):
    """
    Remove the bond order information from aromatic bonds, i.e. convert all
    aromatic bonds to :attr:`BondType.AROMATIC`.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_kekulization()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 AROMATIC
    1 2 AROMATIC
    """
    # The aromatic bond types that carry an explicit bond order
    kekulized_types = (
        BondType.AROMATIC_SINGLE,
        BondType.AROMATIC_DOUBLE,
        BondType.AROMATIC_TRIPLE,
    )
    is_kekulized = np.isin(self._bonds[:, 2], kekulized_types)
    self._bonds[is_kekulized, 2] = BondType.AROMATIC
555
+
556
def remove_bond_order(self):
    """
    Convert all bonds to :attr:`BondType.ANY`.

    The atom indices of the bonds are not changed.
    """
    # Basic slicing gives a view on the bond type column,
    # so the assignment changes the bond array itself
    type_column = self._bonds[:, 2]
    type_column[:] = BondType.ANY
561
+
562
def convert_bond_type(self, original_bond_type, new_bond_type):
    """
    convert_bond_type(original_bond_type, new_bond_type)

    Convert all occurrences of a given bond type into another bond type.

    Parameters
    ----------
    original_bond_type : BondType or int
        The bond type to convert.
    new_bond_type : BondType or int
        The new bond type.

    Examples
    --------

    >>> bond_list = BondList(4)
    >>> bond_list.add_bond(0, 1, BondType.DOUBLE)
    >>> bond_list.add_bond(1, 2, BondType.COORDINATION)
    >>> bond_list.add_bond(2, 3, BondType.COORDINATION)
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 DOUBLE
    1 2 COORDINATION
    2 3 COORDINATION
    >>> bond_list.convert_bond_type(BondType.COORDINATION, BondType.SINGLE)
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 DOUBLE
    1 2 SINGLE
    2 3 SINGLE
    """
    # Basic slicing gives a view on the bond type column,
    # so the masked assignment changes the bond array itself
    type_column = self._bonds[:, 2]
    type_column[type_column == original_bond_type] = new_bond_type
596
+
597
def get_atom_count(self):
    """
    get_atom_count()

    Get the atom count.

    Returns
    -------
    atom_count : int
        The atom count.
    """
    atom_count = self._atom_count
    return atom_count
609
+
610
def get_bond_count(self):
    """
    get_bond_count()

    Get the number of bonds.

    Returns
    -------
    bond_count : int
        The number of bonds. This is equal to the length of the
        internal :class:`ndarray` containing the bonds.
    """
    # Each row of the bond array represents one bond
    return self._bonds.shape[0]
623
+
624
def get_bonds(self, int32 atom_index):
    """
    get_bonds(atom_index)

    Obtain the indices of the atoms bonded to the atom with the
    given index as well as the corresponding bond types.

    Parameters
    ----------
    atom_index : int
        The index of the atom to get the bonds for.

    Returns
    -------
    bonds : np.ndarray, dtype=np.uint32, shape=(k,)
        The indices of connected atoms.
    bond_types : np.ndarray, dtype=np.uint8, shape=(k,)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the type (or order) of the bonds to
        the connected atoms.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> bonds, types = bond_list.get_bonds(1)
    >>> print(bonds)
    [0 3 4]
    """
    cdef int row = 0, n_found = 0

    cdef uint32 query = _to_positive_index(atom_index, self._atom_count)

    cdef uint32[:,:] all_bonds_v = self._bonds
    # Pessimistic array allocation:
    # reserve as many elements as the atom with the most bonds needs
    cdef np.ndarray partners = np.zeros(self._max_bonds_per_atom,
                                        dtype=np.uint32)
    cdef uint32[:] partners_v = partners
    cdef np.ndarray partner_types = np.zeros(self._max_bonds_per_atom,
                                             dtype=np.uint8)
    cdef uint8[:] partner_types_v = partner_types

    for row in range(all_bonds_v.shape[0]):
        # The requested atom may be at either position of the bond
        # -> store the index at the respective other position
        if all_bonds_v[row,0] == query:
            partners_v[n_found] = all_bonds_v[row,1]
        elif all_bonds_v[row,1] == query:
            partners_v[n_found] = all_bonds_v[row,0]
        else:
            continue
        partner_types_v[n_found] = all_bonds_v[row,2]
        n_found += 1

    # Hand out only the part of the arrays that was actually filled
    return partners[:n_found], partner_types[:n_found]
686
+
687
+
688
def get_all_bonds(self):
    """
    get_all_bonds()

    For each atom index, give the indices of the atoms bonded to
    this atom as well as the corresponding bond types.

    Returns
    -------
    bonds : np.ndarray, dtype=np.int32, shape=(n,k)
        The indices of connected atoms.
        The first dimension represents the atoms,
        the second dimension represents the indices of atoms bonded
        to the respective atom.
        Atoms can have different numbers of atoms bonded to
        them.
        Therefore, the length of the second dimension *k* is equal
        to the maximum number of bonds for an atom in this
        :class:`BondList`.
        For atoms with less bonds, the corresponding entry in the
        array is padded with ``-1`` values.
    bond_types : np.ndarray, dtype=np.int8, shape=(n,k)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the bond type (or order) corresponding
        to the returned `bonds`.
        It uses the same ``-1``-padding.

    Examples
    --------

    >>> # BondList for benzene
    >>> bond_list = BondList(
    ...     12,
    ...     np.array([
    ...         # Bonds between the carbon atoms in the ring
    ...         (0, 1, BondType.AROMATIC_SINGLE),
    ...         (1, 2, BondType.AROMATIC_DOUBLE),
    ...         (2, 3, BondType.AROMATIC_SINGLE),
    ...         (3, 4, BondType.AROMATIC_DOUBLE),
    ...         (4, 5, BondType.AROMATIC_SINGLE),
    ...         (5, 0, BondType.AROMATIC_DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 6, BondType.SINGLE),
    ...         (1, 7, BondType.SINGLE),
    ...         (2, 8, BondType.SINGLE),
    ...         (3, 9, BondType.SINGLE),
    ...         (4, 10, BondType.SINGLE),
    ...         (5, 11, BondType.SINGLE),
    ...     ])
    ... )
    >>> bonds, types = bond_list.get_all_bonds()
    >>> print(bonds)
    [[ 1  5  6]
     [ 0  2  7]
     [ 1  3  8]
     [ 2  4  9]
     [ 3  5 10]
     [ 4  0 11]
     [ 0 -1 -1]
     [ 1 -1 -1]
     [ 2 -1 -1]
     [ 3 -1 -1]
     [ 4 -1 -1]
     [ 5 -1 -1]]
    >>> print(types)
    [[ 5  6  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]]
    >>> for i in range(bond_list.get_atom_count()):
    ...     bonds_for_atom = bonds[i]
    ...     # Remove trailing '-1' values
    ...     bonds_for_atom = bonds_for_atom[bonds_for_atom != -1]
    ...     print(f"{i}: {bonds_for_atom}")
    0: [1 5 6]
    1: [0 2 7]
    2: [1 3 8]
    3: [2 4 9]
    4: [ 3  5 10]
    5: [ 4  0 11]
    6: [0]
    7: [1]
    8: [2]
    9: [3]
    10: [4]
    11: [5]
    """
    cdef int i=0
    cdef uint32 atom_index_i, atom_index_j, bond_type

    cdef uint32[:,:] all_bonds_v = self._bonds
    # The size of 2nd dimension is equal to the atom with most bonds
    # Since each atom can have an individual number of bonded atoms,
    # The arrays are padded with '-1'
    # Signed dtypes are used, as the padding value is negative
    cdef np.ndarray bonds = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int32
    )
    cdef int32[:,:] bonds_v = bonds
    cdef np.ndarray bond_types = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int8
    )
    cdef int8[:,:] bond_types_v = bond_types
    # Track the number of already found bonds for each given index
    cdef np.ndarray lengths = np.zeros(self._atom_count, dtype=np.uint32)
    cdef uint32[:] lengths_v = lengths

    for i in range(all_bonds_v.shape[0]):
        atom_index_i = all_bonds_v[i,0]
        atom_index_j = all_bonds_v[i,1]
        bond_type = all_bonds_v[i,2]
        # Add second bonded atom for the first bonded atom
        # and vice versa
        # Use 'lengths' variable to append the value:
        # it gives the next free column in the row of each atom
        bonds_v[atom_index_i, lengths_v[atom_index_i]] = atom_index_j
        bonds_v[atom_index_j, lengths_v[atom_index_j]] = atom_index_i
        bond_types_v[atom_index_i, lengths_v[atom_index_i]] = bond_type
        bond_types_v[atom_index_j, lengths_v[atom_index_j]] = bond_type
        # Increment lengths
        lengths_v[atom_index_i] += 1
        lengths_v[atom_index_j] += 1

    return bonds, bond_types
819
+
820
+
821
def adjacency_matrix(self):
    r"""
    adjacency_matrix()

    Represent this :class:`BondList` as adjacency matrix.

    The adjacency matrix is a quadratic matrix with boolean values
    according to

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{True},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            \text{False}, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=bool, shape=(n,n)
        The created adjacency matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.adjacency_matrix())
    [[False  True  True  True]
     [ True False False False]
     [ True False False False]
     [ True False False False]]
    """
    first_atoms = self._bonds[:, 0]
    second_atoms = self._bonds[:, 1]
    n_atoms = self._atom_count
    adjacency = np.zeros((n_atoms, n_atoms), dtype=bool)
    # Each bond is entered in both directions
    # to obtain a symmetric matrix
    adjacency[first_atoms, second_atoms] = True
    adjacency[second_atoms, first_atoms] = True
    return adjacency
869
+
870
+
871
def bond_type_matrix(self):
    r"""
    bond_type_matrix()

    Represent this :class:`BondList` as a matrix depicting the bond
    type.

    The matrix is a quadratic matrix:

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{BondType}_{ij}, & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            -1,                   & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=np.int8, shape=(n,n)
        The created bond type matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.bond_type_matrix())
    [[-1  2  1  1]
     [ 2 -1 -1 -1]
     [ 1 -1 -1 -1]
     [ 1 -1 -1 -1]]
    """
    first_atoms = self._bonds[:, 0]
    second_atoms = self._bonds[:, 1]
    bond_types = self._bonds[:, 2]
    n_atoms = self._atom_count
    # A signed dtype is used, as '-1' marks atom pairs without a bond
    type_matrix = np.full((n_atoms, n_atoms), -1, dtype=np.int8)
    # Each bond is entered in both directions
    # to obtain a symmetric matrix
    type_matrix[first_atoms, second_atoms] = bond_types
    type_matrix[second_atoms, first_atoms] = bond_types
    return type_matrix
919
+
920
+
921
def add_bond(self, int32 atom_index1, int32 atom_index2,
             bond_type=BondType.ANY):
    """
    add_bond(atom_index1, atom_index2, bond_type=BondType.ANY)

    Add a bond to the :class:`BondList`.

    If the bond is already existent, only the bond type is updated.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms to create a bond for.
    bond_type : BondType or int, optional
        The type of the bond. Default is :attr:`BondType.ANY`.
    """
    if bond_type >= len(BondType):
        raise ValueError(f"BondType {bond_type} is invalid")

    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    _sort(&index1, &index2)

    cdef int row
    cdef uint32[:,:] all_bonds_v = self._bonds
    for row in range(all_bonds_v.shape[0]):
        # The atom indices within each bond are sorted,
        # so only one orientation needs to be compared
        if all_bonds_v[row,0] == index1 and all_bonds_v[row,1] == index2:
            # The bond already exists -> only update its bond type
            all_bonds_v[row,2] = int(bond_type)
            return

    # The bond is new -> append it as additional row
    new_row = np.array([(index1, index2, int(bond_type))], dtype=np.uint32)
    self._bonds = np.append(self._bonds, new_row, axis=0)
    self._max_bonds_per_atom = self._get_max_bonds_per_atom()
965
+
966
def remove_bond(self, int32 atom_index1, int32 atom_index2):
    """
    remove_bond(atom_index1, atom_index2)

    Remove a bond from the :class:`BondList`.

    If the bond is not existent in the :class:`BondList`, nothing happens.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms whose bond should be removed.
    """
    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    _sort(&index1, &index2)

    # Find the bond in bond list
    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            self._bonds = np.delete(self._bonds, i, axis=0)
            # Stop after the deletion:
            # 'all_bonds_v' still views the array before the deletion,
            # so the row index 'i' would not be valid for the new
            # array in a later iteration
            break
    # The maximum bonds per atom is not recalculated,
    # as the value can only be decreased on bond removal
    # Since this value is only used for pessimistic array allocation
    # in 'get_bonds()', the slightly larger memory usage is a better
    # option than the repetitive call of _get_max_bonds_per_atom()
996
+
997
def remove_bonds_to(self, int32 atom_index):
    """
    remove_bonds_to(atom_index)

    Remove all bonds from the :class:`BondList` where the given atom
    is involved.

    Parameters
    ----------
    atom_index : int
        The index of the atom whose bonds should be removed.
    """
    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    # A bond involves the atom,
    # if the atom is at the first or at the second position
    involves_atom = (
        (self._bonds[:, 0] == index) | (self._bonds[:, 1] == index)
    )
    # Keep only the bonds that do not involve the atom
    self._bonds = self._bonds[~involves_atom]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
1024
+
1025
def remove_bonds(self, bond_list):
    """
    remove_bonds(bond_list)

    Remove multiple bonds from the :class:`BondList`.

    All bonds present in `bond_list` are removed from this instance.
    If a bond is not existent in this instance, nothing happens.
    Only the bond indices, not the bond types, are relevant for
    this.

    Parameters
    ----------
    bond_list : BondList
        The bonds in `bond_list` are removed from this instance.
    """
    cdef int own_row=0, rem_row=0

    # The bonds of this instance
    cdef uint32[:,:] own_bonds_v = self._bonds
    # The bonds that should be removed
    cdef uint32[:,:] rem_bonds_v = bond_list._bonds
    # Marks the bonds of this instance that are kept
    # (uint8 instead of bool to allow usage of a memory view)
    cdef np.ndarray keep = np.ones(own_bonds_v.shape[0], dtype=np.uint8)
    cdef uint8[:] keep_v = keep
    for own_row in range(own_bonds_v.shape[0]):
        for rem_row in range(rem_bonds_v.shape[0]):
            # The bond type column is intentionally not compared
            if own_bonds_v[own_row,0] != rem_bonds_v[rem_row,0]:
                continue
            if own_bonds_v[own_row,1] != rem_bonds_v[rem_row,1]:
                continue
            keep_v[own_row] = False

    # Remove the bonds
    self._bonds = self._bonds[keep.astype(bool, copy=False)]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
1059
+
1060
def merge(self, bond_list):
    """
    merge(bond_list)

    Merge another :class:`BondList` with this instance into a new
    object.
    If a bond appears in both :class:`BondList`'s, the
    :class:`BondType` from the given `bond_list` takes precedence.

    The internal :class:`ndarray` instances containing the bonds are
    simply concatenated and the new atom count is the maximum of
    both bond lists.

    Parameters
    ----------
    bond_list : BondList
        This bond list is merged with this instance.

    Returns
    -------
    bond_list : BondList
        The merged :class:`BondList`.

    Notes
    -----
    This is not equal to using the `+` operator.

    Examples
    --------

    >>> bond_list1 = BondList(3, np.array([(0,1),(1,2)]))
    >>> bond_list2 = BondList(5, np.array([(2,3),(3,4)]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list.get_atom_count())
    5
    >>> print(merged_list)
    [[0 1 0]
     [1 2 0]
     [2 3 0]
     [3 4 0]]

    The BondList given as parameter takes precedence:

    >>> # Specify bond type to see where a bond is taken from
    >>> bond_list1 = BondList(4, np.array([
    ...     (0, 1, BondType.SINGLE),
    ...     (1, 2, BondType.SINGLE)
    ... ]))
    >>> bond_list2 = BondList(4, np.array([
    ...     (1, 2, BondType.DOUBLE),    # This one is a duplicate
    ...     (2, 3, BondType.DOUBLE)
    ... ]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list)
    [[0 1 1]
     [1 2 1]
     [2 3 2]]
    """
    merged_atom_count = max(self._atom_count, bond_list._atom_count)
    # The bonds of the given bond list are put in front of the bonds
    # of this instance
    stacked_bonds = np.concatenate(
        [bond_list.as_array(), self.as_array()], axis=0
    )
    return BondList(merged_atom_count, stacked_bonds)
1125
+
1126
def __add__(self, bond_list):
    # The '+' operator appends the given bond list to this one
    # by delegating to 'concatenate()'
    operands = [self, bond_list]
    return BondList.concatenate(operands)
1128
+
1129
def __getitem__(self, index):
    # Index this bond list with an atom index:
    # - A single integer is forwarded to 'get_bonds()'
    # - A boolean ndarray or any other index gives a new BondList,
    #   where bonds to atoms that are not selected are removed and
    #   the atom indices of the remaining bonds are adapted

    ## Variables for both, integer and boolean index arrays
    cdef uint32[:,:] all_bonds_v
    cdef int i
    cdef uint32* index1_ptr
    cdef uint32* index2_ptr
    # Marks the bonds that are kept
    # (uint8 instead of bool to allow usage of a memory view)
    cdef np.ndarray removal_filter
    cdef uint8[:] removal_filter_v

    ## Variables for integer arrays
    cdef int32[:] inverse_index_v
    cdef int32 new_index1, new_index2

    ## Variables for boolean mask
    # Boolean mask representation of the index
    cdef np.ndarray mask
    cdef uint8[:] mask_v
    # For each atom index, the value its index is decreased by
    cdef np.ndarray offsets
    cdef uint32[:] offsets_v

    if isinstance(index, numbers.Integral):
        ## Handle single index
        return self.get_bonds(index)

    elif isinstance(index, np.ndarray) and index.dtype == bool:
        ## Handle boolean masks
        # The bonds are modified in-place in a copy of this object
        copy = self.copy()
        all_bonds_v = copy._bonds
        # Use 'uint8' instead of 'bool' for memory view
        # NOTE(review): no check is visible here that the mask length
        # equals the atom count - confirm that callers guarantee this
        mask = np.frombuffer(index, dtype=np.uint8)

        # Each time an atom is missing in the mask,
        # the offset is increased by one
        offsets = np.cumsum(
            ~mask.astype(bool, copy=False), dtype=np.uint32
        )
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        mask_v = mask
        offsets_v = offsets
        # If an atom in a bond is not masked,
        # the bond is removed from the list
        # If an atom is masked,
        # its index value is decreased by the respective offset
        # The offset is necessary, as removing atoms in an AtomArray
        # decreases the index of the following atoms
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            if mask_v[index1_ptr[0]] and mask_v[index2_ptr[0]]:
                # Both atoms involved in bond are masked
                # -> decrease atom index by offset
                index1_ptr[0] -= offsets_v[index1_ptr[0]]
                index2_ptr[0] -= offsets_v[index2_ptr[0]]
            else:
                # At least one atom involved in bond is not masked
                # -> remove bond
                removal_filter_v[i] = False
        # Apply the bond removal filter
        copy._bonds = copy._bonds[removal_filter.astype(bool, copy=False)]
        # The new atom count is the number of selected atoms
        copy._atom_count = len(np.nonzero(mask)[0])
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy

    else:
        ## Convert any other type of index into index array, as it preserves order
        copy = self.copy()
        all_bonds_v = copy._bonds
        index = _to_index_array(index, self._atom_count)
        index = _to_positive_index_array(index, self._atom_count)

        # The inverse index is required to efficiently obtain
        # the new index of an atom in case of an unsorted index
        # array
        # presumably the inverse index contains '-1' for each atom
        # that is not part of 'index' (inferred from the checks
        # below) - verify against '_invert_index()'
        inverse_index_v = _invert_index(index, self._atom_count)
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            new_index1 = inverse_index_v[index1_ptr[0]]
            new_index2 = inverse_index_v[index2_ptr[0]]
            if new_index1 != -1 and new_index2 != -1:
                # Both atoms involved in bond are included
                # by index array
                # -> assign new atom indices
                index1_ptr[0] = <int32>new_index1
                index2_ptr[0] = <int32>new_index2
            else:
                # At least one atom in bond is not included
                # -> remove bond
                removal_filter_v[i] = False

        copy._bonds = copy._bonds[
            removal_filter.astype(bool, copy=False)
        ]
        # Again, sort indices per bond
        # as the correct order is not guaranteed anymore
        # for unsorted index arrays
        copy._bonds[:,:2] = np.sort(copy._bonds[:,:2], axis=1)
        # The new atom count is the number of selected atoms
        copy._atom_count = len(index)
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy
1237
+
1238
def __iter__(self):
    # Iteration is explicitly disabled for this class
    message = "'BondList' object is not iterable"
    raise TypeError(message)
1240
+
1241
def __str__(self):
    # The string representation is the one of the bond array
    bond_array = self.as_array()
    return str(bond_array)
1243
+
1244
def __eq__(self, item):
    # Two bond lists are equal, if they have the same atom count
    # and contain the same bonds (including the bond types),
    # irrespective of the order of the bonds
    if not isinstance(item, BondList):
        return False
    if self._atom_count != item._atom_count:
        return False
    return self.as_set() == item.as_set()
1249
+
1250
def __contains__(self, item):
    # Check whether a bond between two atoms is part of this bond list
    # 'item' must be a tuple of exactly two atom indices,
    # otherwise a TypeError is raised
    # The bond type is not taken into account
    if not isinstance(item, tuple) or len(item) != 2:
        raise TypeError("Expected a tuple of atom indices")

    cdef int i=0

    cdef uint32 match_index1, match_index2
    # Sort indices for faster search in loop:
    # The atom indices within each bond are sorted as well,
    # so only one orientation needs to be compared
    cdef uint32 atom_index1 = min(item)
    cdef uint32 atom_index2 = max(item)

    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        match_index1 = all_bonds_v[i,0]
        match_index2 = all_bonds_v[i,1]
        if atom_index1 == match_index1 and atom_index2 == match_index2:
            return True

    return False
1269
+
1270
+
1271
def _get_max_bonds_per_atom(self):
    # Calculate the number of bonds of the atom with the most bonds
    # Without any atoms there is nothing to count
    if self._atom_count == 0:
        return 0

    cdef int row
    cdef uint32[:,:] all_bonds_v = self._bonds
    # For each atom index, count the bonds the atom is involved in
    cdef np.ndarray bonds_per_atom = np.zeros(self._atom_count,
                                              dtype=np.uint32)
    cdef uint32[:] bonds_per_atom_v = bonds_per_atom
    for row in range(all_bonds_v.shape[0]):
        # Each bond counts for both of its atoms
        bonds_per_atom_v[all_bonds_v[row,0]] += 1
        bonds_per_atom_v[all_bonds_v[row,1]] += 1
    return np.max(bonds_per_atom_v)
1286
+
1287
def _remove_redundant_bonds(self):
    # Remove all bonds from the bond array, whose pair of atom indices
    # already appeared in an earlier row
    # Only the first occurrence of each pair is kept,
    # the bond type is not part of the comparison
    cdef int j
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Boolean mask for final removal of redundant atoms
    # Unfortunately views of boolean ndarrays are not supported
    # -> use uint8 array
    cdef np.ndarray redundancy_filter = np.ones(all_bonds_v.shape[0],
                                                dtype=np.uint8)
    cdef uint8[:] redundancy_filter_v = redundancy_filter
    # Array of pointers to C-arrays
    # The array is indexed with the atom indices in the bond list
    # The respective C-array contains the indices of bonded atoms
    # All pointers are initially zero, i.e. NULL
    cdef ptr[:] ptrs_v = np.zeros(self._atom_count, dtype=np.uint64)
    # Stores the length of the C-arrays
    cdef int[:] array_len_v = np.zeros(self._atom_count, dtype=np.int32)
    # Iterate over bond list:
    # If bond is already listed in the array of pointers,
    # set filter to false at that position
    # Else add bond to array of pointers
    cdef uint32 i1, i2
    cdef uint32* array_ptr
    cdef int length

    try:
        for j in range(all_bonds_v.shape[0]):
            i1 = all_bonds_v[j,0]
            i2 = all_bonds_v[j,1]
            # Since the bonds have the atom indices sorted
            # the reverse check is omitted
            if _in_array(<uint32*>ptrs_v[i1], i2, array_len_v[i1]):
                redundancy_filter_v[j] = False
            else:
                # Append bond in respective C-array
                # and update C-array length
                length = array_len_v[i1] +1
                array_ptr = <uint32*>ptrs_v[i1]
                # For a NULL pointer realloc() allocates a new array
                array_ptr = <uint32*>realloc(
                    array_ptr, length * sizeof(uint32)
                )
                # On failure the previous pointer is still stored
                # in 'ptrs_v', so it is freed in the 'finally' block
                if not array_ptr:
                    raise MemoryError()
                array_ptr[length-1] = i2
                ptrs_v[i1] = <ptr>array_ptr
                array_len_v[i1] = length

    finally:
        # Free pointers
        # NOTE(review): 'i' has no 'cdef' declaration in this function,
        # in contrast to 'j' - confirm this is intended
        for i in range(ptrs_v.shape[0]):
            free(<int*>ptrs_v[i])

    # Eventually remove redundant bonds
    self._bonds = self._bonds[redundancy_filter.astype(bool, copy=False)]
1339
+
1340
+
1341
cdef uint32 _to_positive_index(int32 index, uint32 array_length) except -1:
    """
    Convert a potentially negative index into a positive index.

    A negative `index` counts from the end of an array with
    `array_length` elements.
    An :class:`IndexError` is raised, if the index is out of range
    for this array length.
    """
    # The sum is computed as signed 64 bit integer:
    # In an unsigned 32 bit integer a too small negative index
    # would wrap around to a large positive value
    # and hence could never be detected as out of range
    cdef long long pos_index
    if index < 0:
        pos_index = <long long> array_length + <long long> index
        if pos_index < 0:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        # At this point 0 <= pos_index < array_length
        return <uint32> pos_index
    else:
        if <uint32> index >= array_length:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> index
1361
+
1362
+
1363
def _to_positive_index_array(index_array, length):
    """
    Return a copy of `index_array`, where negative indices are replaced
    by their positive counterparts.

    The input array is left untouched and the returned array has the
    same shape as the input.

    Raises
    ------
    IndexError
        If any index is out of bounds for an array of the given
        `length`, either before or after the conversion.
    """
    shape = index_array.shape
    # Work on a flat copy, so the caller's array is never modified
    flat_indices = index_array.copy().flatten()

    # Negative indices count from the end
    is_negative = flat_indices < 0
    flat_indices[is_negative] = length + flat_indices[is_negative]

    # Indices that are still negative were smaller than '-length'
    if (flat_indices < 0).any():
        raise IndexError(
            f"Index {np.min(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    if (flat_indices >= length).any():
        raise IndexError(
            f"Index {np.max(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )

    return flat_indices.reshape(shape)
1384
+
1385
+
1386
def _to_index_array(object index, uint32 length):
    """
    Convert an index of arbitrary type into an index array.

    An integer array is returned as it is.
    Any other index is applied to an array containing all indices from
    ``0`` to ``length - 1``, so that NumPy resolves it into the selected
    indices.
    """
    is_integer_array = (
        isinstance(index, np.ndarray)
        and np.issubdtype(index.dtype, np.integer)
    )
    if is_integer_array:
        return index
    # Let NumPy translate the index into the indices it selects
    return np.arange(length, dtype=np.uint32)[index]
1396
+
1397
+
1398
cdef inline bint _in_array(uint32* array, uint32 atom_index, int array_length):
    """
    Check whether `atom_index` is one of the first `array_length`
    values of the C-array `array`.

    A ``NULL`` pointer is treated as an empty array.
    """
    cdef int pos
    if array != NULL:
        # Linear search through the array
        for pos in range(array_length):
            if array[pos] == atom_index:
                return True
    return False
1409
+
1410
+
1411
cdef inline void _sort(uint32* index1_ptr, uint32* index2_ptr):
    """
    Order the two values behind the given pointers in place, so that
    the value behind `index1_ptr` is not greater than the value behind
    `index2_ptr`.
    """
    cdef uint32 first = index1_ptr[0]
    cdef uint32 second = index2_ptr[0]
    if first > second:
        # Write the values back in exchanged order
        index1_ptr[0] = second
        index2_ptr[0] = first
1418
+
1419
+
1420
@cython.wraparound(False)
# Do bounds check, as the input indices may be out of bounds
def _invert_index(IndexType[:] index_v, uint32 length):
    """
    Create the inverse mapping of an index array.

    If *input[i] = j*, then *output[j] = i*.
    Each position *j* that does not appear in *input* is marked
    with *output[j]* = -1.

    Parameters
    ----------
    index_v : memoryview of integer type
        The index array to invert.
    length : int
        The length of the returned inverse index array.

    Returns
    -------
    inverse_index : ndarray, dtype=int32
        The inverted index array.

    Raises
    ------
    NotImplementedError
        If the same index appears more than once in the input.
    """
    cdef int32 position
    cdef IndexType target
    inverse_index = np.full(length, -1, dtype=np.int32)
    cdef int32[:] inverse_index_v = inverse_index

    for position in range(index_v.shape[0]):
        target = index_v[position]
        # A value other than -1 means that this target
        # has already been assigned by a previous position
        if inverse_index_v[target] != -1:
            # One index can theoretically appear multiple times
            # This is currently not supported
            raise NotImplementedError(
                f"Duplicate indices are not supported, "
                f"but index {target} appeared multiple times"
            )
        inverse_index_v[target] = position

    return inverse_index
1446
+
1447
+
1448
+
1449
+
1450
# fmt: off
# Default bond distance ranges used by 'connect_via_distances()':
# Each key is a pair of (upper case) element symbols and each value is
# the '(minimum, maximum)' distance accepted for a bond between atoms
# of these elements.
# Each pair is listed in a single order only, 'connect_via_distances()'
# registers both element orders when it builds its lookup dictionary.
# NOTE(review): distances are presumably given in Angstrom - confirm
_DEFAULT_DISTANCE_RANGE = {
    # Taken from Allen et al.
    # Each bound is widened by two standard deviations:
    #              min - 2*std       max + 2*std
    ("B", "C" ) : (1.556 - 2*0.015, 1.556 + 2*0.015),
    ("BR", "C" ) : (1.875 - 2*0.029, 1.966 + 2*0.029),
    ("BR", "O" ) : (1.581 - 2*0.007, 1.581 + 2*0.007),
    ("C", "C" ) : (1.174 - 2*0.011, 1.588 + 2*0.025),
    ("C", "CL") : (1.713 - 2*0.011, 1.849 + 2*0.011),
    ("C", "F" ) : (1.320 - 2*0.009, 1.428 + 2*0.009),
    ("C", "H" ) : (1.059 - 2*0.030, 1.099 + 2*0.007),
    ("C", "I" ) : (2.095 - 2*0.015, 2.162 + 2*0.015),
    ("C", "N" ) : (1.325 - 2*0.009, 1.552 + 2*0.023),
    ("C", "O" ) : (1.187 - 2*0.011, 1.477 + 2*0.008),
    ("C", "P" ) : (1.791 - 2*0.006, 1.855 + 2*0.019),
    ("C", "S" ) : (1.630 - 2*0.014, 1.863 + 2*0.015),
    ("C", "SE") : (1.893 - 2*0.013, 1.970 + 2*0.032),
    ("C", "SI") : (1.837 - 2*0.012, 1.888 + 2*0.023),
    ("CL", "O" ) : (1.414 - 2*0.026, 1.414 + 2*0.026),
    ("CL", "P" ) : (1.997 - 2*0.035, 2.008 + 2*0.035),
    ("CL", "S" ) : (2.072 - 2*0.023, 2.072 + 2*0.023),
    ("CL", "SI") : (2.072 - 2*0.009, 2.072 + 2*0.009),
    ("F", "N" ) : (1.406 - 2*0.016, 1.406 + 2*0.016),
    ("F", "P" ) : (1.495 - 2*0.016, 1.579 + 2*0.025),
    ("F", "S" ) : (1.640 - 2*0.011, 1.640 + 2*0.011),
    ("F", "SI") : (1.588 - 2*0.014, 1.694 + 2*0.013),
    ("H", "N" ) : (1.009 - 2*0.022, 1.033 + 2*0.022),
    ("H", "O" ) : (0.967 - 2*0.010, 1.015 + 2*0.017),
    ("I", "O" ) : (2.144 - 2*0.028, 2.144 + 2*0.028),
    ("N", "N" ) : (1.124 - 2*0.015, 1.454 + 2*0.021),
    ("N", "O" ) : (1.210 - 2*0.011, 1.463 + 2*0.012),
    ("N", "P" ) : (1.571 - 2*0.013, 1.697 + 2*0.015),
    ("N", "S" ) : (1.541 - 2*0.022, 1.710 + 2*0.019),
    ("N", "SI") : (1.711 - 2*0.019, 1.748 + 2*0.022),
    ("O", "P" ) : (1.449 - 2*0.007, 1.689 + 2*0.024),
    ("O", "S" ) : (1.423 - 2*0.008, 1.580 + 2*0.015),
    ("O", "SI") : (1.622 - 2*0.014, 1.680 + 2*0.008),
    ("P", "P" ) : (2.214 - 2*0.022, 2.214 + 2*0.022),
    ("P", "S" ) : (1.913 - 2*0.014, 1.954 + 2*0.005),
    ("P", "SE") : (2.093 - 2*0.019, 2.093 + 2*0.019),
    ("P", "SI") : (2.264 - 2*0.019, 2.264 + 2*0.019),
    ("S", "S" ) : (1.897 - 2*0.012, 2.070 + 2*0.022),
    ("S", "SE") : (2.193 - 2*0.015, 2.193 + 2*0.015),
    ("S", "SI") : (2.145 - 2*0.020, 2.145 + 2*0.020),
    ("SE", "SE") : (2.340 - 2*0.024, 2.340 + 2*0.024),
    ("SI", "SE") : (2.359 - 2*0.012, 2.359 + 2*0.012),
}
# fmt: on
1498
+
1499
def connect_via_distances(atoms, dict distance_range=None, bint inter_residue=True,
                          default_bond_type=BondType.ANY, bint periodic=False):
    """
    connect_via_distances(atoms, distance_range=None, inter_residue=True,
                          default_bond_type=BondType.ANY, periodic=False)

    Create a :class:`BondList` for a given atom array, based on
    pairwise atom distances.

    A :attr:`BondType.ANY` bond is created for two atoms within the
    same residue, if the distance between them is within the expected
    bond distance range.
    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray
        The structure to create the :class:`BondList` for.
    distance_range : dict of tuple(str, str) -> tuple(float, float), optional
        Custom minimum and maximum bond distances.
        The dictionary keys are tuples of chemical elements representing
        the atoms to be potentially bonded.
        The order of elements within each tuple does not matter.
        The dictionary values are the minimum and maximum bond distance,
        respectively, for the given combination of elements.
        This parameter updates the default dictionary.
        Hence, the default bond distances for missing element pairs are
        still taken from the default dictionary.
        The default bond distances are taken from :footcite:`Allen1987`.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    default_bond_type : BondType or int, optional
        By default, all created bonds have :attr:`BondType.ANY`.
        An alternative :class:`BondType` can be given in this parameter.
    periodic : bool, optional
        If set to true, bonds can also be detected in periodic
        boundary conditions.
        The `box` attribute of `atoms` is required in this case.

    Returns
    -------
    BondList
        The created bond list.

    Raises
    ------
    TypeError
        If `atoms` is not an :class:`AtomArray`.
    BadStructureError
        If `periodic` is true, but `atoms` has no box.

    See Also
    --------
    connect_via_residue_names

    Notes
    -----
    This method might miss bonds, if the bond distance is unexpectedly
    high or low, or it might create false bonds, if two atoms within a
    residue are accidentally in the right distance.
    A more accurate method for determining bonds is
    :func:`connect_via_residue_names()`.

    References
    ----------

    .. footbibliography::
    """
    from .atoms import AtomArray
    from .geometry import distance
    from .residues import get_residue_starts

    cdef list bonds = []
    cdef int i
    cdef int curr_start_i, next_start_i
    cdef np.ndarray coord
    cdef np.ndarray coord_in_res
    cdef np.ndarray distances
    cdef float dist
    cdef np.ndarray elements
    cdef np.ndarray elements_in_res
    cdef int atom_index1, atom_index2
    cdef dict dist_ranges = {}
    cdef tuple dist_range
    cdef float min_dist, max_dist

    # Validate the input type before any of its attributes is accessed,
    # so an unsuitable object leads to the intended 'TypeError'
    # instead of an 'AttributeError' from the attribute lookup
    if not isinstance(atoms, AtomArray):
        raise TypeError(f"Expected 'AtomArray', not '{type(atoms).__name__}'")
    coord = atoms.coord
    elements = atoms.element

    if periodic:
        if atoms.box is None:
            raise BadStructureError("Atom array has no box")
        box = atoms.box
    else:
        box = None

    # Prepare distance dictionary...
    if distance_range is None:
        distance_range = {}
    # Merge default and custom entries
    # The custom entries come last, so they override the defaults
    for key, val in itertools.chain(
        _DEFAULT_DISTANCE_RANGE.items(), distance_range.items()
    ):
        element1, element2 = key
        # Add entries for both element orders
        dist_ranges[(element1.upper(), element2.upper())] = val
        dist_ranges[(element2.upper(), element1.upper())] = val

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # Omit exclusive stop in 'residue_starts'
    for i in range(len(residue_starts)-1):
        curr_start_i = residue_starts[i]
        next_start_i = residue_starts[i+1]

        elements_in_res = elements[curr_start_i : next_start_i]
        coord_in_res = coord[curr_start_i : next_start_i]
        # Matrix containing all pairwise atom distances in the residue
        distances = distance(
            coord_in_res[:, np.newaxis, :],
            coord_in_res[np.newaxis, :, :],
            box
        )
        # Visit each unordered atom pair in the residue exactly once
        for atom_index1 in range(len(elements_in_res)):
            for atom_index2 in range(atom_index1):
                dist_range = dist_ranges.get((
                    elements_in_res[atom_index1],
                    elements_in_res[atom_index2]
                ))
                if dist_range is None:
                    # No bond distance entry for this element
                    # combination -> skip
                    continue
                else:
                    min_dist, max_dist = dist_range
                dist = distances[atom_index1, atom_index2]
                if dist >= min_dist and dist <= max_dist:
                    # Convert residue-local indices
                    # into indices pointing to the entire atom array
                    bonds.append((
                        curr_start_i + atom_index1,
                        curr_start_i + atom_index2,
                        default_bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(bonds))

    if inter_residue:
        inter_bonds = _connect_inter_residue(atoms, residue_starts)
        if default_bond_type == BondType.ANY:
            # As all bonds should be of type ANY, convert also
            # inter-residue bonds to ANY
            inter_bonds.remove_bond_order()
        return bond_list.merge(inter_bonds)
    else:
        return bond_list
1647
+
1648
+
1649
+
1650
def connect_via_residue_names(atoms, bint inter_residue=True,
                              dict custom_bond_dict=None):
    """
    connect_via_residue_names(atoms, inter_residue=True, custom_bond_dict=None)

    Create a :class:`BondList` for a given atom array (stack), based on
    the deposited bonds for each residue in the RCSB ``components.cif``
    dataset.

    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The structure to create the :class:`BondList` for.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    custom_bond_dict : dict (str -> dict ((str, str) -> int)), optional
        A dictionary of dictionaries:
        The outer dictionary maps residue names to inner dictionaries.
        The inner dictionary maps tuples of two atom names to their
        respective :class:`BondType` (represented as integer).
        If given, these bonds are used instead of the bonds read from
        ``components.cif``.

    Returns
    -------
    BondList
        The created bond list.
        No bonds are added for residues that are not found in
        ``components.cif``.

    See Also
    --------
    connect_via_distances

    Notes
    -----
    This method can only find bonds for residues in the RCSB
    *Chemical Component Dictionary*, unless `custom_bond_dict` is set.
    Although this includes most molecules one encounters, this will fail
    for exotic molecules, e.g. specialized inhibitors.

    .. currentmodule:: biotite.structure.info

    To supplement `custom_bond_dict` with bonds for residues from the
    *Chemical Component Dictionary* you can use
    :meth:`bonds_in_residue()`.

    >>> import pprint
    >>> custom_bond_dict = {
    ...     "XYZ": {
    ...         ("A", "B"): BondType.SINGLE,
    ...         ("B", "C"): BondType.SINGLE
    ...     }
    ... }
    >>> # Supplement with bonds for common residues
    >>> custom_bond_dict["ALA"] = bonds_in_residue("ALA")
    >>> pp = pprint.PrettyPrinter(width=40)
    >>> pp.pprint(custom_bond_dict)
    {'ALA': {('C', 'O'): <BondType.DOUBLE: 2>,
             ('C', 'OXT'): <BondType.SINGLE: 1>,
             ('CA', 'C'): <BondType.SINGLE: 1>,
             ('CA', 'CB'): <BondType.SINGLE: 1>,
             ('CA', 'HA'): <BondType.SINGLE: 1>,
             ('CB', 'HB1'): <BondType.SINGLE: 1>,
             ('CB', 'HB2'): <BondType.SINGLE: 1>,
             ('CB', 'HB3'): <BondType.SINGLE: 1>,
             ('N', 'CA'): <BondType.SINGLE: 1>,
             ('N', 'H'): <BondType.SINGLE: 1>,
             ('N', 'H2'): <BondType.SINGLE: 1>,
             ('OXT', 'HXT'): <BondType.SINGLE: 1>},
     'XYZ': {('A', 'B'): <BondType.SINGLE: 1>,
             ('B', 'C'): <BondType.SINGLE: 1>}}
    """
    from .info.bonds import bonds_in_residue
    from .residues import get_residue_starts

    cdef list bonds = []
    cdef int res_i
    cdef int i, j
    cdef int curr_start_i, next_start_i
    cdef np.ndarray atom_names = atoms.atom_name
    cdef np.ndarray atom_names_in_res
    cdef np.ndarray res_names = atoms.res_name
    cdef str atom_name1, atom_name2
    cdef int64[:] atom_indices1, atom_indices2
    cdef dict bond_dict_for_res

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last element is the exclusive stop, not a residue start
    for res_i in range(len(residue_starts)-1):
        curr_start_i = residue_starts[res_i]
        next_start_i = residue_starts[res_i+1]

        # Look up the bonds expected for this residue:
        # The custom dictionary, if given, replaces the default source
        if custom_bond_dict is not None:
            bond_dict_for_res = custom_bond_dict.get(
                res_names[curr_start_i], {}
            )
        else:
            bond_dict_for_res = bonds_in_residue(res_names[curr_start_i])

        atom_names_in_res = atom_names[curr_start_i : next_start_i]
        for (atom_name1, atom_name2), bond_type in bond_dict_for_res.items():
            # Residue-local positions of the atoms with matching names
            atom_indices1 = np.where(
                atom_names_in_res == atom_name1
            )[0].astype(np.int64, copy=False)
            atom_indices2 = np.where(
                atom_names_in_res == atom_name2
            )[0].astype(np.int64, copy=False)
            # In rare cases the same atom name may appear multiple times
            # (e.g. in altlocs)
            # -> create all possible bond combinations
            for i in range(atom_indices1.shape[0]):
                for j in range(atom_indices2.shape[0]):
                    # Shift residue-local positions
                    # to indices in the entire atom array
                    bonds.append((
                        curr_start_i + atom_indices1[i],
                        curr_start_i + atom_indices2[j],
                        bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(bonds))

    if not inter_residue:
        return bond_list
    inter_bonds = _connect_inter_residue(atoms, residue_starts)
    return bond_list.merge(inter_bonds)
1779
+
1780
+
1781
+
1782
# Link types, as compared against the return value of 'link_type()' in
# '_connect_inter_residue()':
# Two adjacent residues are only connected, if the link types of both
# residues are in the same one of these two lists
_PEPTIDE_LINKS = ["PEPTIDE LINKING", "L-PEPTIDE LINKING", "D-PEPTIDE LINKING"]
_NUCLEIC_LINKS = ["RNA LINKING", "DNA LINKING"]
1784
+
1785
def _connect_inter_residue(atoms, residue_starts):
    """
    Create a :class:`BondList` containing the bonds between adjacent
    amino acid or nucleotide residues.

    Two consecutive residues are connected by a single bond, if they
    are in the same chain, their residue IDs do not differ by more
    than one, both have a peptide or both have a nucleic acid link type
    and both contain the respective connecting atom
    (``'C'``/``'N'`` or ``"O3'"``/``'P'``).

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to create the :class:`BondList` for.
    residue_starts : ndarray, dtype=int
        Return value of
        ``get_residue_starts(atoms, add_exclusive_stop=True)``.

    Returns
    -------
    BondList
        A bond list containing all inter residue bonds.
    """
    from .info.misc import link_type

    cdef list bonds = []
    cdef int i
    cdef np.ndarray atom_names = atoms.atom_name
    cdef np.ndarray res_names = atoms.res_name
    cdef np.ndarray res_ids = atoms.res_id
    cdef np.ndarray chain_ids = atoms.chain_id
    cdef int curr_start_i, next_start_i, after_next_start_i
    cdef str curr_connect_atom_name, next_connect_atom_name
    cdef np.ndarray curr_connect_indices, next_connect_indices

    # The last two elements of 'residue_starts' are skipped:
    # - the last residue has no successor to connect to and
    # - the final element is the exclusive end index of 'atoms'
    for i in range(len(residue_starts)-2):
        curr_start_i = residue_starts[i]
        next_start_i = residue_starts[i+1]
        after_next_start_i = residue_starts[i+2]

        # Residues can only be connected within the same chain...
        if chain_ids[next_start_i] != chain_ids[curr_start_i]:
            continue
        # ...and if the residue ID does not increase by more than one
        # (Same residue ID is also possible if insertion code is used)
        if res_ids[next_start_i] - res_ids[curr_start_i] > 1:
            continue

        # Get link type for this residue from RCSB components.cif
        curr_link = link_type(res_names[curr_start_i])
        next_link = link_type(res_names[next_start_i])

        if curr_link in _PEPTIDE_LINKS and next_link in _PEPTIDE_LINKS:
            curr_connect_atom_name, next_connect_atom_name = "C", "N"
        elif curr_link in _NUCLEIC_LINKS and next_link in _NUCLEIC_LINKS:
            curr_connect_atom_name, next_connect_atom_name = "O3'", "P"
        else:
            # Create no bond if the connection types of consecutive
            # residues are not compatible
            continue

        # Positions of the connecting atom in the current residue:
        # 'np.where()' gives positions relative to the residue slice,
        # so the residue start is added to obtain atom array indices
        curr_connect_indices = curr_start_i + np.where(
            atom_names[curr_start_i : next_start_i]
            == curr_connect_atom_name
        )[0]
        # The same for the connecting atom in the next residue
        next_connect_indices = next_start_i + np.where(
            atom_names[next_start_i : after_next_start_i]
            == next_connect_atom_name
        )[0]
        if len(curr_connect_indices) == 0 or len(next_connect_indices) == 0:
            # The connector atoms are not found in the adjacent residues
            # -> skip this bond
            continue

        # If an atom name appears multiple times,
        # only its first occurrence is connected
        bonds.append((
            curr_connect_indices[0],
            next_connect_indices[0],
            BondType.SINGLE
        ))

    return BondList(atoms.array_length(), np.array(bonds, dtype=np.uint32))
1871
+
1872
+
1873
+
1874
def find_connected(bond_list, uint32 root, bint as_mask=False):
    """
    find_connected(bond_list, root, as_mask=False)

    Get indices to all atoms that are directly or indirectly connected
    to the root atom indicated by the given index.

    An atom is *connected* to the `root` atom, if that atom is reachable
    by traversing an arbitrary number of bonds, starting from the
    `root`.
    Effectively, this means that all atoms are *connected* to `root`,
    that are in the same molecule as `root`.
    Per definition `root` is also *connected* to itself.

    Parameters
    ----------
    bond_list : BondList
        The reference bond list.
    root : int
        The index of the root atom.
    as_mask : bool, optional
        If true, the connected atom indices are returned as boolean
        mask.
        By default, the connected atom indices are returned as integer
        array.

    Returns
    -------
    connected : ndarray, dtype=int or ndarray, dtype=bool
        Either a boolean mask or an integer array, representing the
        connected atoms.
        In case of a boolean mask: ``connected[i] == True``, if the atom
        with index ``i`` is connected.

    Raises
    ------
    ValueError
        If `root` is not smaller than the atom count of `bond_list`.

    Examples
    --------
    Consider a system with 4 atoms, where only the last atom is not
    bonded with the other ones (``0-1-2 3``):

    >>> bonds = BondList(4)
    >>> bonds.add_bond(0, 1)
    >>> bonds.add_bond(1, 2)
    >>> print(find_connected(bonds, 0))
    [0 1 2]
    >>> print(find_connected(bonds, 1))
    [0 1 2]
    >>> print(find_connected(bonds, 2))
    [0 1 2]
    >>> print(find_connected(bonds, 3))
    [3]
    """
    if root >= bond_list.get_atom_count():
        raise ValueError(
            f"Root atom index {root} is out of bounds for bond list "
            f"representing {bond_list.get_atom_count()} atoms"
        )

    all_bonds, _ = bond_list.get_all_bonds()

    # The NumPy array is kept alongside its memoryview,
    # as the array (and not the memoryview) is eventually returned
    is_connected = np.zeros(bond_list.get_atom_count(), dtype=np.uint8)
    cdef uint8[:] is_connected_mask = is_connected
    # Mark all atoms that are reachable from the root atom by bonds
    # The marks are written into 'is_connected' via its memoryview
    _find_connected(bond_list, root, is_connected_mask, all_bonds)

    # Convert the 0/1 marks into a proper boolean array,
    # as a plain memoryview would neither be an 'ndarray'
    # nor be interpreted as a mask when used as index
    connected_mask = is_connected.astype(bool)
    if as_mask:
        return connected_mask
    else:
        return np.where(connected_mask)[0]
1943
+
1944
+
1945
cdef _find_connected(bond_list,
                     int32 index,
                     uint8[:] is_connected_mask,
                     int32[:,:] all_bonds):
    """
    Mark all atoms that are reachable from the atom at `index` by
    traversing bonds.

    Parameters
    ----------
    bond_list
        Not used by this function, kept for a stable signature.
    index : int32
        The index of the atom to start the traversal from.
    is_connected_mask : memoryview of uint8
        The visited marks.
        This array is modified in place: The element of each reached
        atom, including the start atom, is set to true.
        Atoms that are already marked on entry are not traversed again.
    all_bonds : memoryview of int32, two-dimensional
        For each atom (first dimension) the indices of its bonded
        atoms.
        ``-1`` values are ignored as padding.
    """
    cdef int32 current
    cdef int32 j
    cdef int32 connected_index
    # The traversal uses an explicit stack instead of recursive calls:
    # Recursion would require one C stack frame per atom on the
    # traversal path, which may exhaust the stack for large molecules
    cdef list pending = [index]

    while pending:
        current = pending.pop()
        if is_connected_mask[current]:
            # This atom has already been visited
            continue
        is_connected_mask[current] = True

        for j in range(all_bonds.shape[1]):
            connected_index = all_bonds[current, j]
            if connected_index == -1:
                # Ignore padding values
                continue
            if not is_connected_mask[connected_index]:
                pending.append(connected_index)
1965
+
1966
+
1967
def find_rotatable_bonds(bonds):
    """
    find_rotatable_bonds(bonds)

    Find all rotatable bonds in a given :class:`BondList`.

    The following conditions must be true for a bond to be counted as
    rotatable:

        1. The bond must be a single bond (``BondType.SINGLE``)
        2. The connected atoms must not be within the same cycle/ring
        3. Both connected atoms must not be terminal, e.g. not a *C-H*
           bond, as rotation about such bonds would not change any
           coordinates

    Parameters
    ----------
    bonds : BondList
        The bonds to find the rotatable bonds in.

    Returns
    -------
    rotatable_bonds : BondList
        The subset of the input `bonds` that contains only rotatable
        bonds.

    Examples
    --------

    >>> molecule = residue("TYR")
    >>> for i, j, _ in find_rotatable_bonds(molecule.bonds).as_array():
    ...     print(molecule.atom_name[i], molecule.atom_name[j])
    N CA
    CA C
    CA CB
    C OXT
    CB CG
    CZ OH
    """
    cdef uint32 i, j
    cdef uint32 bond_type
    # The bond type is converted once into a C integer, so the
    # comparison in the loop does not involve the Python enum
    cdef uint32 SINGLE = int(BondType.SINGLE)
    cdef bint in_same_cycle

    bond_graph = bonds.as_graph()
    # Each cycle is stored as set,
    # as only membership tests are performed on the cycles
    cycles = [
        set(cycle) for cycle in nx.algorithms.cycles.cycle_basis(bond_graph)
    ]

    # The number of bonded atoms for each atom:
    # '-1' values in the array of bonded atom indices are padding
    cdef int64[:] number_of_partners_v = np.count_nonzero(
        bonds.get_all_bonds()[0] != -1,
        axis=1
    ).astype(np.int64, copy=False)

    rotatable_bonds = []
    cdef uint32[:,:] bonds_v = bonds.as_array()
    for i, j, bond_type in bonds_v:
        # Can only rotate about single bonds
        # Furthermore, it makes no sense to rotate about a bond,
        # that leads to a single atom
        if bond_type == SINGLE \
            and number_of_partners_v[i] > 1 \
            and number_of_partners_v[j] > 1:
                # Cannot rotate about a bond, if the two connected atoms
                # are in a cycle
                in_same_cycle = False
                for cycle in cycles:
                    if i in cycle and j in cycle:
                        in_same_cycle = True
                        # One common cycle is sufficient
                        break
                if not in_same_cycle:
                    rotatable_bonds.append((i,j, bond_type))
    return BondList(bonds.get_atom_count(), np.array(rotatable_bonds))