biotite 1.3.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +191 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +160 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1226 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  266. biotite/structure/bonds.pyx +1975 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +276 -0
  271. biotite/structure/charges.cpython-312-darwin.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +681 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +590 -0
  278. biotite/structure/geometry.py +655 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +90 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +388 -0
  311. biotite/structure/io/pdb/file.py +1356 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +671 -0
  319. biotite/structure/io/pdbx/cif.py +1088 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +358 -0
  322. biotite/structure/io/pdbx/convert.py +2097 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1047 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +544 -0
  337. biotite/structure/rings.py +335 -0
  338. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +292 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +168 -0
  349. biotite/version.py +21 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.3.0.dist-info/METADATA +162 -0
  352. biotite-1.3.0.dist-info/RECORD +354 -0
  353. biotite-1.3.0.dist-info/WHEEL +6 -0
  354. biotite-1.3.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1975 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides the :class:`BondList` and :class:`BondType` classes
7
+ for representing chemical bonds, and functions for finding bonds between atoms.
8
+ """
9
+
10
# Module metadata.
# The module name is overridden with the name of the parent subpackage.
__name__ = "biotite.structure"
__author__ = "Patrick Kunzmann"
# Names exported by a star-import of this module
__all__ = ["BondList", "BondType",
           "connect_via_distances", "connect_via_residue_names",
           "find_connected", "find_rotatable_bonds"]
15
+
16
+ cimport cython
17
+ cimport numpy as np
18
+ from libc.stdlib cimport free, realloc
19
+
20
+ from collections.abc import Sequence
21
+ import itertools
22
+ import numbers
23
+ from enum import IntEnum
24
+ import networkx as nx
25
+ import numpy as np
26
+ from .error import BadStructureError
27
+ from ..copyable import Copyable
28
+
29
# Short aliases for the fixed-width NumPy integer types used in this module.
# NOTE(review): 'ptr' is presumably used to store memory addresses as
# 64-bit unsigned integers - confirm against its usages further below
ctypedef np.uint64_t ptr
ctypedef np.uint8_t uint8
ctypedef np.uint16_t uint16
ctypedef np.uint32_t uint32
ctypedef np.uint64_t uint64
ctypedef np.int8_t int8
ctypedef np.int16_t int16
ctypedef np.int32_t int32
ctypedef np.int64_t int64


# Fused type covering every unsigned and signed integer alias declared
# above, so that a function taking an 'IndexType' is specialized for
# each of these integer widths
ctypedef fused IndexType:
    uint8
    uint16
    uint32
    uint64
    int8
    int16
    int32
    int64
49
+
50
+
51
class BondType(IntEnum):
    """
    Enumeration of the types a chemical bond can have.

    - `ANY` - Used if the actual type is unknown
    - `SINGLE` - Single bond
    - `DOUBLE` - Double bond
    - `TRIPLE` - Triple bond
    - `QUADRUPLE` - A quadruple bond
    - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
    - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
    - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
    - `AROMATIC` - Aromatic bond without specification of the formal bond
    - `COORDINATION` - Coordination complex involving a metal atom
    """

    ANY = 0
    SINGLE = 1
    DOUBLE = 2
    TRIPLE = 3
    QUADRUPLE = 4
    AROMATIC_SINGLE = 5
    AROMATIC_DOUBLE = 6
    AROMATIC_TRIPLE = 7
    COORDINATION = 8
    AROMATIC = 9

    def without_aromaticity(self):
        """
        Get the non-aromatic counterpart of this bond type.

        :attr:`BondType.AROMATIC_{ORDER}` is converted into
        :attr:`BondType.{ORDER}`.
        :attr:`BondType.AROMATIC`, which carries no formal bond order,
        is converted into :attr:`BondType.ANY`.
        All other bond types are returned unchanged.

        Returns
        -------
        new_bond_type : BondType
            The :class:`BondType` without aromaticity.

        Examples
        --------

        >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
        DOUBLE
        """
        # The table is built inside the method, as a mapping assigned
        # in the class body would be turned into an enum member itself
        plain_counterparts = {
            BondType.AROMATIC_SINGLE: BondType.SINGLE,
            BondType.AROMATIC_DOUBLE: BondType.DOUBLE,
            BondType.AROMATIC_TRIPLE: BondType.TRIPLE,
            BondType.AROMATIC: BondType.ANY,
        }
        # Non-aromatic types are not in the table and map to themselves
        return plain_counterparts.get(self, self)
106
+
107
+
108
+ @cython.boundscheck(False)
109
+ @cython.wraparound(False)
110
+ class BondList(Copyable):
111
+ """
112
+ __init__(atom_count, bonds=None)
113
+
114
+ A bond list stores indices of atoms
115
+ (usually of an :class:`AtomArray` or :class:`AtomArrayStack`)
116
+ that form chemical bonds together with the type (or order) of the
117
+ bond.
118
+
119
+ Internally the bonds are stored as *n x 3* :class:`ndarray`.
120
+ For each row, the first column specifies the index of the first
121
+ atom, the second column the index of the second atom involved in the
122
+ bond.
123
+ The third column stores an integer that is interpreted as member
124
+ of the :class:`BondType` enum, that specifies the order of the
125
+ bond.
126
+
127
+ When indexing a :class:`BondList`, the index is not forwarded to the
128
+ internal :class:`ndarray`. Instead the indexing behavior is
129
+ consistent with indexing an :class:`AtomArray` or
130
+ :class:`AtomArrayStack`:
131
+ Bonds with at least one atom index that is not covered by the index
132
+ are removed, atom indices that occur after an uncovered atom index
133
+ move up.
134
+ Effectively, this means that after indexing an :class:`AtomArray`
135
+ and a :class:`BondList` with the same index, the atom indices in the
136
+ :class:`BondList` will still point to the same atoms in the
137
+ :class:`AtomArray`.
138
+ Indexing a :class:`BondList` with a single integer is equivalent
139
+ to calling :func:`get_bonds()`.
140
+
141
+ The same consistency applies to adding :class:`BondList` instances
142
+ via the '+' operator:
143
+ The atom indices of the second :class:`BondList` are increased by
144
+ the atom count of the first :class:`BondList` and then both
145
+ :class:`BondList` objects are merged.
146
+
147
+ Parameters
148
+ ----------
149
+ atom_count : int
150
+ A positive integer, that specifies the number of atoms the
151
+ :class:`BondList` refers to
152
+ (usually the length of an atom array (stack)).
153
+ Effectively, this value is the exclusive maximum for the indices
154
+ stored in the :class:`BondList`.
155
+ bonds : ndarray, shape=(n,2) or shape=(n,3), dtype=int, optional
156
+ This array contains the indices of atoms which are bonded:
157
+ For each row, the first column specifies the first atom,
158
+ the second column the second atom involved in a chemical bond.
159
+ If an *n x 3* array is provided, the additional column
160
+ specifies a :class:`BondType` instead of :attr:`BondType.ANY`.
161
+ By default, the created :class:`BondList` is empty.
162
+
163
+ Notes
164
+ -----
165
+ When initially providing the bonds as :class:`ndarray`, the input is
166
+ sanitized: Redundant bonds are removed, and each bond entry is
167
+ sorted so that the lower one of the two atom indices is in the first
168
+ column.
169
+ If a bond appears multiple times with different bond types, the
170
+ first bond takes precedence.
171
+
172
+ Examples
173
+ --------
174
+
175
+ Construct a :class:`BondList`, where a central atom (index 1) is
176
+ connected to three other atoms (index 0, 3 and 4):
177
+
178
+ >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
179
+ >>> print(bond_list)
180
+ [[0 1 0]
181
+ [1 3 0]
182
+ [1 4 0]]
183
+
184
+ Remove the first atom (index 0) via indexing:
185
+ The bond containing index 0 is removed, since the corresponding atom
186
+ does not exist anymore. Since all other atoms move up in their
187
+ position, the indices in the bond list are decreased by one:
188
+
189
+ >>> bond_list = bond_list[1:]
190
+ >>> print(bond_list)
191
+ [[0 2 0]
192
+ [0 3 0]]
193
+
194
+ :class:`BondList` objects can be associated to an :class:`AtomArray`
195
+ or :class:`AtomArrayStack`.
196
+ The following snippet shows this for a benzene molecule:
197
+
198
+ >>> benzene = AtomArray(12)
199
+ >>> # Omit filling most required annotation categories for brevity
200
+ >>> benzene.atom_name = np.array(
201
+ ... ["C1", "C2", "C3", "C4", "C5", "C6", "H1", "H2", "H3", "H4", "H5", "H6"]
202
+ ... )
203
+ >>> benzene.bonds = BondList(
204
+ ... benzene.array_length(),
205
+ ... np.array([
206
+ ... # Bonds between carbon atoms in the ring
207
+ ... (0, 1, BondType.AROMATIC_SINGLE),
208
+ ... (1, 2, BondType.AROMATIC_DOUBLE),
209
+ ... (2, 3, BondType.AROMATIC_SINGLE),
210
+ ... (3, 4, BondType.AROMATIC_DOUBLE),
211
+ ... (4, 5, BondType.AROMATIC_SINGLE),
212
+ ... (5, 0, BondType.AROMATIC_DOUBLE),
213
+ ... # Bonds between carbon and hydrogen
214
+ ... (0, 6, BondType.SINGLE),
215
+ ... (1, 7, BondType.SINGLE),
216
+ ... (2, 8, BondType.SINGLE),
217
+ ... (3, 9, BondType.SINGLE),
218
+ ... (4, 10, BondType.SINGLE),
219
+ ... (5, 11, BondType.SINGLE),
220
+ ... ])
221
+ ... )
222
+ >>> for i, j, bond_type in benzene.bonds.as_array():
223
+ ... print(
224
+ ... f"{BondType(bond_type).name} bond between "
225
+ ... f"{benzene.atom_name[i]} and {benzene.atom_name[j]}"
226
+ ... )
227
+ AROMATIC_SINGLE bond between C1 and C2
228
+ AROMATIC_DOUBLE bond between C2 and C3
229
+ AROMATIC_SINGLE bond between C3 and C4
230
+ AROMATIC_DOUBLE bond between C4 and C5
231
+ AROMATIC_SINGLE bond between C5 and C6
232
+ AROMATIC_DOUBLE bond between C1 and C6
233
+ SINGLE bond between C1 and H1
234
+ SINGLE bond between C2 and H2
235
+ SINGLE bond between C3 and H3
236
+ SINGLE bond between C4 and H4
237
+ SINGLE bond between C5 and H5
238
+ SINGLE bond between C6 and H6
239
+
240
+ Obtain the bonded atoms for the :math:`C_1`:
241
+
242
+ >>> bonds, types = benzene.bonds.get_bonds(0)
243
+ >>> print(bonds)
244
+ [1 5 6]
245
+ >>> print(types)
246
+ [5 6 1]
247
+ >>> print(f"C1 is bonded to {', '.join(benzene.atom_name[bonds])}")
248
+ C1 is bonded to C2, C6, H1
249
+
250
+ Cut the benzene molecule in half.
251
+ Although the first half of the atoms are missing the indices of
252
+ the cropped :class:`BondList` still represents the bonds of the
253
+ remaining atoms:
254
+
255
+ >>> half_benzene = benzene[
256
+ ... np.isin(benzene.atom_name, ["C4", "C5", "C6", "H4", "H5", "H6"])
257
+ ... ]
258
+ >>> for i, j, bond_type in half_benzene.bonds.as_array():
259
+ ... print(
260
+ ... f"{BondType(bond_type).name} bond between "
261
+ ... f"{half_benzene.atom_name[i]} and {half_benzene.atom_name[j]}"
262
+ ... )
263
+ AROMATIC_DOUBLE bond between C4 and C5
264
+ AROMATIC_SINGLE bond between C5 and C6
265
+ SINGLE bond between C4 and H4
266
+ SINGLE bond between C5 and H5
267
+ SINGLE bond between C6 and H6
268
+ """
269
+
270
def __init__(self, uint32 atom_count, np.ndarray bonds=None):
    """
    Create a bond list for `atom_count` atoms from an optional
    *(n,2)* or *(n,3)* array of bonds.

    Raises
    ------
    ValueError
        If `bonds` is not two-dimensional, has neither two nor three
        columns or contains a bond type value that is not part of
        :class:`BondType`.
    """
    self._atom_count = atom_count

    if bonds is None or len(bonds) == 0:
        # Create empty bond list
        self._bonds = np.zeros((0, 3), dtype=np.uint32)
        self._max_bonds_per_atom = 0
        return

    if bonds.ndim != 2:
        raise ValueError("Expected a 2D-ndarray for input bonds")
    if bonds.shape[1] != 2 and bonds.shape[1] != 3:
        raise ValueError(
            "Input array containing bonds must be either of shape "
            "(n,2) or (n,3)"
        )

    # The bond type column is initialized with 0, i.e. BondType.ANY,
    # which is kept if the input does not specify the bond types
    self._bonds = np.zeros((bonds.shape[0], 3), dtype=np.uint32)
    # Indices are sorted per bond
    # so that the lower index is at the first position
    self._bonds[:,:2] = np.sort(
        _to_positive_index_array(bonds[:,:2], atom_count), axis=1
    )

    if bonds.shape[1] == 3:
        # Input contains the bond type value in the third column
        bond_types = bonds[:, 2]
        # A negative value must be rejected explicitly:
        # It would pass the upper bound check below and would then
        # be written into the unsigned internal array
        if (bond_types < 0).any():
            raise ValueError(
                f"BondType {np.min(bond_types)} is invalid"
            )
        if (bond_types >= len(BondType)).any():
            raise ValueError(
                f"BondType {np.max(bond_types)} is invalid"
            )
        self._bonds[:,2] = bond_types

    self._remove_redundant_bonds()
    self._max_bonds_per_atom = self._get_max_bonds_per_atom()
316
+
317
@staticmethod
def concatenate(bonds_lists):
    """
    Merge multiple :class:`BondList` objects into a single
    :class:`BondList`.

    The atom indices of each bond list are shifted by the total atom
    count of all bond lists preceding it.

    Parameters
    ----------
    bonds_lists : iterable object of BondList
        The bond lists to be concatenated.

    Returns
    -------
    concatenated_bonds : BondList
        The concatenated bond lists.

    Examples
    --------

    >>> bonds1 = BondList(2, np.array([(0, 1)]))
    >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
    >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
    >>> print(merged_bonds.get_atom_count())
    5
    >>> print(merged_bonds.as_array()[:, :2])
    [[0 1]
     [2 3]
     [2 4]]
    """
    # The input is traversed more than once,
    # hence a one-shot iterable needs to be materialized first
    if not isinstance(bonds_lists, Sequence):
        bonds_lists = list(bonds_lists)

    cdef np.ndarray combined = np.concatenate(
        [part._bonds for part in bonds_lists]
    )
    # Shift the atom indices of each part by the number of atoms
    # preceding it (consistent with addition of AtomArray)
    cdef int row_begin = 0, row_end = 0
    cdef int atom_offset = 0
    for part in bonds_lists:
        row_end = row_begin + part._bonds.shape[0]
        combined[row_begin : row_end, :2] += atom_offset
        atom_offset += part._atom_count
        row_begin = row_end

    # Start from an empty bond list and inject the merged array,
    # so the constructor does not search for redundant bonds or
    # recompute the maximum number of bonds per atom
    cdef result = BondList(atom_offset)
    result._bonds = combined
    result._max_bonds_per_atom = max(
        [part._max_bonds_per_atom for part in bonds_lists]
    )
    return result
371
+
372
def __copy_create__(self):
    # The clone starts without any bonds on purpose:
    # handing the bond array to the constructor would trigger the
    # redundant bond removal and the recalculation of the maximum
    # number of bonds per atom, although both are already done
    # for this instance
    empty_clone = BondList(self._atom_count)
    return empty_clone
377
+
378
def __copy_fill__(self, clone):
    # Hand over the state that was left out on creation of the clone:
    # the cached maximum bond count and an independent copy of the bonds
    clone._max_bonds_per_atom = self._max_bonds_per_atom
    clone._bonds = self._bonds.copy()
382
+
383
def offset_indices(self, int offset):
    """
    offset_indices(offset)

    Increase all atom indices in the :class:`BondList` by the given
    offset.

    Implicitly this increases the atom count.
    The :class:`BondList` is modified in-place.

    Parameters
    ----------
    offset : int
        The atom indices are increased by this value.
        Must not be negative.

    Raises
    ------
    ValueError
        If `offset` is negative.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> print(bond_list)
    [[0 1 0]
     [1 3 0]
     [1 4 0]]
    >>> bond_list.offset_indices(2)
    >>> print(bond_list)
    [[2 3 0]
     [3 5 0]
     [3 6 0]]
    """
    if offset < 0:
        raise ValueError("Offset must not be negative")
    # Only the two index columns are shifted, the bond type is kept
    self._bonds[:,:2] += offset
    self._atom_count += offset
416
+
417
def as_array(self):
    """
    as_array()

    Get the bonds as :class:`ndarray`.

    The returned array is a copy, so changes to it do not affect this
    :class:`BondList`.

    Returns
    -------
    array : ndarray, shape=(n,3), dtype=np.uint32
        Copy of the internal :class:`ndarray`.
        Each row describes one bond:
        The first and second column contain the indices of the two
        bonded atoms, the third column contains the
        :class:`BondType`.
    """
    bonds_copy = self._bonds.copy()
    return bonds_copy
433
+
434
def as_set(self):
    """
    as_set()

    Get the bonds as a set of tuples.

    Returns
    -------
    bond_set : set of tuple(int, int, int)
        One tuple for each bond:
        The first two integers are the indices of the bonded atoms,
        the third integer is the :class:`BondType`.
    """
    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    cdef set collected = set()
    for row in range(bonds_v.shape[0]):
        bond = (bonds_v[row,0], bonds_v[row,1], bonds_v[row,2])
        collected.add(bond)
    return collected
457
+
458
def as_graph(self):
    """
    as_graph()

    Get the bonds as a *NetworkX* graph.

    Returns
    -------
    graph : Graph
        A *NetworkX* :class:`Graph`.
        Each atom index that takes part in a bond is a node and
        each bond is an edge.
        The :class:`BondType` of a bond is stored in the
        ``"bond_type"`` attribute of the respective edge.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0,2), (1,3,1), (1,4,1)]))
    >>> graph = bond_list.as_graph()
    >>> print(graph.nodes)
    [0, 1, 3, 4]
    >>> print(graph.edges)
    [(0, 1), (1, 3), (1, 4)]
    >>> for i, j in graph.edges:
    ...     print(i, j, graph.get_edge_data(i, j))
    0 1 {'bond_type': <BondType.DOUBLE: 2>}
    1 3 {'bond_type': <BondType.SINGLE: 1>}
    1 4 {'bond_type': <BondType.SINGLE: 1>}
    """
    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    cdef list edge_list = []

    # One '(atom, atom, attributes)' tuple per bond
    for row in range(bonds_v.shape[0]):
        attributes = {"bond_type": BondType(bonds_v[row,2])}
        edge_list.append((bonds_v[row,0], bonds_v[row,1], attributes))

    graph = nx.Graph()
    graph.add_edges_from(edge_list)
    return graph
500
+
501
def remove_aromaticity(self):
    """
    Replace each aromatic bond type by its non-aromatic counterpart.

    :attr:`BondType.AROMATIC_{ORDER}` is converted into
    :attr:`BondType.{ORDER}` and :attr:`BondType.AROMATIC` is converted
    into :attr:`BondType.ANY`.
    The :class:`BondList` is modified in-place.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_aromaticity()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 SINGLE
    1 2 DOUBLE
    """
    replacements = (
        (BondType.AROMATIC_SINGLE, BondType.SINGLE),
        (BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
        (BondType.AROMATIC_TRIPLE, BondType.TRIPLE),
        (BondType.AROMATIC, BondType.ANY),
    )
    # Slicing gives a view: writing to it changes the internal array
    type_column = self._bonds[:,2]
    for aromatic_type, non_aromatic_type in replacements:
        is_aromatic = type_column == aromatic_type
        type_column[is_aromatic] = non_aromatic_type
528
+
529
def remove_bond_order(self):
    """
    Set the type of every bond to :attr:`BondType.ANY`.

    The :class:`BondList` is modified in-place.
    """
    # Slicing gives a view: writing to it changes the internal array
    type_column = self._bonds[:,2]
    type_column[:] = BondType.ANY
534
+
535
def get_atom_count(self):
    """
    get_atom_count()

    Get the number of atoms this :class:`BondList` refers to.

    Returns
    -------
    atom_count : int
        The atom count.
    """
    atom_count = self._atom_count
    return atom_count
547
+
548
def get_bond_count(self):
    """
    get_bond_count()

    Get the number of bonds.

    Returns
    -------
    bond_count : int
        The number of bonds.
        This is the length of the internal :class:`ndarray`
        containing the bonds.
    """
    bond_count = len(self._bonds)
    return bond_count
561
+
562
def get_bonds(self, int32 atom_index):
    """
    get_bonds(atom_index)

    Get the bond partners of a single atom.

    Parameters
    ----------
    atom_index : int
        The index of the atom to get the bonds for.

    Returns
    -------
    bonds : np.ndarray, dtype=np.uint32, shape=(k,)
        The indices of the atoms bonded to the given atom.
    bond_types : np.ndarray, dtype=np.uint8, shape=(k,)
        The type (or order) of the bond to each atom in `bonds`.
        The integers are interpreted as :class:`BondType` instances.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> bonds, types = bond_list.get_bonds(1)
    >>> print(bonds)
    [0 3 4]
    """
    cdef int row=0, found=0

    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    cdef uint32[:,:] all_bonds_v = self._bonds
    # The number of bonds of this atom is not known beforehand
    # -> allocate space for the highest bond count of any atom
    cdef np.ndarray partners = np.zeros(self._max_bonds_per_atom,
                                        dtype=np.uint32)
    cdef uint32[:] partners_v = partners
    cdef np.ndarray partner_types = np.zeros(self._max_bonds_per_atom,
                                             dtype=np.uint8)
    cdef uint8[:] partner_types_v = partner_types

    for row in range(all_bonds_v.shape[0]):
        # The requested atom may be at either position of a bond,
        # the partner is the atom at the respective other position
        if all_bonds_v[row,0] == index:
            partners_v[found] = all_bonds_v[row,1]
        elif all_bonds_v[row,1] == index:
            partners_v[found] = all_bonds_v[row,0]
        else:
            continue
        partner_types_v[found] = all_bonds_v[row,2]
        found += 1

    # Cut off the unused tail of the arrays
    return partners[:found], partner_types[:found]
624
+
625
+
626
def get_all_bonds(self):
    """
    get_all_bonds()

    For each atom index, give the indices of the atoms bonded to
    this atom as well as the corresponding bond types.

    Returns
    -------
    bonds : np.ndarray, dtype=np.int32, shape=(n,k)
        The indices of connected atoms.
        The first dimension represents the atoms,
        the second dimension represents the indices of atoms bonded
        to the respective atom.
        Atoms can have different numbers of atoms bonded to
        them.
        Therefore, the length of the second dimension *k* is equal
        to the maximum number of bonds for an atom in this
        :class:`BondList`.
        For atoms with less bonds, the corresponding entry in the
        array is padded with ``-1`` values.
    bond_types : np.ndarray, dtype=np.int8, shape=(n,k)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the bond type (or order) corresponding
        to the returned `bonds`.
        It uses the same ``-1``-padding.

    Examples
    --------

    >>> # BondList for benzene
    >>> bond_list = BondList(
    ...     12,
    ...     np.array([
    ...         # Bonds between the carbon atoms in the ring
    ...         (0,  1, BondType.AROMATIC_SINGLE),
    ...         (1,  2, BondType.AROMATIC_DOUBLE),
    ...         (2,  3, BondType.AROMATIC_SINGLE),
    ...         (3,  4, BondType.AROMATIC_DOUBLE),
    ...         (4,  5, BondType.AROMATIC_SINGLE),
    ...         (5,  0, BondType.AROMATIC_DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0,  6, BondType.SINGLE),
    ...         (1,  7, BondType.SINGLE),
    ...         (2,  8, BondType.SINGLE),
    ...         (3,  9, BondType.SINGLE),
    ...         (4, 10, BondType.SINGLE),
    ...         (5, 11, BondType.SINGLE),
    ...     ])
    ... )
    >>> bonds, types = bond_list.get_all_bonds()
    >>> print(bonds)
    [[ 1  5  6]
     [ 0  2  7]
     [ 1  3  8]
     [ 2  4  9]
     [ 3  5 10]
     [ 4  0 11]
     [ 0 -1 -1]
     [ 1 -1 -1]
     [ 2 -1 -1]
     [ 3 -1 -1]
     [ 4 -1 -1]
     [ 5 -1 -1]]
    >>> print(types)
    [[ 5  6  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]]
    >>> for i in range(bond_list.get_atom_count()):
    ...     bonds_for_atom = bonds[i]
    ...     # Remove trailing '-1' values
    ...     bonds_for_atom = bonds_for_atom[bonds_for_atom != -1]
    ...     print(f"{i}: {bonds_for_atom}")
    0: [1 5 6]
    1: [0 2 7]
    2: [1 3 8]
    3: [2 4 9]
    4: [ 3  5 10]
    5: [ 4  0 11]
    6: [0]
    7: [1]
    8: [2]
    9: [3]
    10: [4]
    11: [5]
    """
    cdef int i=0
    cdef uint32 atom_index_i, atom_index_j, bond_type

    cdef uint32[:,:] all_bonds_v = self._bonds
    # The size of 2nd dimension is equal to the atom with most bonds
    # Since each atom can have an individual number of bonded atoms,
    # The arrays are padded with '-1'
    # -> signed dtypes are required to represent the padding value
    cdef np.ndarray bonds = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int32
    )
    cdef int32[:,:] bonds_v = bonds
    cdef np.ndarray bond_types = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int8
    )
    cdef int8[:,:] bond_types_v = bond_types
    # Track the number of already found bonds for each given index
    cdef np.ndarray lengths = np.zeros(self._atom_count, dtype=np.uint32)
    cdef uint32[:] lengths_v = lengths

    for i in range(all_bonds_v.shape[0]):
        atom_index_i = all_bonds_v[i,0]
        atom_index_j = all_bonds_v[i,1]
        bond_type = all_bonds_v[i,2]
        # Add second bonded atom for the first bonded atom
        # and vice versa
        # Use 'lengths' variable to append the value
        bonds_v[atom_index_i, lengths_v[atom_index_i]] = atom_index_j
        bonds_v[atom_index_j, lengths_v[atom_index_j]] = atom_index_i
        bond_types_v[atom_index_i, lengths_v[atom_index_i]] = bond_type
        bond_types_v[atom_index_j, lengths_v[atom_index_j]] = bond_type
        # Increment lengths
        # This must happen after all four writes above,
        # as they use the old lengths as column position
        lengths_v[atom_index_i] += 1
        lengths_v[atom_index_j] += 1

    return bonds, bond_types
757
+
758
+
759
def adjacency_matrix(self):
    r"""
    adjacency_matrix()

    Get the adjacency matrix of the bonds in this :class:`BondList`.

    The adjacency matrix is a symmetric square matrix of boolean
    values with

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{True},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            \text{False}, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=bool, shape=(n,n)
        The created adjacency matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.adjacency_matrix())
    [[False  True  True  True]
     [ True False False False]
     [ True False False False]
     [ True False False False]]
    """
    first_atoms = self._bonds[:,0]
    second_atoms = self._bonds[:,1]
    shape = (self._atom_count, self._atom_count)
    matrix = np.zeros(shape, dtype=bool)
    # Each bond is stored only once -> set both matrix entries
    matrix[first_atoms, second_atoms] = True
    matrix[second_atoms, first_atoms] = True
    return matrix
807
+
808
+
809
def bond_type_matrix(self):
    r"""
    bond_type_matrix()

    Get a matrix that contains the bond type for each pair of atoms
    in this :class:`BondList`.

    The matrix is a symmetric square matrix with

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{BondType}_{ij},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            -1, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=np.int8, shape=(n,n)
        The created bond type matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.bond_type_matrix())
    [[-1  2  1  1]
     [ 2 -1 -1 -1]
     [ 1 -1 -1 -1]
     [ 1 -1 -1 -1]]
    """
    first_atoms = self._bonds[:,0]
    second_atoms = self._bonds[:,1]
    bond_types = self._bonds[:,2]
    shape = (self._atom_count, self._atom_count)
    # '-1' marks atom pairs without a bond
    matrix = np.full(shape, -1, dtype=np.int8)
    # Each bond is stored only once -> set both matrix entries
    matrix[first_atoms, second_atoms] = bond_types
    matrix[second_atoms, first_atoms] = bond_types
    return matrix
857
+
858
+
859
def add_bond(self, int32 atom_index1, int32 atom_index2,
             bond_type=BondType.ANY):
    """
    add_bond(atom_index1, atom_index2, bond_type=BondType.ANY)

    Add a bond to the :class:`BondList`.

    If the bond is already existent, only the bond type is updated.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms to create a bond for.
        Negative indices are converted into positive indices
        based on the atom count.
    bond_type : BondType or int, optional
        The type of the bond. Default is :attr:`BondType.ANY`.

    Raises
    ------
    ValueError
        If `bond_type` is equal to or greater than the number of
        :class:`BondType` members.
    IndexError
        If an atom index is out of range for the atom count.
    """
    # NOTE(review): only the upper bound of the bond type is checked here
    if bond_type >= len(BondType):
        raise ValueError(f"BondType {bond_type} is invalid")

    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    # Bonds are stored with the lower atom index at the first position
    _sort(&index1, &index2)

    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Check if bond is already existent in list
    cdef bint in_list = False
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            in_list = True
            # If in list, update bond type
            # The memory view writes directly into 'self._bonds'
            all_bonds_v[i,2] = int(bond_type)
            break
    if not in_list:
        # 'np.append()' creates a new array including the new bond
        self._bonds = np.append(
            self._bonds,
            np.array(
                [(index1, index2, int(bond_type))], dtype=np.uint32
            ),
            axis=0
        )
        self._max_bonds_per_atom = self._get_max_bonds_per_atom()
903
+
904
def remove_bond(self, int32 atom_index1, int32 atom_index2):
    """
    remove_bond(atom_index1, atom_index2)

    Remove a bond from the :class:`BondList`.

    If the bond is not existent in the :class:`BondList`, nothing happens.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms whose bond should be removed.
        Negative indices are converted into positive indices
        based on the atom count.

    Raises
    ------
    IndexError
        If an atom index is out of range for the atom count.
    """
    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    # Bonds are stored with the lower atom index at the first position
    _sort(&index1, &index2)

    # Find the bond in bond list
    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            self._bonds = np.delete(self._bonds, i, axis=0)
            # Stop here: 'all_bonds_v' still views the array before
            # the deletion, so any further row index found with it
            # would not fit to the new 'self._bonds' anymore
            break
    # The maximum bonds per atom is not recalculated,
    # as the value can only be decreased on bond removal
    # Since this value is only used for pessimistic array allocation
    # in 'get_bonds()', the slightly larger memory usage is a better
    # option than the repetitive call of _get_max_bonds_per_atom()
934
+
935
def remove_bonds_to(self, int32 atom_index):
    """
    remove_bonds_to(atom_index)

    Remove every bond from the :class:`BondList`, in which the given
    atom takes part.

    Parameters
    ----------
    atom_index : int
        The index of the atom whose bonds should be removed.
    """
    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    # A bond is kept, if the atom is at neither of its two positions
    not_first = self._bonds[:,0] != index
    not_second = self._bonds[:,1] != index
    self._bonds = self._bonds[not_first & not_second]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
962
+
963
def remove_bonds(self, bond_list):
    """
    remove_bonds(bond_list)

    Remove multiple bonds from the :class:`BondList`.

    All bonds present in `bond_list` are removed from this instance.
    If a bond is not existent in this instance, nothing happens.
    Only the bond indices, not the bond types, are relevant for
    this.

    Parameters
    ----------
    bond_list : BondList
        The bonds in `bond_list` are removed from this instance.
    """
    cdef int i=0, j=0

    # All bonds in the own BondList
    cdef uint32[:,:] all_bonds_v = self._bonds
    # The bonds that should be removed
    cdef uint32[:,:] rem_bonds_v = bond_list._bonds
    # Unsigned integers instead of booleans are used for the mask,
    # as a memory view is created from it
    # A value of 'True' means that the bond is kept
    cdef np.ndarray mask = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
    cdef uint8[:] mask_v = mask
    # Compare each own bond with each bond to be removed
    for i in range(all_bonds_v.shape[0]):
        for j in range(rem_bonds_v.shape[0]):
            # Only the atom index columns are compared,
            # the bond type column is ignored
            if      all_bonds_v[i,0] == rem_bonds_v[j,0] \
                and all_bonds_v[i,1] == rem_bonds_v[j,1]:
                    mask_v[i] = False

    # Remove the bonds
    self._bonds = self._bonds[mask.astype(bool, copy=False)]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
997
+
998
def merge(self, bond_list):
    """
    merge(bond_list)

    Combine this instance and another :class:`BondList` into a new
    :class:`BondList`.
    For a bond that is part of both :class:`BondList` objects, the
    :class:`BondType` from the given `bond_list` is used.

    The bonds of both objects are simply concatenated, i.e. no atom
    index is shifted.
    The atom count of the new object is the larger one of both atom
    counts.

    Parameters
    ----------
    bond_list : BondList
        This bond list is merged with this instance.

    Returns
    -------
    bond_list : BondList
        The merged :class:`BondList`.

    Notes
    -----
    This is not equal to using the `+` operator.

    Examples
    --------

    >>> bond_list1 = BondList(3, np.array([(0,1),(1,2)]))
    >>> bond_list2 = BondList(5, np.array([(2,3),(3,4)]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list.get_atom_count())
    5
    >>> print(merged_list)
    [[0 1 0]
     [1 2 0]
     [2 3 0]
     [3 4 0]]

    The BondList given as parameter takes precedence:

    >>> # Specify bond type to see where a bond is taken from
    >>> bond_list1 = BondList(4, np.array([
    ...     (0, 1, BondType.SINGLE),
    ...     (1, 2, BondType.SINGLE)
    ... ]))
    >>> bond_list2 = BondList(4, np.array([
    ...     (1, 2, BondType.DOUBLE),    # This one is a duplicate
    ...     (2, 3, BondType.DOUBLE)
    ... ]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list)
    [[0 1 1]
     [1 2 1]
     [2 3 2]]
    """
    merged_atom_count = max(self._atom_count, bond_list._atom_count)
    # The bonds of the given bond list are put first,
    # the constructor takes care of the duplicate bonds
    merged_bonds = np.concatenate(
        [bond_list.as_array(), self.as_array()],
        axis=0
    )
    return BondList(merged_atom_count, merged_bonds)
1063
+
1064
def __add__(self, bond_list):
    # The '+' operator appends the given bond list to this one
    operands = [self, bond_list]
    return BondList.concatenate(operands)
1066
+
1067
def __getitem__(self, index):
    # Three kinds of index are handled:
    # - A single integer gives the return value of 'get_bonds()'
    #   for this atom index
    # - A boolean mask array gives a new BondList, that contains only
    #   bonds between atoms selected by the mask
    # - Any other index is converted into an integer index array
    #   and gives a new BondList, that contains only bonds between
    #   atoms included in the index array
    # In the new BondList the atom indices are adapted, so that they
    # refer to the positions of the atoms after the index is applied

    ## Variables for both, integer and boolean index arrays
    cdef uint32[:,:] all_bonds_v
    cdef int i
    cdef uint32* index1_ptr
    cdef uint32* index2_ptr
    # 'True' for each bond that is kept in the new BondList
    cdef np.ndarray removal_filter
    cdef uint8[:] removal_filter_v

    ## Variables for integer arrays
    cdef int32[:] inverse_index_v
    cdef int32 new_index1, new_index2

    ## Variables for boolean mask
    # Boolean mask representation of the index
    cdef np.ndarray mask
    cdef uint8[:] mask_v
    # For each atom index the number of atoms up to this index,
    # that are not selected by the mask
    cdef np.ndarray offsets
    cdef uint32[:] offsets_v

    if isinstance(index, numbers.Integral):
        ## Handle single index
        return self.get_bonds(index)

    elif isinstance(index, np.ndarray) and index.dtype == bool:
        ## Handle boolean masks
        copy = self.copy()
        all_bonds_v = copy._bonds
        # Use 'uint8' instead of 'bool' for memory view
        mask = np.frombuffer(index, dtype=np.uint8)

        # Each time an atom is missing in the mask,
        # the offset is increased by one
        offsets = np.cumsum(
            ~mask.astype(bool, copy=False), dtype=np.uint32
        )
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        mask_v = mask
        offsets_v = offsets
        # If an atom in a bond is not selected by the mask,
        # the bond is removed from the list
        # If both atoms are selected,
        # their index values are decreased by the respective offset
        # The offset is necessary, as removing atoms in an AtomArray
        # decreases the index of the following atoms
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            if mask_v[index1_ptr[0]] and mask_v[index2_ptr[0]]:
                # Both atoms involved in bond are masked
                # -> decrease atom index by offset
                index1_ptr[0] -= offsets_v[index1_ptr[0]]
                index2_ptr[0] -= offsets_v[index2_ptr[0]]
            else:
                # At least one atom involved in bond is not masked
                # -> remove bond
                removal_filter_v[i] = False
        # Apply the bond removal filter
        copy._bonds = copy._bonds[removal_filter.astype(bool, copy=False)]
        # The new atom count is the number of selected atoms
        copy._atom_count = len(np.nonzero(mask)[0])
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy

    else:
        ## Convert any other type of index into index array, as it preserves order
        copy = self.copy()
        all_bonds_v = copy._bonds
        index = _to_index_array(index, self._atom_count)
        index = _to_positive_index_array(index, self._atom_count)

        # The inverse index is required to efficiently obtain
        # the new index of an atom in case of an unsorted index
        # array
        inverse_index_v = _invert_index(index, self._atom_count)
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            # The inverse index is '-1' for atoms
            # that are not part of the index array
            new_index1 = inverse_index_v[index1_ptr[0]]
            new_index2 = inverse_index_v[index2_ptr[0]]
            if new_index1 != -1 and new_index2 != -1:
                # Both atoms involved in bond are included
                # by index array
                # -> assign new atom indices
                index1_ptr[0] = <int32>new_index1
                index2_ptr[0] = <int32>new_index2
            else:
                # At least one atom in bond is not included
                # -> remove bond
                removal_filter_v[i] = False

        copy._bonds = copy._bonds[
            removal_filter.astype(bool, copy=False)
        ]
        # Again, sort indices per bond
        # as the correct order is not guaranteed anymore
        # for unsorted index arrays
        copy._bonds[:,:2] = np.sort(copy._bonds[:,:2], axis=1)
        copy._atom_count = len(index)
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy
1175
+
1176
def __iter__(self):
    # Iteration is explicitly refused for this class
    message = "'BondList' object is not iterable"
    raise TypeError(message)
1178
+
1179
def __str__(self):
    # The string representation is the one of the bond array
    bond_array = self.as_array()
    return str(bond_array)
1181
+
1182
def __eq__(self, item):
    # Objects of other types are never equal to a BondList
    if not isinstance(item, BondList):
        return False
    # Two bond lists are equal, if they have the same atom count
    # and contain the same bonds, irrespective of the bond order
    same_atom_count = self._atom_count == item._atom_count
    return same_atom_count and self.as_set() == item.as_set()
1187
+
1188
def __contains__(self, item):
    # Check whether a bond between two atoms is part of the BondList
    #
    # 'item' is a tuple of two atom indices, the order is irrelevant
    # A 'TypeError' is raised for anything else
    # NOTE(review): negative indices are not converted into positive
    # indices here, in contrast to e.g. 'get_bonds()'
    if not isinstance(item, tuple) or len(item) != 2:
        raise TypeError("Expected a tuple of atom indices")

    cdef int i=0

    cdef uint32 match_index1, match_index2
    # Sort indices for faster search in loop
    cdef uint32 atom_index1 = min(item)
    cdef uint32 atom_index2 = max(item)

    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        match_index1 = all_bonds_v[i,0]
        match_index2 = all_bonds_v[i,1]
        # The bond type column is ignored in the comparison
        if atom_index1 == match_index1 and atom_index2 == match_index2:
            return True

    return False
1207
+
1208
+
1209
def _get_max_bonds_per_atom(self):
    # Get the highest number of bonds a single atom takes part in
    # Without atoms there is nothing to count
    if self._atom_count == 0:
        return 0

    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    # For each atom index the number of bonds it appears in
    cdef np.ndarray bond_counts = np.zeros(self._atom_count,
                                           dtype=np.uint32)
    cdef uint32[:] bond_counts_v = bond_counts
    for row in range(bonds_v.shape[0]):
        # Each bond counts for both of its atoms
        bond_counts_v[bonds_v[row,0]] += 1
        bond_counts_v[bonds_v[row,1]] += 1
    return np.max(bond_counts_v)
1224
+
1225
def _remove_redundant_bonds(self):
    # Remove duplicate bonds from 'self._bonds'
    #
    # Two bonds are duplicates, if they have the same pair of atom
    # indices, the bond type is not compared
    # Only the first occurrence of a bond is kept
    cdef int j
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Boolean mask for final removal of redundant atoms
    # Unfortunately views of boolean ndarrays are not supported
    # -> use uint8 array
    cdef np.ndarray redundancy_filter = np.ones(all_bonds_v.shape[0],
                                                dtype=np.uint8)
    cdef uint8[:] redundancy_filter_v = redundancy_filter
    # Array of pointers to C-arrays
    # The array is indexed with the atom indices in the bond list
    # The respective C-array contains the indices of bonded atoms
    # Zero-initialization means that each pointer starts as NULL
    cdef ptr[:] ptrs_v = np.zeros(self._atom_count, dtype=np.uint64)
    # Stores the length of the C-arrays
    cdef int[:] array_len_v = np.zeros(self._atom_count, dtype=np.int32)
    # Iterate over bond list:
    # If bond is already listed in the array of pointers,
    # set filter to false at that position
    # Else add bond to array of pointers
    cdef uint32 i1, i2
    cdef uint32* array_ptr
    cdef int length

    try:
        for j in range(all_bonds_v.shape[0]):
            i1 = all_bonds_v[j,0]
            i2 = all_bonds_v[j,1]
            # Since the bonds have the atom indices sorted
            # the reverse check is omitted
            if _in_array(<uint32*>ptrs_v[i1], i2, array_len_v[i1]):
                redundancy_filter_v[j] = False
            else:
                # Append bond in respective C-array
                # and update C-array length
                length = array_len_v[i1] +1
                array_ptr = <uint32*>ptrs_v[i1]
                array_ptr = <uint32*>realloc(
                    array_ptr, length * sizeof(uint32)
                )
                if not array_ptr:
                    # 'ptrs_v[i1]' still holds the previous pointer,
                    # which is freed in the 'finally' block
                    raise MemoryError()
                array_ptr[length-1] = i2
                ptrs_v[i1] = <ptr>array_ptr
                array_len_v[i1] = length

    finally:
        # Free pointers
        # This is also run, if an exception is raised in the loop above
        for i in range(ptrs_v.shape[0]):
            free(<int*>ptrs_v[i])

    # Eventually remove redundant bonds
    self._bonds = self._bonds[redundancy_filter.astype(bool, copy=False)]
1277
+
1278
+
1279
cdef uint32 _to_positive_index(int32 index, uint32 array_length) except -1:
    """
    Convert a potentially negative index into a positive index.

    A negative index counts from the end, i.e. `array_length` is added
    to it.
    An :class:`IndexError` is raised, if the index is out of range for
    the given `array_length`.
    """
    # The shifted index is computed as signed 64 bit integer:
    # As unsigned 32 bit integer, a too small negative index would
    # wrap around to a large positive value instead of being negative,
    # so the range check below could never fail
    cdef long long shifted_index
    if index < 0:
        shifted_index = <long long> array_length + <long long> index
        if shifted_index < 0:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> shifted_index
    else:
        if <uint32> index >= array_length:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> index
1299
+
1300
+
1301
def _to_positive_index_array(index_array, length):
    """
    Convert potentially negative values in an array into positive
    values and check for out-of-bounds values.

    The input array is not modified, a new array with the same shape
    is returned.
    An :class:`IndexError` is raised, if any index is out of range for
    the given `length`.
    """
    orig_shape = index_array.shape
    # 'flatten()' always returns a copy
    # -> the array of the caller is not changed by the assignment below
    flat_indices = index_array.flatten()
    negatives = flat_indices < 0
    if negatives.any():
        # Remember the lowest index before the negative indices are
        # shifted, so that the error message shows the value given by
        # the caller
        lowest_index = np.min(flat_indices)
        flat_indices[negatives] = length + flat_indices[negatives]
        if (flat_indices < 0).any():
            raise IndexError(
                f"Index {lowest_index} is out of range "
                f"for an atom count of {length}"
            )
    if (flat_indices >= length).any():
        raise IndexError(
            f"Index {np.max(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    return flat_indices.reshape(orig_shape)
1322
+
1323
+
1324
def _to_index_array(object index, uint32 length):
    """
    Get an index array from an index of arbitrary type.

    An integer :class:`ndarray` is returned as it is.
    Any other index is applied to the array of all indices from 0 to
    `length` (exclusive).
    """
    is_integer_array = (
        isinstance(index, np.ndarray)
        and np.issubdtype(index.dtype, np.integer)
    )
    if is_integer_array:
        return index
    # Let NumPy indexing do the conversion into an index array
    return np.arange(length, dtype=np.uint32)[index]
1334
+
1335
+
1336
cdef inline bint _in_array(uint32* array, uint32 atom_index, int array_length):
    """
    Check whether the C-array `array` with the given `array_length`
    contains the value `atom_index`.
    A ``NULL`` pointer is handled like an empty array.
    """
    cdef int pos = 0
    if array != NULL:
        for pos in range(array_length):
            if array[pos] == atom_index:
                return True
    return False
1347
+
1348
+
1349
cdef inline void _sort(uint32* index1_ptr, uint32* index2_ptr):
    # Exchange the two values in-place, if necessary,
    # so that the first value is not greater than the second one
    cdef uint32 lower
    if index2_ptr[0] < index1_ptr[0]:
        lower = index2_ptr[0]
        index2_ptr[0] = index1_ptr[0]
        index1_ptr[0] = lower
1356
+
1357
+
1358
@cython.wraparound(False)
# Do bounds check, as the input indices may be out of bounds
def _invert_index(IndexType[:] index_v, uint32 length):
    """
    Create the inverse of an index array:
    If *input[i] = j*, then *output[j] = i*.
    *output[j]* is -1 for each *j*, that is not part of *input*.

    A :class:`NotImplementedError` is raised, if a value appears
    multiple times in the input.
    """
    cdef int32 pos
    cdef IndexType index_val
    # '-1' marks the values that were not found in the input (yet)
    inverse = np.full(length, -1, dtype=np.int32)
    cdef int32[:] inverse_v = inverse

    for pos in range(index_v.shape[0]):
        index_val = index_v[pos]
        if inverse_v[index_val] != -1:
            # The position for this value is already set
            # -> the value is a duplicate, which is not supported
            raise NotImplementedError(
                f"Duplicate indices are not supported, "
                f"but index {index_val} appeared multiple times"
            )
        inverse_v[index_val] = pos

    return inverse
1384
+
1385
+
1386
+
1387
+
1388
# fmt: off
# Default bond distance ranges used by 'connect_via_distances()'.
# Each key is a tuple of two upper case element symbols and each value
# is a tuple '(lower bound, upper bound)' of the accepted distance.
# Every element pair is listed once; 'connect_via_distances()' adds
# the entry for the reversed element order on its own.
_DEFAULT_DISTANCE_RANGE = {
    # Taken from Allen et al.
    # Layout of each value:
    # min - 2*std max + 2*std
    # i.e. both bounds are widened by two standard deviations
    ("B", "C" ) : (1.556 - 2*0.015, 1.556 + 2*0.015),
    ("BR", "C" ) : (1.875 - 2*0.029, 1.966 + 2*0.029),
    ("BR", "O" ) : (1.581 - 2*0.007, 1.581 + 2*0.007),
    ("C", "C" ) : (1.174 - 2*0.011, 1.588 + 2*0.025),
    ("C", "CL") : (1.713 - 2*0.011, 1.849 + 2*0.011),
    ("C", "F" ) : (1.320 - 2*0.009, 1.428 + 2*0.009),
    ("C", "H" ) : (1.059 - 2*0.030, 1.099 + 2*0.007),
    ("C", "I" ) : (2.095 - 2*0.015, 2.162 + 2*0.015),
    ("C", "N" ) : (1.325 - 2*0.009, 1.552 + 2*0.023),
    ("C", "O" ) : (1.187 - 2*0.011, 1.477 + 2*0.008),
    ("C", "P" ) : (1.791 - 2*0.006, 1.855 + 2*0.019),
    ("C", "S" ) : (1.630 - 2*0.014, 1.863 + 2*0.015),
    ("C", "SE") : (1.893 - 2*0.013, 1.970 + 2*0.032),
    ("C", "SI") : (1.837 - 2*0.012, 1.888 + 2*0.023),
    ("CL", "O" ) : (1.414 - 2*0.026, 1.414 + 2*0.026),
    ("CL", "P" ) : (1.997 - 2*0.035, 2.008 + 2*0.035),
    ("CL", "S" ) : (2.072 - 2*0.023, 2.072 + 2*0.023),
    ("CL", "SI") : (2.072 - 2*0.009, 2.072 + 2*0.009),
    ("F", "N" ) : (1.406 - 2*0.016, 1.406 + 2*0.016),
    ("F", "P" ) : (1.495 - 2*0.016, 1.579 + 2*0.025),
    ("F", "S" ) : (1.640 - 2*0.011, 1.640 + 2*0.011),
    ("F", "SI") : (1.588 - 2*0.014, 1.694 + 2*0.013),
    ("H", "N" ) : (1.009 - 2*0.022, 1.033 + 2*0.022),
    ("H", "O" ) : (0.967 - 2*0.010, 1.015 + 2*0.017),
    ("I", "O" ) : (2.144 - 2*0.028, 2.144 + 2*0.028),
    ("N", "N" ) : (1.124 - 2*0.015, 1.454 + 2*0.021),
    ("N", "O" ) : (1.210 - 2*0.011, 1.463 + 2*0.012),
    ("N", "P" ) : (1.571 - 2*0.013, 1.697 + 2*0.015),
    ("N", "S" ) : (1.541 - 2*0.022, 1.710 + 2*0.019),
    ("N", "SI") : (1.711 - 2*0.019, 1.748 + 2*0.022),
    ("O", "P" ) : (1.449 - 2*0.007, 1.689 + 2*0.024),
    ("O", "S" ) : (1.423 - 2*0.008, 1.580 + 2*0.015),
    ("O", "SI") : (1.622 - 2*0.014, 1.680 + 2*0.008),
    ("P", "P" ) : (2.214 - 2*0.022, 2.214 + 2*0.022),
    ("P", "S" ) : (1.913 - 2*0.014, 1.954 + 2*0.005),
    ("P", "SE") : (2.093 - 2*0.019, 2.093 + 2*0.019),
    ("P", "SI") : (2.264 - 2*0.019, 2.264 + 2*0.019),
    ("S", "S" ) : (1.897 - 2*0.012, 2.070 + 2*0.022),
    ("S", "SE") : (2.193 - 2*0.015, 2.193 + 2*0.015),
    ("S", "SI") : (2.145 - 2*0.020, 2.145 + 2*0.020),
    ("SE", "SE") : (2.340 - 2*0.024, 2.340 + 2*0.024),
    ("SI", "SE") : (2.359 - 2*0.012, 2.359 + 2*0.012),
}
# fmt: on
1436
+
1437
def connect_via_distances(atoms, dict distance_range=None, bint inter_residue=True,
                          default_bond_type=BondType.ANY, bint periodic=False):
    """
    connect_via_distances(atoms, distance_range=None, inter_residue=True,
                          default_bond_type=BondType.ANY, periodic=False)

    Create a :class:`BondList` for a given atom array, based on
    pairwise atom distances.

    A bond of type `default_bond_type` is created for two atoms within
    the same residue, if the distance between them is within the
    expected bond distance range.
    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray
        The structure to create the :class:`BondList` for.
    distance_range : dict of tuple(str, str) -> tuple(float, float), optional
        Custom minimum and maximum bond distances.
        The dictionary keys are tuples of chemical elements representing
        the atoms to be potentially bonded.
        The order of elements within each tuple does not matter.
        The dictionary values are the minimum and maximum bond distance,
        respectively, for the given combination of elements.
        This parameter updates the default dictionary.
        Hence, the default bond distances for missing element pairs are
        still taken from the default dictionary.
        The default bond distances are taken from :footcite:`Allen1987`.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    default_bond_type : BondType or int, optional
        By default, all created bonds have :attr:`BondType.ANY`.
        An alternative :class:`BondType` can be given in this parameter.
    periodic : bool, optional
        If set to true, bonds can also be detected in periodic
        boundary conditions.
        The `box` attribute of `atoms` is required in this case.

    Returns
    -------
    BondList
        The created bond list.

    Raises
    ------
    TypeError
        If `atoms` is not an :class:`AtomArray`.
    BadStructureError
        If `periodic` is true, but `atoms` has no box.

    See Also
    --------
    connect_via_residue_names

    Notes
    -----
    This method might miss bonds, if the bond distance is unexpectedly
    high or low, or it might create false bonds, if two atoms within a
    residue are accidentally in the right distance.
    A more accurate method for determining bonds is
    :func:`connect_via_residue_names()`.

    The element symbols in the keys of `distance_range` are converted
    to upper case, whereas the ``element`` annotation of `atoms` is
    compared as it is.

    If `default_bond_type` is :attr:`BondType.ANY`, the bond order of
    the inter-residue bonds is removed as well.
    Otherwise they keep the bond type assigned by the inter-residue
    detection.

    References
    ----------

    .. footbibliography::
    """
    from .atoms import AtomArray
    from .geometry import distance
    from .residues import get_residue_starts

    cdef list bonds = []
    cdef int i
    cdef int curr_start_i, next_start_i
    cdef np.ndarray coord
    cdef np.ndarray coord_in_res
    cdef np.ndarray distances
    cdef float dist
    cdef np.ndarray elements
    cdef np.ndarray elements_in_res
    cdef int atom_index1, atom_index2
    cdef dict dist_ranges = {}
    cdef tuple dist_range
    cdef float min_dist, max_dist

    # Validate the input before any of its attributes is accessed,
    # so an unsuitable object gives the intended 'TypeError'
    if not isinstance(atoms, AtomArray):
        raise TypeError(f"Expected 'AtomArray', not '{type(atoms).__name__}'")
    if periodic:
        if atoms.box is None:
            raise BadStructureError("Atom array has no box")
        box = atoms.box
    else:
        box = None
    coord = atoms.coord
    elements = atoms.element

    # Prepare distance dictionary...
    if distance_range is None:
        distance_range = {}
    # Merge default and custom entries
    # Custom entries come last and hence overwrite the defaults
    for key, val in itertools.chain(
        _DEFAULT_DISTANCE_RANGE.items(), distance_range.items()
    ):
        element1, element2 = key
        # Add entries for both element orders
        dist_ranges[(element1.upper(), element2.upper())] = val
        dist_ranges[(element2.upper(), element1.upper())] = val

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # Omit exclusive stop in 'residue_starts'
    for i in range(len(residue_starts)-1):
        curr_start_i = residue_starts[i]
        next_start_i = residue_starts[i+1]

        elements_in_res = elements[curr_start_i : next_start_i]
        coord_in_res = coord[curr_start_i : next_start_i]
        # Matrix containing all pairwise atom distances in the residue
        distances = distance(
            coord_in_res[:, np.newaxis, :],
            coord_in_res[np.newaxis, :, :],
            box
        )
        for atom_index1 in range(len(elements_in_res)):
            # Visit each unordered atom pair only once
            for atom_index2 in range(atom_index1):
                dist_range = dist_ranges.get((
                    elements_in_res[atom_index1],
                    elements_in_res[atom_index2]
                ))
                if dist_range is None:
                    # No bond distance entry for this element
                    # combination -> skip
                    continue
                min_dist, max_dist = dist_range
                dist = distances[atom_index1, atom_index2]
                if dist >= min_dist and dist <= max_dist:
                    # Indices are relative to the residue
                    # -> shift them to indices of the entire array
                    bonds.append((
                        curr_start_i + atom_index1,
                        curr_start_i + atom_index2,
                        default_bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(bonds))

    if inter_residue:
        inter_bonds = _connect_inter_residue(atoms, residue_starts)
        if default_bond_type == BondType.ANY:
            # As all bonds should be of type ANY, convert also
            # inter-residue bonds to ANY
            inter_bonds.remove_bond_order()
        return bond_list.merge(inter_bonds)
    else:
        return bond_list
1586
+
1587
+
1588
+
1589
def connect_via_residue_names(atoms, bint inter_residue=True,
                              dict custom_bond_dict=None):
    """
    connect_via_residue_names(atoms, inter_residue=True, custom_bond_dict=None)

    Create a :class:`BondList` for a given atom array (stack), based on
    the deposited bonds for each residue in the RCSB ``components.cif``
    dataset.

    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The structure to create the :class:`BondList` for.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    custom_bond_dict : dict (str -> dict ((str, str) -> int)), optional
        A dictionary of dictionaries:
        The outer dictionary maps residue names to inner dictionaries.
        The inner dictionary maps tuples of two atom names to their
        respective :class:`BondType` (represented as integer).
        If given, these bonds are used instead of the bonds read from
        ``components.cif``.

    Returns
    -------
    BondList
        The created bond list.
        No bonds are added for residues that are not found in
        ``components.cif``.

    See Also
    --------
    connect_via_distances

    Notes
    -----
    This method can only find bonds for residues in the RCSB
    *Chemical Component Dictionary*, unless `custom_bond_dict` is set.
    Although this includes most molecules one encounters, this will fail
    for exotic molecules, e.g. specialized inhibitors.

    .. currentmodule:: biotite.structure.info

    To supplement `custom_bond_dict` with bonds for residues from the
    *Chemical Component Dictionary* you can use
    :meth:`bonds_in_residue()`.

    >>> import pprint
    >>> custom_bond_dict = {
    ...     "XYZ": {
    ...         ("A", "B"): BondType.SINGLE,
    ...         ("B", "C"): BondType.SINGLE
    ...     }
    ... }
    >>> # Supplement with bonds for common residues
    >>> custom_bond_dict["ALA"] = bonds_in_residue("ALA")
    >>> pp = pprint.PrettyPrinter(width=40)
    >>> pp.pprint(custom_bond_dict)
    {'ALA': {('C', 'O'): <BondType.DOUBLE: 2>,
             ('C', 'OXT'): <BondType.SINGLE: 1>,
             ('CA', 'C'): <BondType.SINGLE: 1>,
             ('CA', 'CB'): <BondType.SINGLE: 1>,
             ('CA', 'HA'): <BondType.SINGLE: 1>,
             ('CB', 'HB1'): <BondType.SINGLE: 1>,
             ('CB', 'HB2'): <BondType.SINGLE: 1>,
             ('CB', 'HB3'): <BondType.SINGLE: 1>,
             ('N', 'CA'): <BondType.SINGLE: 1>,
             ('N', 'H'): <BondType.SINGLE: 1>,
             ('N', 'H2'): <BondType.SINGLE: 1>,
             ('OXT', 'HXT'): <BondType.SINGLE: 1>},
     'XYZ': {('A', 'B'): <BondType.SINGLE: 1>,
             ('B', 'C'): <BondType.SINGLE: 1>}}
    """
    from .info.bonds import bonds_in_residue
    from .residues import get_residue_starts

    cdef list bonds = []
    cdef int res_i
    cdef int pos1, pos2
    cdef int curr_start_i, next_start_i
    cdef np.ndarray atom_names = atoms.atom_name
    cdef np.ndarray atom_names_in_res
    cdef np.ndarray res_names = atoms.res_name
    cdef str atom_name1, atom_name2
    cdef int64[:] atom_indices1, atom_indices2
    cdef dict bond_dict_for_res

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last element is the exclusive stop and not a residue start
    residue_count = len(residue_starts) - 1
    for res_i in range(residue_count):
        curr_start_i = residue_starts[res_i]
        next_start_i = residue_starts[res_i+1]

        # The name of a residue is taken from its first atom
        res_name = res_names[curr_start_i]
        if custom_bond_dict is not None:
            # Residues missing in the custom dictionary get no bonds
            bond_dict_for_res = custom_bond_dict.get(res_name, {})
        else:
            bond_dict_for_res = bonds_in_residue(res_name)

        atom_names_in_res = atom_names[curr_start_i : next_start_i]
        for name_pair, bond_type in bond_dict_for_res.items():
            atom_name1, atom_name2 = name_pair
            matches1 = np.where(atom_names_in_res == atom_name1)[0]
            matches2 = np.where(atom_names_in_res == atom_name2)[0]
            atom_indices1 = matches1.astype(np.int64, copy=False)
            atom_indices2 = matches2.astype(np.int64, copy=False)
            # An atom name is usually unique within a residue,
            # but it may rarely be found multiple times (e.g. in altlocs)
            # -> bond every match of the first name
            #    with every match of the second name
            for pos1 in range(atom_indices1.shape[0]):
                for pos2 in range(atom_indices2.shape[0]):
                    bonds.append((
                        curr_start_i + atom_indices1[pos1],
                        curr_start_i + atom_indices2[pos2],
                        bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(bonds))

    if not inter_residue:
        return bond_list
    inter_bonds = _connect_inter_residue(atoms, residue_starts)
    return bond_list.merge(inter_bonds)
1718
+
1719
+
1720
+
1721
_PEPTIDE_LINKS = ["PEPTIDE LINKING", "L-PEPTIDE LINKING", "D-PEPTIDE LINKING"]
_NUCLEIC_LINKS = ["RNA LINKING", "DNA LINKING"]

def _connect_inter_residue(atoms, residue_starts):
    """
    Create a :class:`BondList` containing the bonds between adjacent
    amino acid or nucleotide residues.

    Two consecutive residues are bonded, if they are in the same chain,
    their residue IDs do not differ by more than one and both have a
    peptide link type or both have a nucleic acid link type.
    The bond is a single bond between the first ``'C'`` and ``'N'``
    atom (peptides) or the first ``"O3'"`` and ``'P'`` atom
    (nucleotides) found in the respective residues.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to create the :class:`BondList` for.
    residue_starts : ndarray, dtype=int
        Return value of
        ``get_residue_starts(atoms, add_exclusive_stop=True)``.

    Returns
    -------
    BondList
        A bond list containing all inter residue bonds.
    """
    from .info.misc import link_type

    cdef list bonds = []
    cdef int res_i
    cdef np.ndarray atom_names = atoms.atom_name
    cdef np.ndarray res_names = atoms.res_name
    cdef np.ndarray res_ids = atoms.res_id
    cdef np.ndarray chain_ids = atoms.chain_id
    cdef int curr_start_i, next_start_i, after_next_start_i
    cdef str curr_connect_atom_name, next_connect_atom_name
    cdef np.ndarray curr_connect_indices, next_connect_indices

    # The last two elements of 'residue_starts' are left out:
    # - the last residue has no successor to connect to and
    # - the final element is the exclusive end index of 'atoms'
    for res_i in range(len(residue_starts)-2):
        curr_start_i = residue_starts[res_i]
        next_start_i = residue_starts[res_i+1]
        after_next_start_i = residue_starts[res_i+2]

        # Residues from different chains are never connected
        if chain_ids[next_start_i] != chain_ids[curr_start_i]:
            continue
        # The residue IDs must be consecutive
        # (the same residue ID is accepted as well,
        # as this is the case if an insertion code is used)
        if res_ids[next_start_i] - res_ids[curr_start_i] > 1:
            continue

        # Link types of both residues from RCSB components.cif
        curr_link = link_type(res_names[curr_start_i])
        next_link = link_type(res_names[next_start_i])

        if curr_link in _PEPTIDE_LINKS and next_link in _PEPTIDE_LINKS:
            curr_connect_atom_name, next_connect_atom_name = "C", "N"
        elif curr_link in _NUCLEIC_LINKS and next_link in _NUCLEIC_LINKS:
            curr_connect_atom_name, next_connect_atom_name = "O3'", "P"
        else:
            # The link types of the consecutive residues
            # are not compatible -> no bond
            continue

        # The matches are found in a slice of 'atom_names'
        # -> add the slice start to obtain indices in the atom array
        curr_matches = np.where(
            atom_names[curr_start_i : next_start_i]
            == curr_connect_atom_name
        )[0]
        curr_connect_indices = curr_start_i + curr_matches
        next_matches = np.where(
            atom_names[next_start_i : after_next_start_i]
            == next_connect_atom_name
        )[0]
        next_connect_indices = next_start_i + next_matches
        if len(curr_connect_indices) == 0 or len(next_connect_indices) == 0:
            # At least one connector atom is missing -> no bond
            continue

        # Only the first match in each residue is bonded
        bonds.append((
            curr_connect_indices[0],
            next_connect_indices[0],
            BondType.SINGLE
        ))

    return BondList(atoms.array_length(), np.array(bonds, dtype=np.uint32))
1810
+
1811
+
1812
+
1813
def find_connected(bond_list, uint32 root, bint as_mask=False):
    """
    find_connected(bond_list, root, as_mask=False)

    Get indices to all atoms that are directly or indirectly connected
    to the root atom indicated by the given index.

    An atom is *connected* to the `root` atom, if that atom is reachable
    by traversing an arbitrary number of bonds, starting from the
    `root`.
    Effectively, this means that all atoms are *connected* to `root`,
    that are in the same molecule as `root`.
    Per definition `root` is also *connected* to itself.

    Parameters
    ----------
    bond_list : BondList
        The reference bond list.
    root : int
        The index of the root atom.
    as_mask : bool, optional
        If true, the connected atom indices are returned as boolean
        mask.
        By default, the connected atom indices are returned as integer
        array.

    Returns
    -------
    connected : ndarray, dtype=int or ndarray, dtype=bool
        Either a boolean mask or an integer array, representing the
        connected atoms.
        In case of a boolean mask: ``connected[i] == True``, if the atom
        with index ``i`` is connected.

    Raises
    ------
    ValueError
        If `root` is not smaller than the number of atoms represented
        by `bond_list`.

    Examples
    --------
    Consider a system with 4 atoms, where only the last atom is not
    bonded with the other ones (``0-1-2 3``):

    >>> bonds = BondList(4)
    >>> bonds.add_bond(0, 1)
    >>> bonds.add_bond(1, 2)
    >>> print(find_connected(bonds, 0))
    [0 1 2]
    >>> print(find_connected(bonds, 1))
    [0 1 2]
    >>> print(find_connected(bonds, 2))
    [0 1 2]
    >>> print(find_connected(bonds, 3))
    [3]
    """
    cdef uint8[:] is_connected_mask

    atom_count = bond_list.get_atom_count()
    # Reject an invalid root before any bond data is prepared
    if root >= atom_count:
        raise ValueError(
            f"Root atom index {root} is out of bounds for bond list "
            f"representing {atom_count} atoms"
        )

    all_bonds, _ = bond_list.get_all_bonds()
    is_connected_mask = np.zeros(atom_count, dtype=np.uint8)
    # Mark all atoms that are reachable from the root via bonds
    _find_connected(bond_list, root, is_connected_mask, all_bonds)

    # The typed memoryview itself is not a usable index:
    # convert it into the documented boolean array
    connected = np.asarray(is_connected_mask).astype(bool)
    if as_mask:
        return connected
    else:
        return np.where(connected)[0]
1882
+
1883
+
1884
cdef _find_connected(bond_list,
                     int32 index,
                     uint8[:] is_connected_mask,
                     int32[:,:] all_bonds):
    """
    Mark all atoms in `is_connected_mask` that are reachable from the
    atom at `index` by traversing bonds.

    `all_bonds` holds the bonded atom indices for each atom (one row
    per atom), where ``-1`` is a padding value.
    Atoms that are already marked in `is_connected_mask` are treated
    as visited and are not traversed again.
    `bond_list` is not used, it is only kept in the signature.

    The traversal uses an explicit stack instead of recursion, so the
    call depth does not grow with the size of the molecule.
    """
    cdef int32 j
    cdef int32 current_index
    cdef int32 connected_index
    cdef int32 stack_size
    cdef int32[:] stack

    if is_connected_mask[index]:
        # This atom has already been visited
        # -> nothing to do
        return

    # Each atom is marked when it is put onto the stack,
    # hence it is put there at most once
    # -> the stack never holds more elements than there are atoms
    stack = np.empty(is_connected_mask.shape[0], dtype=np.int32)
    is_connected_mask[index] = True
    stack[0] = index
    stack_size = 1

    while stack_size > 0:
        stack_size -= 1
        current_index = stack[stack_size]
        for j in range(all_bonds.shape[1]):
            connected_index = all_bonds[current_index, j]
            if connected_index == -1:
                # Ignore padding values
                continue
            if not is_connected_mask[connected_index]:
                is_connected_mask[connected_index] = True
                stack[stack_size] = connected_index
                stack_size += 1
1903
+ )
1904
+
1905
+
1906
def find_rotatable_bonds(bonds):
    """
    find_rotatable_bonds(bonds)

    Find all rotatable bonds in a given :class:`BondList`.

    The following conditions must be true for a bond to be counted as
    rotatable:

        1. The bond must be a single bond (``BondType.SINGLE``)
        2. The connected atoms must not be within the same cycle/ring
        3. Both connected atoms must not be terminal, e.g. not a *C-H*
           bond, as rotation about such bonds would not change any
           coordinates

    Parameters
    ----------
    bonds : BondList
        The bonds to find the rotatable bonds in.

    Returns
    -------
    rotatable_bonds : BondList
        The subset of the input `bonds` that contains only rotatable
        bonds.

    Examples
    --------

    >>> molecule = residue("TYR")
    >>> for i, j, _ in find_rotatable_bonds(molecule.bonds).as_array():
    ...     print(molecule.atom_name[i], molecule.atom_name[j])
    N CA
    CA C
    CA CB
    C OXT
    CB CG
    CZ OH
    """
    cdef uint32 i, j
    cdef uint32 bond_type
    cdef uint32 SINGLE = int(BondType.SINGLE)
    cdef bint in_same_cycle
    cdef int64[:] number_of_partners_v
    cdef uint32[:,:] bonds_v

    bond_graph = bonds.as_graph()
    # Sets give constant-time membership tests for the atoms of a cycle
    cycle_sets = [
        set(cycle_atoms)
        for cycle_atoms in nx.algorithms.cycles.cycle_basis(bond_graph)
    ]

    # Number of bonded atoms for each atom:
    # '-1' is the padding value in the array of bonded atom indices
    number_of_partners_v = np.count_nonzero(
        bonds.get_all_bonds()[0] != -1,
        axis=1
    ).astype(np.int64, copy=False)

    rotatable_bonds = []
    bonds_v = bonds.as_array()
    for i, j, bond_type in bonds_v:
        # Can only rotate about single bonds
        if bond_type != SINGLE:
            continue
        # Furthermore, it makes no sense to rotate about a bond,
        # that leads to a single atom
        if number_of_partners_v[i] <= 1 or number_of_partners_v[j] <= 1:
            continue
        # Cannot rotate about a bond, if the two connected atoms
        # are in a cycle
        in_same_cycle = False
        for cycle_set in cycle_sets:
            if i in cycle_set and j in cycle_set:
                in_same_cycle = True
                # One common cycle is sufficient
                break
        if not in_same_cycle:
            rotatable_bonds.append((i, j, bond_type))
    return BondList(bonds.get_atom_count(), np.array(rotatable_bonds))