biotite 1.1.0__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-darwin.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-darwin.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-darwin.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-darwin.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-darwin.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1972 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides the :class:`BondList` class for storing chemical bonds
7
+ between atoms, together with functions that operate on such bonds.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["BondList", "BondType",
13
+ "connect_via_distances", "connect_via_residue_names",
14
+ "find_connected", "find_rotatable_bonds"]
15
+
16
+ cimport cython
17
+ cimport numpy as np
18
+ from libc.stdlib cimport free, realloc
19
+
20
+ from collections.abc import Sequence
21
+ import itertools
22
+ import numbers
23
+ from enum import IntEnum
24
+ import networkx as nx
25
+ import numpy as np
26
+ from .error import BadStructureError
27
+ from ..copyable import Copyable
28
+
29
+ ctypedef np.uint64_t ptr
30
+ ctypedef np.uint8_t uint8
31
+ ctypedef np.uint16_t uint16
32
+ ctypedef np.uint32_t uint32
33
+ ctypedef np.uint64_t uint64
34
+ ctypedef np.int8_t int8
35
+ ctypedef np.int16_t int16
36
+ ctypedef np.int32_t int32
37
+ ctypedef np.int64_t int64
38
+
39
+
40
+ ctypedef fused IndexType:
41
+ uint8
42
+ uint16
43
+ uint32
44
+ uint64
45
+ int8
46
+ int16
47
+ int32
48
+ int64
49
+
50
+
51
class BondType(IntEnum):
    """
    Enumeration of the types a chemical bond can be annotated with.

    - `ANY` - Used if the actual type is unknown
    - `SINGLE` - Single bond
    - `DOUBLE` - Double bond
    - `TRIPLE` - Triple bond
    - `QUADRUPLE` - A quadruple bond
    - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
    - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
    - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
    - `COORDINATION` - Coordination complex involving a metal atom
    """
    ANY = 0
    SINGLE = 1
    DOUBLE = 2
    TRIPLE = 3
    QUADRUPLE = 4
    AROMATIC_SINGLE = 5
    AROMATIC_DOUBLE = 6
    AROMATIC_TRIPLE = 7
    COORDINATION = 8


    def without_aromaticity(self):
        """
        Get the non-aromatic counterpart of this bond type.

        :attr:`BondType.AROMATIC_{ORDER}` is converted into
        :attr:`BondType.{ORDER}`.
        Every other bond type is returned unchanged.

        Returns
        -------
        new_bond_type : BondType
            The :class:`BondType` without aromaticity.

        Examples
        --------

        >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
        DOUBLE
        """
        # The mapping lives inside the method on purpose:
        # a name assigned in the class body of an enum
        # would be interpreted as an additional enum member
        non_aromatic_counterparts = {
            BondType.AROMATIC_SINGLE: BondType.SINGLE,
            BondType.AROMATIC_DOUBLE: BondType.DOUBLE,
            BondType.AROMATIC_TRIPLE: BondType.TRIPLE,
        }
        # Bond types without aromaticity map onto themselves
        return non_aromatic_counterparts.get(self, self)
102
+
103
+
104
+ @cython.boundscheck(False)
105
+ @cython.wraparound(False)
106
+ class BondList(Copyable):
107
+ """
108
+ __init__(atom_count, bonds=None)
109
+
110
+ A bond list stores indices of atoms
111
+ (usually of an :class:`AtomArray` or :class:`AtomArrayStack`)
112
+ that form chemical bonds together with the type (or order) of the
113
+ bond.
114
+
115
+ Internally the bonds are stored as *n x 3* :class:`ndarray`.
116
+ For each row, the first column specifies the index of the first
117
+ atom, the second column the index of the second atom involved in the
118
+ bond.
119
+ The third column stores an integer that is interpreted as member
120
+ of the :class:`BondType` enum, that specifies the order of the
121
+ bond.
122
+
123
+ When indexing a :class:`BondList`, the index is not forwarded to the
124
+ internal :class:`ndarray`. Instead the indexing behavior is
125
+ consistent with indexing an :class:`AtomArray` or
126
+ :class:`AtomArrayStack`:
127
+ Bonds with at least one atom index that is not covered by the index
128
+ are removed, atom indices that occur after an uncovered atom index
129
+ move up.
130
+ Effectively, this means that after indexing an :class:`AtomArray`
131
+ and a :class:`BondList` with the same index, the atom indices in the
132
+ :class:`BondList` will still point to the same atoms in the
133
+ :class:`AtomArray`.
134
+ Indexing a :class:`BondList` with a single integer is equivalent
135
+ to calling :func:`get_bonds()`.
136
+
137
+ The same consistency applies to adding :class:`BondList` instances
138
+ via the '+' operator:
139
+ The atom indices of the second :class:`BondList` are increased by
140
+ the atom count of the first :class:`BondList` and then both
141
+ :class:`BondList` objects are merged.
142
+
143
+ Parameters
144
+ ----------
145
+ atom_count : int
146
+ A positive integer, that specifies the number of atoms the
147
+ :class:`BondList` refers to
148
+ (usually the length of an atom array (stack)).
149
+ Effectively, this value is the exclusive maximum for the indices
150
+ stored in the :class:`BondList`.
151
+ bonds : ndarray, shape=(n,2) or shape=(n,3), dtype=int, optional
152
+ This array contains the indices of atoms which are bonded:
153
+ For each row, the first column specifies the first atom,
154
+ the second column the second atom involved in a chemical bond.
155
+ If an *n x 3* array is provided, the additional column
156
+ specifies a :class:`BondType` instead of :attr:`BondType.ANY`.
157
+ By default, the created :class:`BondList` is empty.
158
+
159
+ Notes
160
+ -----
161
+ When initially providing the bonds as :class:`ndarray`, the input is
162
+ sanitized: Redundant bonds are removed, and each bond entry is
163
+ sorted so that the lower one of the two atom indices is in the first
164
+ column.
165
+ If a bond appears multiple times with different bond types, the
166
+ first bond takes precedence.
167
+
168
+ Examples
169
+ --------
170
+
171
+ Construct a :class:`BondList`, where a central atom (index 1) is
172
+ connected to three other atoms (index 0, 3 and 4):
173
+
174
+ >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
175
+ >>> print(bond_list)
176
+ [[0 1 0]
177
+ [1 3 0]
178
+ [1 4 0]]
179
+
180
+ Remove the first atom (index 0) via indexing:
181
+ The bond containing index 0 is removed, since the corresponding atom
182
+ does not exist anymore. Since all other atoms move up in their
183
+ position, the indices in the bond list are decreased by one:
184
+
185
+ >>> bond_list = bond_list[1:]
186
+ >>> print(bond_list)
187
+ [[0 2 0]
188
+ [0 3 0]]
189
+
190
+ :class:`BondList` objects can be associated to an :class:`AtomArray`
191
+ or :class:`AtomArrayStack`.
192
+ The following snippet shows this for a benzene molecule:
193
+
194
+ >>> benzene = AtomArray(12)
195
+ >>> # Omit filling most required annotation categories for brevity
196
+ >>> benzene.atom_name = np.array(
197
+ ... ["C1", "C2", "C3", "C4", "C5", "C6", "H1", "H2", "H3", "H4", "H5", "H6"]
198
+ ... )
199
+ >>> benzene.bonds = BondList(
200
+ ... benzene.array_length(),
201
+ ... np.array([
202
+ ... # Bonds between carbon atoms in the ring
203
+ ... (0, 1, BondType.AROMATIC_SINGLE),
204
+ ... (1, 2, BondType.AROMATIC_DOUBLE),
205
+ ... (2, 3, BondType.AROMATIC_SINGLE),
206
+ ... (3, 4, BondType.AROMATIC_DOUBLE),
207
+ ... (4, 5, BondType.AROMATIC_SINGLE),
208
+ ... (5, 0, BondType.AROMATIC_DOUBLE),
209
+ ... # Bonds between carbon and hydrogen
210
+ ... (0, 6, BondType.SINGLE),
211
+ ... (1, 7, BondType.SINGLE),
212
+ ... (2, 8, BondType.SINGLE),
213
+ ... (3, 9, BondType.SINGLE),
214
+ ... (4, 10, BondType.SINGLE),
215
+ ... (5, 11, BondType.SINGLE),
216
+ ... ])
217
+ ... )
218
+ >>> for i, j, bond_type in benzene.bonds.as_array():
219
+ ... print(
220
+ ... f"{BondType(bond_type).name} bond between "
221
+ ... f"{benzene.atom_name[i]} and {benzene.atom_name[j]}"
222
+ ... )
223
+ AROMATIC_SINGLE bond between C1 and C2
224
+ AROMATIC_DOUBLE bond between C2 and C3
225
+ AROMATIC_SINGLE bond between C3 and C4
226
+ AROMATIC_DOUBLE bond between C4 and C5
227
+ AROMATIC_SINGLE bond between C5 and C6
228
+ AROMATIC_DOUBLE bond between C1 and C6
229
+ SINGLE bond between C1 and H1
230
+ SINGLE bond between C2 and H2
231
+ SINGLE bond between C3 and H3
232
+ SINGLE bond between C4 and H4
233
+ SINGLE bond between C5 and H5
234
+ SINGLE bond between C6 and H6
235
+
236
+ Obtain the bonded atoms for the :math:`C_1`:
237
+
238
+ >>> bonds, types = benzene.bonds.get_bonds(0)
239
+ >>> print(bonds)
240
+ [1 5 6]
241
+ >>> print(types)
242
+ [5 6 1]
243
+ >>> print(f"C1 is bonded to {', '.join(benzene.atom_name[bonds])}")
244
+ C1 is bonded to C2, C6, H1
245
+
246
+ Cut the benzene molecule in half.
247
+ Although the first half of the atoms are missing the indices of
248
+ the cropped :class:`BondList` still represents the bonds of the
249
+ remaining atoms:
250
+
251
+ >>> half_benzene = benzene[
252
+ ... np.isin(benzene.atom_name, ["C4", "C5", "C6", "H4", "H5", "H6"])
253
+ ... ]
254
+ >>> for i, j, bond_type in half_benzene.bonds.as_array():
255
+ ... print(
256
+ ... f"{BondType(bond_type).name} bond between "
257
+ ... f"{half_benzene.atom_name[i]} and {half_benzene.atom_name[j]}"
258
+ ... )
259
+ AROMATIC_DOUBLE bond between C4 and C5
260
+ AROMATIC_SINGLE bond between C5 and C6
261
+ SINGLE bond between C4 and H4
262
+ SINGLE bond between C5 and H5
263
+ SINGLE bond between C6 and H6
264
+ """
265
+
266
def __init__(self, uint32 atom_count, np.ndarray bonds=None):
    # Set up the bond list for `atom_count` atoms.
    #
    # `bonds` is an optional array of shape (n,2) or (n,3):
    # the first two columns are the indices of the bonded atoms,
    # the optional third column is the bond type of each bond.
    # If `bonds` is None or empty, an empty bond list is created.
    #
    # A ValueError is raised, if `bonds` is not two-dimensional,
    # has a number of columns other than 2 or 3,
    # or contains a value that is not a valid BondType.
    self._atom_count = atom_count

    if bonds is None or len(bonds) == 0:
        # Create empty bond list
        self._bonds = np.zeros((0, 3), dtype=np.uint32)
        self._max_bonds_per_atom = 0
        return

    if bonds.ndim != 2:
        raise ValueError("Expected a 2D-ndarray for input bonds")
    n_columns = bonds.shape[1]
    if n_columns != 2 and n_columns != 3:
        raise ValueError(
            "Input array containing bonds must be either of shape "
            "(n,2) or (n,3)"
        )

    # The bond type column is zero-initialized
    # -> Default: bond type ANY (0), if no bond types are given
    self._bonds = np.zeros((bonds.shape[0], 3), dtype=np.uint32)
    # Bond indices (index 0 and 1):
    self._bonds[:,:2] = np.sort(
        # Indices are sorted per bond
        # so that the lower index is at the first position
        _to_positive_index_array(bonds[:,:2], atom_count), axis=1
    )

    if n_columns == 3:
        # Input also contains the bond type value (index 2)
        bond_types = bonds[:, 2]
        if (bond_types >= len(BondType)).any():
            raise ValueError(
                f"BondType {np.max(bond_types)} is invalid"
            )
        # A negative value would not be caught by the check above
        # and would wrap around to a large number
        # when it is written into the unsigned internal array
        if (bond_types < 0).any():
            raise ValueError(
                f"BondType {np.min(bond_types)} is invalid"
            )
        self._bonds[:,2] = bond_types

    self._remove_redundant_bonds()
    self._max_bonds_per_atom = self._get_max_bonds_per_atom()
312
+
313
@staticmethod
def concatenate(bonds_lists):
    """
    Merge multiple :class:`BondList` objects into a single
    :class:`BondList`.

    The atom indices of each bond list are shifted by the total
    atom count of all bond lists preceding it.

    Parameters
    ----------
    bonds_lists : iterable object of BondList
        The bond lists to be concatenated.

    Returns
    -------
    concatenated_bonds : BondList
        The concatenated bond lists.

    Examples
    --------

    >>> bonds1 = BondList(2, np.array([(0, 1)]))
    >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
    >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
    >>> print(merged_bonds.get_atom_count())
    5
    >>> print(merged_bonds.as_array()[:, :2])
    [[0 1]
     [2 3]
     [2 4]]
    """
    # The input is traversed several times below,
    # hence a one-shot iterable is materialized first
    if not isinstance(bonds_lists, Sequence):
        bonds_lists = list(bonds_lists)

    cdef np.ndarray combined = np.concatenate(
        [member._bonds for member in bonds_lists]
    )
    # Shift the atom indices of each appended bond list
    # (consistent with addition of AtomArray)
    cdef int row_begin = 0, row_end = 0
    cdef int atom_offset = 0
    for member in bonds_lists:
        # Rows in 'combined' that originate from this bond list
        row_end = row_begin + member._bonds.shape[0]
        combined[row_begin : row_end, :2] += atom_offset
        atom_offset += member._atom_count
        row_begin = row_end

    # After the loop 'atom_offset' is the total atom count
    cdef result = BondList(atom_offset)
    # The array is assigned directly instead of being passed to the
    # constructor to prevent unnecessary maximum and redundant bond
    # calculation
    result._bonds = combined
    result._max_bonds_per_atom = max(
        [member._max_bonds_per_atom for member in bonds_lists]
    )
    return result
367
+
368
def __copy_create__(self):
    # Only the atom count is handed to the constructor:
    # an empty bond list is created, which avoids the
    # unnecessary removal of redundant bonds
    # and the calculation of the maximum bonds per atom
    empty_clone = BondList(self._atom_count)
    return empty_clone
373
+
374
def __copy_fill__(self, clone):
    # Transfer the state of this bond list to the given clone
    clone._max_bonds_per_atom = self._max_bonds_per_atom
    # The clone gets its own copy of the bond array
    clone._bonds = self._bonds.copy()
378
+
379
def offset_indices(self, int offset):
    """
    offset_indices(offset)

    Increase all atom indices in the :class:`BondList` by the given
    offset.

    Implicitly this increases the atom count.

    Parameters
    ----------
    offset : int
        The atom indices are increased by this value.
        Must not be negative.

    Raises
    ------
    ValueError
        If `offset` is negative.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> print(bond_list)
    [[0 1 0]
     [1 3 0]
     [1 4 0]]
    >>> bond_list.offset_indices(2)
    >>> print(bond_list)
    [[2 3 0]
     [3 5 0]
     [3 6 0]]
    """
    if offset < 0:
        raise ValueError("Offset must be positive")
    # Only the two atom index columns are shifted,
    # the bond type column stays untouched
    self._bonds[:,:2] += offset
    self._atom_count += offset
412
+
413
def as_array(self):
    """
    as_array()

    Return the bonds as a new :class:`ndarray`.

    Returns
    -------
    array : ndarray, shape=(n,3), dtype=np.uint32
        A copy of the internal bond array, i.e. modifying it does
        not affect this :class:`BondList`.
        Each row describes one bond:
        The first and second column contain the indices of the two
        bonded atoms, the third column contains the
        :class:`BondType`.
    """
    bonds_copy = self._bonds.copy()
    return bonds_copy
429
+
430
def as_set(self):
    """
    as_set()

    Return the bonds as a set of tuples.

    Returns
    -------
    bond_set : set of tuple(int, int, int)
        One tuple per bond, consisting of the index of the first
        atom, the index of the second atom and the
        :class:`BondType` as integer.
    """
    cdef uint32[:,:] bonds_view = self._bonds
    cdef set result = set()
    cdef int row
    for row in range(bonds_view.shape[0]):
        result.add(
            (bonds_view[row,0], bonds_view[row,1], bonds_view[row,2])
        )
    return result
453
+
454
def as_graph(self):
    """
    as_graph()

    Return the bonds as a *NetworkX* graph.

    Returns
    -------
    bond_set : Graph
        A *NetworkX* :class:`Graph`.
        Each bond becomes an edge between the nodes given by its two
        atom indices.
        The :class:`BondType` of a bond is stored in the
        ``"bond_type"`` attribute of the edge.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0,2), (1,3,1), (1,4,1)]))
    >>> graph = bond_list.as_graph()
    >>> print(graph.nodes)
    [0, 1, 3, 4]
    >>> print(graph.edges)
    [(0, 1), (1, 3), (1, 4)]
    >>> for i, j in graph.edges:
    ...     print(i, j, graph.get_edge_data(i, j))
    0 1 {'bond_type': <BondType.DOUBLE: 2>}
    1 3 {'bond_type': <BondType.SINGLE: 1>}
    1 4 {'bond_type': <BondType.SINGLE: 1>}
    """
    cdef uint32[:,:] bonds_view = self._bonds
    cdef int row
    cdef list edge_data = []

    # Collect all edges first and add them to the graph in one call
    for row in range(bonds_view.shape[0]):
        edge_data.append((
            bonds_view[row,0], bonds_view[row,1],
            {"bond_type": BondType(bonds_view[row,2])}
        ))

    graph = nx.Graph()
    graph.add_edges_from(edge_data)
    return graph
496
+
497
def remove_aromaticity(self):
    """
    Replace each aromatic bond type by its non-aromatic counterpart.

    :attr:`BondType.AROMATIC_{ORDER}` is converted into
    :attr:`BondType.{ORDER}`.
    The :class:`BondList` is modified in-place.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_aromaticity()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 SINGLE
    1 2 DOUBLE
    """
    # View on the bond type column: writing to it changes 'self._bonds'
    type_column = self._bonds[:,2]
    replacements = (
        (BondType.AROMATIC_SINGLE, BondType.SINGLE),
        (BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
        (BondType.AROMATIC_TRIPLE, BondType.TRIPLE),
    )
    for aromatic, plain in replacements:
        is_aromatic = type_column == aromatic
        type_column[is_aromatic] = plain
523
+
524
def remove_bond_order(self):
    """
    Set the type of every bond to :attr:`BondType.ANY`.

    The :class:`BondList` is modified in-place.
    """
    # View on the bond type column: writing to it changes 'self._bonds'
    type_column = self._bonds[:,2]
    type_column[:] = BondType.ANY
529
+
530
def get_atom_count(self):
    """
    get_atom_count()

    Return the number of atoms this :class:`BondList` refers to.

    Returns
    -------
    atom_count : int
        The atom count.
    """
    atom_count = self._atom_count
    return atom_count
542
+
543
def get_bond_count(self):
    """
    get_bond_count()

    Return the number of bonds in this :class:`BondList`.

    Returns
    -------
    bond_count : int
        The amount of bonds, i.e. the number of rows of the
        internal :class:`ndarray` containing the bonds.
    """
    bond_count = self._bonds.shape[0]
    return bond_count
556
+
557
def get_bonds(self, int32 atom_index):
    """
    get_bonds(atom_index)

    Get the bond partners of a single atom together with the
    respective bond types.

    Parameters
    ----------
    atom_index : int
        The index of the atom to get the bonds for.
        Negative indices count from the end.

    Returns
    -------
    bonds : np.ndarray, dtype=np.uint32, shape=(k,)
        The indices of the atoms bonded to the given atom.
    bond_types : np.ndarray, dtype=np.uint8, shape=(k,)
        The bond types as integers, interpretable as
        :class:`BondType` instances.
        Each element belongs to the bond partner at the same
        position in `bonds`.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> bonds, types = bond_list.get_bonds(1)
    >>> print(bonds)
    [0 3 4]
    """
    cdef int row = 0, found = 0
    cdef uint32 partner

    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    cdef uint32[:,:] bonds_view = self._bonds
    # No atom has more bonds than 'self._max_bonds_per_atom',
    # hence arrays of this size are always sufficiently large
    cdef np.ndarray partners = np.zeros(
        self._max_bonds_per_atom, dtype=np.uint32
    )
    cdef uint32[:] partners_v = partners
    cdef np.ndarray partner_types = np.zeros(
        self._max_bonds_per_atom, dtype=np.uint8
    )
    cdef uint8[:] partner_types_v = partner_types

    for row in range(bonds_view.shape[0]):
        # The requested atom may be at either position of a bond:
        # the bond partner is the atom at the respective other position
        if bonds_view[row,0] == index:
            partner = bonds_view[row,1]
        elif bonds_view[row,1] == index:
            partner = bonds_view[row,0]
        else:
            continue
        partners_v[found] = partner
        partner_types_v[found] = bonds_view[row,2]
        found += 1

    # Cut off the unused part of the arrays
    return partners[:found], partner_types[:found]
619
+
620
+
621
def get_all_bonds(self):
    """
    get_all_bonds()

    For each atom index, give the indices of the atoms bonded to
    this atom as well as the corresponding bond types.

    Returns
    -------
    bonds : np.ndarray, dtype=np.int32, shape=(n,k)
        The indices of connected atoms.
        The first dimension represents the atoms,
        the second dimension represents the indices of atoms bonded
        to the respective atom.
        Atoms can have different numbers of atoms bonded to them.
        Therefore, the length of the second dimension *k* is equal
        to the maximum number of bonds for an atom in this
        :class:`BondList`.
        For atoms with less bonds, the corresponding entry in the
        array is padded with ``-1`` values.
    bond_types : np.ndarray, dtype=np.int8, shape=(n,k)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the bond type (or order) corresponding
        to the returned `bonds`.
        It uses the same ``-1``-padding.

    Examples
    --------

    >>> # BondList for benzene
    >>> bond_list = BondList(
    ...     12,
    ...     np.array([
    ...         # Bonds between the carbon atoms in the ring
    ...         (0,  1, BondType.AROMATIC_SINGLE),
    ...         (1,  2, BondType.AROMATIC_DOUBLE),
    ...         (2,  3, BondType.AROMATIC_SINGLE),
    ...         (3,  4, BondType.AROMATIC_DOUBLE),
    ...         (4,  5, BondType.AROMATIC_SINGLE),
    ...         (5,  0, BondType.AROMATIC_DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0,  6, BondType.SINGLE),
    ...         (1,  7, BondType.SINGLE),
    ...         (2,  8, BondType.SINGLE),
    ...         (3,  9, BondType.SINGLE),
    ...         (4, 10, BondType.SINGLE),
    ...         (5, 11, BondType.SINGLE),
    ...     ])
    ... )
    >>> bonds, types = bond_list.get_all_bonds()
    >>> print(bonds)
    [[ 1  5  6]
     [ 0  2  7]
     [ 1  3  8]
     [ 2  4  9]
     [ 3  5 10]
     [ 4  0 11]
     [ 0 -1 -1]
     [ 1 -1 -1]
     [ 2 -1 -1]
     [ 3 -1 -1]
     [ 4 -1 -1]
     [ 5 -1 -1]]
    >>> print(types)
    [[ 5  6  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]]
    >>> for i in range(bond_list.get_atom_count()):
    ...     bonds_for_atom = bonds[i]
    ...     # Remove trailing '-1' values
    ...     bonds_for_atom = bonds_for_atom[bonds_for_atom != -1]
    ...     print(f"{i}: {bonds_for_atom}")
    0: [1 5 6]
    1: [0 2 7]
    2: [1 3 8]
    3: [2 4 9]
    4: [ 3  5 10]
    5: [ 4  0 11]
    6: [0]
    7: [1]
    8: [2]
    9: [3]
    10: [4]
    11: [5]
    """
    cdef int i=0
    cdef uint32 atom_index_i, atom_index_j, bond_type

    cdef uint32[:,:] all_bonds_v = self._bonds
    # The size of 2nd dimension is equal to the atom with most bonds
    # Since each atom can have an individual number of bonded atoms,
    # The arrays are padded with '-1'
    # -> signed integer types are required for both arrays
    cdef np.ndarray bonds = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int32
    )
    cdef int32[:,:] bonds_v = bonds
    cdef np.ndarray bond_types = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int8
    )
    cdef int8[:,:] bond_types_v = bond_types
    # Track the number of already found bonds for each given index
    cdef np.ndarray lengths = np.zeros(self._atom_count, dtype=np.uint32)
    cdef uint32[:] lengths_v = lengths

    for i in range(all_bonds_v.shape[0]):
        atom_index_i = all_bonds_v[i,0]
        atom_index_j = all_bonds_v[i,1]
        bond_type = all_bonds_v[i,2]
        # Add second bonded atom for the first bonded atom
        # and vice versa
        # Use 'lengths' variable to append the value
        bonds_v[atom_index_i, lengths_v[atom_index_i]] = atom_index_j
        bonds_v[atom_index_j, lengths_v[atom_index_j]] = atom_index_i
        bond_types_v[atom_index_i, lengths_v[atom_index_i]] = bond_type
        bond_types_v[atom_index_j, lengths_v[atom_index_j]] = bond_type
        # Increment lengths
        # This must happen after all four writes above,
        # as they use the old lengths as insertion position
        lengths_v[atom_index_i] += 1
        lengths_v[atom_index_j] += 1

    return bonds, bond_types
752
+
753
+
754
def adjacency_matrix(self):
    r"""
    adjacency_matrix()

    Represent this :class:`BondList` as adjacency matrix.

    The adjacency matrix is a symmetric quadratic matrix with
    boolean values according to

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{True},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            \text{False}, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=bool, shape=(n,n)
        The created adjacency matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.adjacency_matrix())
    [[False  True  True  True]
     [ True False False False]
     [ True False False False]
     [ True False False False]]
    """
    first_atoms = self._bonds[:,0]
    second_atoms = self._bonds[:,1]
    shape = (self._atom_count, self._atom_count)
    adjacency = np.zeros(shape, dtype=bool)
    # A bond is undirected -> mark both directions
    adjacency[first_atoms, second_atoms] = True
    adjacency[second_atoms, first_atoms] = True
    return adjacency
802
+
803
+
804
def bond_type_matrix(self):
    r"""
    bond_type_matrix()

    Represent this :class:`BondList` as a matrix depicting the bond
    type.

    The matrix is a symmetric quadratic matrix:

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{BondType}_{ij},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            -1, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=np.int8, shape=(n,n)
        The created bond type matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.bond_type_matrix())
    [[-1  2  1  1]
     [ 2 -1 -1 -1]
     [ 1 -1 -1 -1]
     [ 1 -1 -1 -1]]
    """
    first_atoms = self._bonds[:,0]
    second_atoms = self._bonds[:,1]
    bond_types = self._bonds[:,2]
    shape = (self._atom_count, self._atom_count)
    # '-1' marks atom pairs without a bond
    type_matrix = np.full(shape, -1, dtype=np.int8)
    # A bond is undirected -> write the type for both directions
    type_matrix[first_atoms, second_atoms] = bond_types
    type_matrix[second_atoms, first_atoms] = bond_types
    return type_matrix
852
+
853
+
854
def add_bond(self, int32 atom_index1, int32 atom_index2,
             bond_type=BondType.ANY):
    """
    add_bond(atom_index1, atom_index2, bond_type=BondType.ANY)

    Add a bond to the :class:`BondList`.

    If the bond is already existent, only the bond type is updated.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms to create a bond for.
        Negative indices count from the end.
    bond_type : BondType or int, optional
        The type of the bond. Default is :attr:`BondType.ANY`.

    Raises
    ------
    ValueError
        If `bond_type` is equal to or larger than the number of
        :class:`BondType` members.
    IndexError
        If an atom index is out of range for the atom count.
    """
    if bond_type >= len(BondType):
        raise ValueError(f"BondType {bond_type} is invalid")

    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    # Put the smaller atom index into 'index1'
    _sort(&index1, &index2)

    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Check if bond is already existent in list
    cdef bint in_list = False
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            in_list = True
            # If in list, update bond type
            all_bonds_v[i,2] = int(bond_type)
            break
    if not in_list:
        # 'np.append()' creates a new array containing the additional bond
        self._bonds = np.append(
            self._bonds,
            np.array(
                [(index1, index2, int(bond_type))], dtype=np.uint32
            ),
            axis=0
        )
        # A new bond may increase the maximum number of bonds per atom
        self._max_bonds_per_atom = self._get_max_bonds_per_atom()
898
+
899
def remove_bond(self, int32 atom_index1, int32 atom_index2):
    """
    remove_bond(atom_index1, atom_index2)

    Remove a bond from the :class:`BondList`.

    If the bond is not existent in the :class:`BondList`, nothing happens.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms whose bond should be removed.
        Negative indices count from the end.

    Raises
    ------
    IndexError
        If an atom index is out of range for the atom count.
    """
    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    # Put the smaller atom index into 'index1'
    _sort(&index1, &index2)

    # Find the bond in bond list
    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            self._bonds = np.delete(self._bonds, i, axis=0)
            # Stop here: 'all_bonds_v' still refers to the array
            # before the deletion, so any further match would use
            # a row index that is not valid for the new array
            break
    # The maximum bonds per atom is not recalculated,
    # as the value can only be decreased on bond removal
    # Since this value is only used for pessimistic array allocation
    # in 'get_bonds()', the slightly larger memory usage is a better
    # option than the repetitive call of _get_max_bonds_per_atom()
929
+
930
def remove_bonds_to(self, int32 atom_index):
    """
    remove_bonds_to(self, atom_index)

    Remove every bond the given atom takes part in.

    Parameters
    ----------
    atom_index : int
        The index of the atom whose bonds should be removed.
        Negative indices count from the end.
    """
    cdef uint32 target = _to_positive_index(atom_index, self._atom_count)

    cdef uint32[:,:] bonds_view = self._bonds
    # 'uint8' is used as element type, as the mask is accessed
    # via a memoryview
    cdef np.ndarray keep = np.ones(len(self._bonds), dtype=np.uint8)
    cdef uint8[:] keep_v = keep
    cdef int row

    # Unmark all bonds containing the atom at either position
    for row in range(bonds_view.shape[0]):
        if bonds_view[row,0] == target or bonds_view[row,1] == target:
            keep_v[row] = False

    self._bonds = self._bonds[keep.astype(bool, copy=False)]
    # As in 'remove_bond()', the maximum number of bonds per atom
    # is deliberately not updated
958
+
959
def remove_bonds(self, bond_list):
    """
    remove_bonds(bond_list)

    Remove multiple bonds from the :class:`BondList`.

    All bonds present in `bond_list` are removed from this instance.
    If a bond is not existent in this instance, nothing happens.
    Only the bond indices, not the bond types, are relevant for
    this.

    Parameters
    ----------
    bond_list : BondList
        The bonds in `bond_list` are removed from this instance.
    """
    cdef int i=0, j=0

    # All bonds in the own BondList
    cdef uint32[:,:] all_bonds_v = self._bonds
    # The bonds that should be removed
    cdef uint32[:,:] rem_bonds_v = bond_list._bonds
    # 'uint8' is used as element type, as the mask is accessed
    # via a memoryview
    cdef np.ndarray mask = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
    cdef uint8[:] mask_v = mask
    for i in range(all_bonds_v.shape[0]):
        for j in range(rem_bonds_v.shape[0]):
            if all_bonds_v[i,0] == rem_bonds_v[j,0] \
                and all_bonds_v[i,1] == rem_bonds_v[j,1]:
                    mask_v[i] = False
                    # The bond is already marked for removal
                    # -> checking the remaining bonds is unnecessary
                    break

    # Remove the bonds
    self._bonds = self._bonds[mask.astype(bool, copy=False)]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
993
+
994
def merge(self, bond_list):
    """
    merge(bond_list)

    Combine this instance and another :class:`BondList` into a new
    object.
    If a bond appears in both :class:`BondList`'s, the
    :class:`BondType` from the given `bond_list` takes precedence.

    The internal :class:`ndarray` instances containing the bonds are
    simply concatenated and the new atom count is the maximum of
    both bond lists.

    Parameters
    ----------
    bond_list : BondList
        This bond list is merged with this instance.

    Returns
    -------
    bond_list : BondList
        The merged :class:`BondList`.

    Notes
    -----
    This is not equal to using the `+` operator.

    Examples
    --------

    >>> bond_list1 = BondList(3, np.array([(0,1),(1,2)]))
    >>> bond_list2 = BondList(5, np.array([(2,3),(3,4)]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list.get_atom_count())
    5
    >>> print(merged_list)
    [[0 1 0]
     [1 2 0]
     [2 3 0]
     [3 4 0]]

    The BondList given as parameter takes precedence:

    >>> # Specify bond type to see where a bond is taken from
    >>> bond_list1 = BondList(4, np.array([
    ...     (0, 1, BondType.SINGLE),
    ...     (1, 2, BondType.SINGLE)
    ... ]))
    >>> bond_list2 = BondList(4, np.array([
    ...     (1, 2, BondType.DOUBLE),    # This one is a duplicate
    ...     (2, 3, BondType.DOUBLE)
    ... ]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list)
    [[0 1 1]
     [1 2 1]
     [2 3 2]]
    """
    merged_atom_count = max(self._atom_count, bond_list._atom_count)
    # The bonds of the given bond list are placed in front
    # of the bonds of this instance
    stacked_bonds = np.concatenate(
        [bond_list.as_array(), self.as_array()],
        axis=0
    )
    return BondList(merged_atom_count, stacked_bonds)
1059
+
1060
def __add__(self, bond_list):
    # The '+' operator delegates to 'concatenate()'
    # with this instance as first operand
    operands = [self, bond_list]
    return BondList.concatenate(operands)
1062
+
1063
def __getitem__(self, index):
    # Index the bond list with an atom index:
    # - A single integer gives the result of 'get_bonds()' for that atom
    # - A boolean mask or any other index gives a new bond list, that
    #   contains only the bonds, where both atoms are selected by the
    #   index, with atom indices adapted to the selection

    ## Variables for both, integer and boolean index arrays
    cdef uint32[:,:] all_bonds_v
    cdef int i
    cdef uint32* index1_ptr
    cdef uint32* index2_ptr
    # Marks the bonds that are kept ('uint8' instead of 'bool')
    cdef np.ndarray removal_filter
    cdef uint8[:] removal_filter_v

    ## Variables for integer arrays
    cdef int32[:] inverse_index_v
    cdef int32 new_index1, new_index2

    ## Variables for boolean mask
    # Boolean mask representation of the index
    cdef np.ndarray mask
    cdef uint8[:] mask_v
    # For each atom, the number of unselected atoms up to this atom
    cdef np.ndarray offsets
    cdef uint32[:] offsets_v

    if isinstance(index, numbers.Integral):
        ## Handle single index
        return self.get_bonds(index)

    elif isinstance(index, np.ndarray) and index.dtype == bool:
        ## Handle boolean masks
        copy = self.copy()
        all_bonds_v = copy._bonds
        # Use 'uint8' instead of 'bool' for memory view
        # NOTE(review): 'np.frombuffer()' reinterprets the raw buffer of
        # 'index'; presumably this requires a contiguous one-dimensional
        # mask with one element per atom - confirm
        mask = np.frombuffer(index, dtype=np.uint8)

        # Each time an atom is missing in the mask,
        # the offset is increased by one
        offsets = np.cumsum(
            ~mask.astype(bool, copy=False), dtype=np.uint32
        )
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        mask_v = mask
        offsets_v = offsets
        # If an atom in a bond is not masked,
        # the bond is removed from the list
        # If an atom is masked,
        # its index value is decreased by the respective offset
        # The offset is necessary, removing atoms in an AtomArray
        # decreases the index of the following atoms
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            if mask_v[index1_ptr[0]] and mask_v[index2_ptr[0]]:
                # Both atoms involved in bond are masked
                # -> decrease atom index by offset
                # (the pointers write directly into 'copy._bonds')
                index1_ptr[0] -= offsets_v[index1_ptr[0]]
                index2_ptr[0] -= offsets_v[index2_ptr[0]]
            else:
                # At least one atom involved in bond is not masked
                # -> remove bond
                removal_filter_v[i] = False
        # Apply the bond removal filter
        copy._bonds = copy._bonds[removal_filter.astype(bool, copy=False)]
        # The new atom count is the number of selected atoms
        copy._atom_count = len(np.nonzero(mask)[0])
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy

    else:
        ## Convert any other type of index into index array, as it preserves order
        copy = self.copy()
        all_bonds_v = copy._bonds
        index = _to_index_array(index, self._atom_count)
        index = _to_positive_index_array(index, self._atom_count)

        # The inverse index is required to efficiently obtain
        # the new index of an atom in case of an unsorted index
        # array
        # '_invert_index()' raises an exception for duplicate indices
        inverse_index_v = _invert_index(index, self._atom_count)
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            # '-1' in the inverse index means,
            # that the atom is not part of the index array
            new_index1 = inverse_index_v[index1_ptr[0]]
            new_index2 = inverse_index_v[index2_ptr[0]]
            if new_index1 != -1 and new_index2 != -1:
                # Both atoms involved in bond are included
                # by index array
                # -> assign new atom indices
                index1_ptr[0] = <int32>new_index1
                index2_ptr[0] = <int32>new_index2
            else:
                # At least one atom in bond is not included
                # -> remove bond
                removal_filter_v[i] = False

        copy._bonds = copy._bonds[
            removal_filter.astype(bool, copy=False)
        ]
        # Again, sort indices per bond
        # as the correct order is not guaranteed anymore
        # for unsorted index arrays
        copy._bonds[:,:2] = np.sort(copy._bonds[:,:2], axis=1)
        copy._atom_count = len(index)
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy
1171
+
1172
def __iter__(self):
    # Iteration is explicitly disabled for bond lists
    message = "'BondList' object is not iterable"
    raise TypeError(message)
1174
+
1175
def __str__(self):
    # The string representation is the one of the bond array
    bond_array = self.as_array()
    return str(bond_array)
1177
+
1178
def __eq__(self, item):
    # Two bond lists are equal, if they have the same atom count
    # and contain the same bonds (including the bond types),
    # irrespective of the order of the bonds
    if isinstance(item, BondList):
        if self._atom_count != item._atom_count:
            return False
        return self.as_set() == item.as_set()
    # Objects of other types are never equal to a bond list
    return False
1183
+
1184
def __contains__(self, item):
    # Check whether a bond between two atoms exists
    # 'item' is a tuple of the two atom indices in arbitrary order,
    # the bond type is not considered
    #
    # A 'TypeError' is raised, if 'item' is not a tuple of length 2
    if not isinstance(item, tuple) or len(item) != 2:
        raise TypeError("Expected a tuple of atom indices")

    cdef int i=0

    cdef uint32 match_index1, match_index2
    # Sort indices for faster search in loop
    cdef uint32 atom_index1 = min(item)
    cdef uint32 atom_index2 = max(item)

    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        match_index1 = all_bonds_v[i,0]
        match_index2 = all_bonds_v[i,1]
        # As the searched indices are sorted,
        # only one order needs to be compared
        if atom_index1 == match_index1 and atom_index2 == match_index2:
            return True

    return False
1203
+
1204
+
1205
def _get_max_bonds_per_atom(self):
    # Determine the number of bonds of the atom with the most bonds
    # For a bond list without atoms, the result is 0
    if self._atom_count == 0:
        return 0

    cdef uint32[:,:] bonds_view = self._bonds
    # For each atom index, the number of bonds it appears in
    cdef np.ndarray occurrences = np.zeros(
        self._atom_count, dtype=np.uint32
    )
    cdef uint32[:] occurrences_v = occurrences
    cdef int row, column
    for row in range(bonds_view.shape[0]):
        # Both atoms of the bond get an additional count
        for column in range(2):
            occurrences_v[bonds_view[row,column]] += 1
    return np.max(occurrences_v)
1220
+
1221
def _remove_redundant_bonds(self):
    # Remove duplicate bonds from 'self._bonds':
    # If multiple bonds contain the same two atom indices (in the same
    # order), only the first one of them is kept
    # The bond type is not considered in the comparison
    cdef int j
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Boolean mask for final removal of redundant atoms
    # Unfortunately views of boolean ndarrays are not supported
    # -> use uint8 array
    cdef np.ndarray redundancy_filter = np.ones(all_bonds_v.shape[0],
                                                dtype=np.uint8)
    cdef uint8[:] redundancy_filter_v = redundancy_filter
    # Array of pointers to C-arrays
    # The array is indexed with the atom indices in the bond list
    # The respective C-array contains the indices of bonded atoms
    # Initially all pointers are zero, i.e. NULL
    cdef ptr[:] ptrs_v = np.zeros(self._atom_count, dtype=np.uint64)
    # Stores the length of the C-arrays
    cdef int[:] array_len_v = np.zeros(self._atom_count, dtype=np.int32)
    # Iterate over bond list:
    # If bond is already listed in the array of pointers,
    # set filter to false at that position
    # Else add bond to array of pointers
    cdef uint32 i1, i2
    cdef uint32* array_ptr
    cdef int length

    try:
        for j in range(all_bonds_v.shape[0]):
            i1 = all_bonds_v[j,0]
            i2 = all_bonds_v[j,1]
            # Since the bonds have the atom indices sorted
            # the reverse check is omitted
            if _in_array(<uint32*>ptrs_v[i1], i2, array_len_v[i1]):
                redundancy_filter_v[j] = False
            else:
                # Append bond in respective C-array
                # and update C-array length
                length = array_len_v[i1] +1
                array_ptr = <uint32*>ptrs_v[i1]
                array_ptr = <uint32*>realloc(
                    array_ptr, length * sizeof(uint32)
                )
                if not array_ptr:
                    # 'ptrs_v[i1]' has not been overwritten at this
                    # point, so the previous C-array is still freed
                    # in the 'finally' block
                    raise MemoryError()
                array_ptr[length-1] = i2
                ptrs_v[i1] = <ptr>array_ptr
                array_len_v[i1] = length

    finally:
        # Free pointers
        # This also runs, if an exception is raised in the loop above
        # NOTE(review): 'i' is not declared via 'cdef' in this method
        # (only 'j' is), so it is presumably an untyped loop
        # variable - confirm
        for i in range(ptrs_v.shape[0]):
            free(<int*>ptrs_v[i])

    # Eventually remove redundant bonds
    self._bonds = self._bonds[redundancy_filter.astype(bool, copy=False)]
1273
+
1274
+
1275
cdef uint32 _to_positive_index(int32 index, uint32 array_length) except -1:
    """
    Convert a potentially negative index into a positive index.

    A negative index counts from the end of an array with the given
    length.
    An :class:`IndexError` is raised, if the index is out of range
    for the given array length, irrespective of its sign.
    """
    # The sum is computed as signed 64 bit integer:
    # Adding a 'uint32' and an 'int32' directly would be performed
    # with unsigned 32 bit arithmetic, so a negative index with
    # a too large magnitude would wrap around to a large positive
    # value instead of being detected as out of range
    cdef long long pos_index
    if index < 0:
        pos_index = <long long> array_length + index
        if pos_index < 0:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> pos_index
    else:
        if <uint32> index >= array_length:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> index
1295
+
1296
+
1297
def _to_positive_index_array(index_array, length):
    """
    Replace negative indices in an array by their positive
    counterparts and raise an :class:`IndexError`, if any index is
    out of bounds for the given `length`.

    The input array is not modified, the returned array has the same
    shape as the input.
    """
    original_shape = index_array.shape
    # 'flatten()' always returns a copy
    flat_indices = index_array.flatten()
    is_negative = flat_indices < 0
    flat_indices[is_negative] = length + flat_indices[is_negative]
    # Indices that are still negative were smaller than '-length'
    if np.any(flat_indices < 0):
        raise IndexError(
            f"Index {np.min(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    if np.any(flat_indices >= length):
        raise IndexError(
            f"Index {np.max(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    return flat_indices.reshape(original_shape)
1318
+
1319
+
1320
def _to_index_array(object index, uint32 length):
    """
    Convert an index of arbitrary type into an index array.

    An integer :class:`ndarray` is returned as it is.
    Any other index is applied to an array containing all indices
    from 0 to `length` (exclusive).
    """
    is_integer_array = (
        isinstance(index, np.ndarray)
        and np.issubdtype(index.dtype, np.integer)
    )
    if is_integer_array:
        return index
    # Let NumPy resolve the index by indexing the range of all indices
    return np.arange(length, dtype=np.uint32)[index]
1330
+
1331
+
1332
cdef inline bint _in_array(uint32* array, uint32 atom_index, int array_length):
    """
    Check whether the C-array `array` with `array_length` elements
    contains the value `atom_index`.
    A ``NULL`` pointer is treated as empty array.
    """
    cdef int position
    if array != NULL:
        for position in range(array_length):
            if array[position] == atom_index:
                return True
    return False
1343
+
1344
+
1345
cdef inline void _sort(uint32* index1_ptr, uint32* index2_ptr):
    # Exchange the two referenced values in-place, if necessary,
    # so that the first value is not larger than the second one
    cdef uint32 first = index1_ptr[0]
    cdef uint32 second = index2_ptr[0]
    if first <= second:
        # Already in the correct order
        return
    index1_ptr[0] = second
    index2_ptr[0] = first
1352
+
1353
+
1354
@cython.wraparound(False)
# The bounds check is kept, since the given indices may be out of bounds
def _invert_index(IndexType[:] index_v, uint32 length):
    """
    Create the inverse of an index array:
    If *input[i] = j*, then *output[j] = i*.
    Each element *j*, that does not appear in *input*, gets
    *output[j]* = -1.

    Raises a :class:`NotImplementedError`, if an index appears more
    than once in the input.
    """
    cdef int32 position
    cdef IndexType index_val
    inverse = np.full(length, -1, dtype=np.int32)
    cdef int32[:] inverse_v = inverse

    for position in range(index_v.shape[0]):
        index_val = index_v[position]
        if inverse_v[index_val] == -1:
            # First occurrence of this index
            inverse_v[index_val] = position
            continue
        # The index was already assigned before:
        # repeated indices are theoretically possible,
        # but currently not supported
        raise NotImplementedError(
            f"Duplicate indices are not supported, "
            f"but index {index_val} appeared multiple times"
        )

    return inverse
1380
+
1381
+
1382
+
1383
+
1384
# Default lookup table for distance based bond detection
# (merged with user supplied entries in `connect_via_distances()`).
# Each key is a pair of upper case element symbols, each value is the
# tuple ``(minimum distance, maximum distance)`` for which two atoms of
# these elements are considered bonded.
# Only one order of each element pair is listed here;
# `connect_via_distances()` registers the reversed order as well.
# NOTE(review): the distances are presumably given in Angstrom - confirm
# against the cited publication.
# fmt: off
_DEFAULT_DISTANCE_RANGE = {
    # Taken from Allen et al.
    # The lower bound is the smallest tabulated distance minus two
    # standard deviations, the upper bound is the largest tabulated
    # distance plus two standard deviations
    #                 min   - 2*std     max   + 2*std
    ("B",  "C" ) : (1.556 - 2*0.015,  1.556 + 2*0.015),
    ("BR", "C" ) : (1.875 - 2*0.029,  1.966 + 2*0.029),
    ("BR", "O" ) : (1.581 - 2*0.007,  1.581 + 2*0.007),
    ("C",  "C" ) : (1.174 - 2*0.011,  1.588 + 2*0.025),
    ("C",  "CL") : (1.713 - 2*0.011,  1.849 + 2*0.011),
    ("C",  "F" ) : (1.320 - 2*0.009,  1.428 + 2*0.009),
    ("C",  "H" ) : (1.059 - 2*0.030,  1.099 + 2*0.007),
    ("C",  "I" ) : (2.095 - 2*0.015,  2.162 + 2*0.015),
    ("C",  "N" ) : (1.325 - 2*0.009,  1.552 + 2*0.023),
    ("C",  "O" ) : (1.187 - 2*0.011,  1.477 + 2*0.008),
    ("C",  "P" ) : (1.791 - 2*0.006,  1.855 + 2*0.019),
    ("C",  "S" ) : (1.630 - 2*0.014,  1.863 + 2*0.015),
    ("C",  "SE") : (1.893 - 2*0.013,  1.970 + 2*0.032),
    ("C",  "SI") : (1.837 - 2*0.012,  1.888 + 2*0.023),
    ("CL", "O" ) : (1.414 - 2*0.026,  1.414 + 2*0.026),
    ("CL", "P" ) : (1.997 - 2*0.035,  2.008 + 2*0.035),
    ("CL", "S" ) : (2.072 - 2*0.023,  2.072 + 2*0.023),
    ("CL", "SI") : (2.072 - 2*0.009,  2.072 + 2*0.009),
    ("F",  "N" ) : (1.406 - 2*0.016,  1.406 + 2*0.016),
    ("F",  "P" ) : (1.495 - 2*0.016,  1.579 + 2*0.025),
    ("F",  "S" ) : (1.640 - 2*0.011,  1.640 + 2*0.011),
    ("F",  "SI") : (1.588 - 2*0.014,  1.694 + 2*0.013),
    ("H",  "N" ) : (1.009 - 2*0.022,  1.033 + 2*0.022),
    ("H",  "O" ) : (0.967 - 2*0.010,  1.015 + 2*0.017),
    ("I",  "O" ) : (2.144 - 2*0.028,  2.144 + 2*0.028),
    ("N",  "N" ) : (1.124 - 2*0.015,  1.454 + 2*0.021),
    ("N",  "O" ) : (1.210 - 2*0.011,  1.463 + 2*0.012),
    ("N",  "P" ) : (1.571 - 2*0.013,  1.697 + 2*0.015),
    ("N",  "S" ) : (1.541 - 2*0.022,  1.710 + 2*0.019),
    ("N",  "SI") : (1.711 - 2*0.019,  1.748 + 2*0.022),
    ("O",  "P" ) : (1.449 - 2*0.007,  1.689 + 2*0.024),
    ("O",  "S" ) : (1.423 - 2*0.008,  1.580 + 2*0.015),
    ("O",  "SI") : (1.622 - 2*0.014,  1.680 + 2*0.008),
    ("P",  "P" ) : (2.214 - 2*0.022,  2.214 + 2*0.022),
    ("P",  "S" ) : (1.913 - 2*0.014,  1.954 + 2*0.005),
    ("P",  "SE") : (2.093 - 2*0.019,  2.093 + 2*0.019),
    ("P",  "SI") : (2.264 - 2*0.019,  2.264 + 2*0.019),
    ("S",  "S" ) : (1.897 - 2*0.012,  2.070 + 2*0.022),
    ("S",  "SE") : (2.193 - 2*0.015,  2.193 + 2*0.015),
    ("S",  "SI") : (2.145 - 2*0.020,  2.145 + 2*0.020),
    ("SE", "SE") : (2.340 - 2*0.024,  2.340 + 2*0.024),
    ("SI", "SE") : (2.359 - 2*0.012,  2.359 + 2*0.012),
}
# fmt: on
1432
+
1433
def connect_via_distances(atoms, dict distance_range=None, bint inter_residue=True,
                          default_bond_type=BondType.ANY, bint periodic=False):
    """
    connect_via_distances(atoms, distance_range=None, inter_residue=True,
                          default_bond_type=BondType.ANY, periodic=False)

    Create a :class:`BondList` for a given atom array, based on
    pairwise atom distances.

    Two atoms within the same residue are bonded, if the distance
    between them lies within the bond distance range expected for their
    combination of elements.
    By default, these bonds have the type :attr:`BondType.ANY`.
    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray
        The structure to create the :class:`BondList` for.
    distance_range : dict of tuple(str, str) -> tuple(float, float), optional
        Custom minimum and maximum bond distances.
        The dictionary keys are tuples of chemical elements representing
        the atoms to be potentially bonded.
        The order of elements within each tuple does not matter.
        The dictionary values are the minimum and maximum bond distance,
        respectively, for the given combination of elements.
        This parameter updates the default dictionary.
        Hence, the default bond distances for missing element pairs are
        still taken from the default dictionary.
        The default bond distances are taken from :footcite:`Allen1987`.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    default_bond_type : BondType or int, optional
        By default, all created bonds have :attr:`BondType.ANY`.
        An alternative :class:`BondType` can be given in this parameter.
    periodic : bool, optional
        If set to true, bonds can also be detected in periodic
        boundary conditions.
        The `box` attribute of `atoms` is required in this case.

    Returns
    -------
    BondList
        The created bond list.

    Raises
    ------
    TypeError
        If `atoms` is not an :class:`AtomArray`.
    BadStructureError
        If `periodic` is true, but `atoms` has no box.

    See also
    --------
    connect_via_residue_names

    Notes
    -----
    This method might miss bonds, if the bond distance is unexpectedly
    high or low, or it might create false bonds, if two atoms within a
    residue are accidentally in the right distance.
    A more accurate method for determining bonds is
    :func:`connect_via_residue_names()`.

    References
    ----------

    .. footbibliography::
    """
    from .atoms import AtomArray
    from .geometry import distance
    from .residues import get_residue_starts

    cdef list found_bonds = []
    cdef int res_i
    cdef int res_start, res_stop
    cdef np.ndarray all_coord = atoms.coord
    cdef np.ndarray res_coord
    cdef np.ndarray pair_distances
    cdef float pair_dist
    cdef np.ndarray all_elements = atoms.element
    cdef np.ndarray res_elements
    cdef int first, second
    cdef dict range_lookup = {}
    cdef tuple allowed_range
    cdef float lower, upper

    if not isinstance(atoms, AtomArray):
        raise TypeError(f"Expected 'AtomArray', not '{type(atoms).__name__}'")

    # The box is only handed to the distance calculation,
    # if periodic boundary conditions are requested
    box = None
    if periodic:
        box = atoms.box
        if box is None:
            raise BadStructureError("Atom array has no box")

    # Build the lookup table:
    # the custom entries come last and hence override the defaults
    custom_ranges = {} if distance_range is None else distance_range
    for range_source in (_DEFAULT_DISTANCE_RANGE, custom_ranges):
        for (element1, element2), limits in range_source.items():
            symbol1 = element1.upper()
            symbol2 = element2.upper()
            # Register both element orders,
            # so the lookup is independent of the atom order
            range_lookup[(symbol1, symbol2)] = limits
            range_lookup[(symbol2, symbol1)] = limits

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last entry of 'residue_starts' is the exclusive stop
    # and hence not the start of a residue itself
    for res_i in range(len(residue_starts) - 1):
        res_start = residue_starts[res_i]
        res_stop = residue_starts[res_i + 1]

        res_elements = all_elements[res_start:res_stop]
        res_coord = all_coord[res_start:res_stop]
        # All-vs-all distance matrix for the atoms of this residue
        pair_distances = distance(
            res_coord[:, np.newaxis, :],
            res_coord[np.newaxis, :, :],
            box
        )
        for first in range(len(res_elements)):
            # Only the lower triangle is visited,
            # so each atom pair is tested exactly once
            for second in range(first):
                allowed_range = range_lookup.get(
                    (res_elements[first], res_elements[second])
                )
                if allowed_range is None:
                    # Unknown element combination -> never bonded
                    continue
                lower, upper = allowed_range
                pair_dist = pair_distances[first, second]
                if lower <= pair_dist <= upper:
                    found_bonds.append((
                        res_start + first,
                        res_start + second,
                        default_bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(found_bonds))

    if not inter_residue:
        return bond_list

    inter_bonds = _connect_inter_residue(atoms, residue_starts)
    if default_bond_type == BondType.ANY:
        # All bonds are supposed to be of type ANY,
        # so the inter-residue bonds lose their bond order as well
        inter_bonds.remove_bond_order()
    return bond_list.merge(inter_bonds)
1582
+
1583
+
1584
+
1585
def connect_via_residue_names(atoms, bint inter_residue=True,
                              dict custom_bond_dict=None):
    """
    connect_via_residue_names(atoms, inter_residue=True,
                              custom_bond_dict=None)

    Create a :class:`BondList` for a given atom array (stack), based on
    the deposited bonds for each residue in the RCSB ``components.cif``
    dataset.

    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The structure to create the :class:`BondList` for.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    custom_bond_dict : dict (str -> dict ((str, str) -> int)), optional
        A dictionary of dictionaries:
        The outer dictionary maps residue names to inner dictionaries.
        The inner dictionary maps tuples of two atom names to their
        respective :class:`BondType` (represented as integer).
        If given, these bonds are used instead of the bonds read from
        ``components.cif``.

    Returns
    -------
    BondList
        The created bond list.
        No bonds are added for residues that are not found in
        ``components.cif``.

    See also
    --------
    connect_via_distances

    Notes
    -----
    This method can only find bonds for residues in the RCSB
    *Chemical Component Dictionary*, unless `custom_bond_dict` is set.
    Although this includes most molecules one encounters, this will fail
    for exotic molecules, e.g. specialized inhibitors.

    .. currentmodule:: biotite.structure.info

    To supplement `custom_bond_dict` with bonds for residues from the
    *Chemical Component Dictionary* you can use
    :meth:`bonds_in_residue()`.

    >>> import pprint
    >>> custom_bond_dict = {
    ...     "XYZ": {
    ...         ("A", "B"): BondType.SINGLE,
    ...         ("B", "C"): BondType.SINGLE
    ...     }
    ... }
    >>> # Supplement with bonds for common residues
    >>> custom_bond_dict["ALA"] = bonds_in_residue("ALA")
    >>> pp = pprint.PrettyPrinter(width=40)
    >>> pp.pprint(custom_bond_dict)
    {'ALA': {('C', 'O'): <BondType.DOUBLE: 2>,
             ('C', 'OXT'): <BondType.SINGLE: 1>,
             ('CA', 'C'): <BondType.SINGLE: 1>,
             ('CA', 'CB'): <BondType.SINGLE: 1>,
             ('CA', 'HA'): <BondType.SINGLE: 1>,
             ('CB', 'HB1'): <BondType.SINGLE: 1>,
             ('CB', 'HB2'): <BondType.SINGLE: 1>,
             ('CB', 'HB3'): <BondType.SINGLE: 1>,
             ('N', 'CA'): <BondType.SINGLE: 1>,
             ('N', 'H'): <BondType.SINGLE: 1>,
             ('N', 'H2'): <BondType.SINGLE: 1>,
             ('OXT', 'HXT'): <BondType.SINGLE: 1>},
     'XYZ': {('A', 'B'): <BondType.SINGLE: 1>,
             ('B', 'C'): <BondType.SINGLE: 1>}}

    """
    from .info.bonds import bonds_in_residue
    from .residues import get_residue_starts

    cdef list collected_bonds = []
    cdef int res_i
    cdef int m, n
    cdef int res_start, res_stop
    cdef np.ndarray all_atom_names = atoms.atom_name
    cdef np.ndarray names_in_res
    cdef np.ndarray all_res_names = atoms.res_name
    cdef str atom_name1, atom_name2
    cdef int64[:] matches1, matches2
    cdef dict res_bonds

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last entry of 'residue_starts' is the exclusive stop
    # and hence not the start of a residue itself
    for res_i in range(len(residue_starts) - 1):
        res_start = residue_starts[res_i]
        res_stop = residue_starts[res_i + 1]
        # The first atom of the residue represents the residue name
        res_name = all_res_names[res_start]

        if custom_bond_dict is not None:
            # Residues missing in the custom dictionary get no bonds
            res_bonds = custom_bond_dict.get(res_name, {})
        else:
            res_bonds = bonds_in_residue(res_name)

        names_in_res = all_atom_names[res_start:res_stop]
        for (atom_name1, atom_name2), bond_type in res_bonds.items():
            matches1 = np.where(names_in_res == atom_name1)[0] \
                         .astype(np.int64, copy=False)
            matches2 = np.where(names_in_res == atom_name2)[0] \
                         .astype(np.int64, copy=False)
            # An atom name may rarely occur more than once in a residue
            # (e.g. in altlocs)
            # -> bond every match of the first name
            #    with every match of the second name
            for m in range(matches1.shape[0]):
                for n in range(matches2.shape[0]):
                    collected_bonds.append((
                        res_start + matches1[m],
                        res_start + matches2[n],
                        bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(collected_bonds))

    if not inter_residue:
        return bond_list
    return bond_list.merge(_connect_inter_residue(atoms, residue_starts))
1715
+
1716
+
1717
+
1718
# Link type names that `_connect_inter_residue()` compares against the
# result of `link_type()`:
# two consecutive residues are only connected, if both of their link
# types are found in the same one of these two lists
# Link types connected via a 'C' -> 'N' bond
_PEPTIDE_LINKS = ["PEPTIDE LINKING", "L-PEPTIDE LINKING", "D-PEPTIDE LINKING"]
# Link types connected via an "O3'" -> 'P' bond
_NUCLEIC_LINKS = ["RNA LINKING", "DNA LINKING"]
1720
+
1721
def _connect_inter_residue(atoms, residue_starts):
    """
    Create a :class:`BondList` containing the bonds between adjacent
    amino acid or nucleotide residues.

    Two consecutive residues are connected by a single bond, if they
    belong to the same chain, their residue IDs differ by at most one
    and both are either peptide linking or nucleic acid linking.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to create the :class:`BondList` for.
    residue_starts : ndarray, dtype=int
        Return value of
        ``get_residue_starts(atoms, add_exclusive_stop=True)``.

    Returns
    -------
    BondList
        A bond list containing all inter residue bonds.
    """
    from .info.misc import link_type

    cdef list inter_bonds = []
    cdef int res_i
    cdef np.ndarray atom_names = atoms.atom_name
    cdef np.ndarray res_names = atoms.res_name
    cdef np.ndarray res_ids = atoms.res_id
    cdef np.ndarray chain_ids = atoms.chain_id
    cdef int this_start, following_start, following_stop
    cdef str this_connector, following_connector
    cdef np.ndarray this_hits, following_hits

    # The last two entries of 'residue_starts' are left out:
    # - the last residue has no successor to be connected to and
    # - the final entry is the exclusive stop index of 'atoms'
    for res_i in range(len(residue_starts) - 2):
        this_start = residue_starts[res_i]
        following_start = residue_starts[res_i + 1]
        following_stop = residue_starts[res_i + 2]

        # Residues from different chains are never connected
        if chain_ids[following_start] != chain_ids[this_start]:
            continue
        # A gap in the residue IDs means that the residues are not
        # consecutive
        # (An identical residue ID is accepted, as this is the case
        # if an insertion code is used)
        if res_ids[following_start] - res_ids[this_start] > 1:
            continue

        # Link types of both residues from RCSB components.cif
        this_link = link_type(res_names[this_start])
        following_link = link_type(res_names[following_start])

        if this_link in _PEPTIDE_LINKS and following_link in _PEPTIDE_LINKS:
            this_connector, following_connector = "C", "N"
        elif this_link in _NUCLEIC_LINKS and following_link in _NUCLEIC_LINKS:
            this_connector, following_connector = "O3'", "P"
        else:
            # The link types of the two residues are not compatible
            # -> no bond
            continue

        # 'np.where()' operates on a slice of 'atom_names',
        # so the start of the respective residue must be added
        # to obtain indices pointing into the entire atom array
        this_hits = this_start + np.where(
            atom_names[this_start:following_start] == this_connector
        )[0]
        following_hits = following_start + np.where(
            atom_names[following_start:following_stop] == following_connector
        )[0]
        if this_hits.shape[0] == 0 or following_hits.shape[0] == 0:
            # At least one of the connector atoms is missing
            # -> no bond
            continue

        # Only the first match in each residue is used for the bond
        inter_bonds.append((
            this_hits[0],
            following_hits[0],
            BondType.SINGLE
        ))

    return BondList(atoms.array_length(), np.array(inter_bonds, dtype=np.uint32))
1807
+
1808
+
1809
+
1810
def find_connected(bond_list, uint32 root, bint as_mask=False):
    """
    find_connected(bond_list, root, as_mask=False)

    Get indices to all atoms that are directly or indirectly connected
    to the root atom indicated by the given index.

    An atom is *connected* to the `root` atom, if that atom is reachable
    by traversing an arbitrary number of bonds, starting from the
    `root`.
    Effectively, this means that all atoms are *connected* to `root`,
    that are in the same molecule as `root`.
    Per definition `root` is also *connected* to itself.

    Parameters
    ----------
    bond_list : BondList
        The reference bond list.
    root : int
        The index of the root atom.
    as_mask : bool, optional
        If true, the connected atom indices are returned as boolean
        mask.
        By default, the connected atom indices are returned as integer
        array.

    Returns
    -------
    connected : ndarray, dtype=int or ndarray, dtype=bool
        Either a boolean mask or an integer array, representing the
        connected atoms.
        In case of a boolean mask: ``connected[i] == True``, if the atom
        with index ``i`` is connected.

    Raises
    ------
    ValueError
        If `root` is not smaller than the number of atoms represented
        by `bond_list`.

    Examples
    --------
    Consider a system with 4 atoms, where only the last atom is not
    bonded with the other ones (``0-1-2 3``):

    >>> bonds = BondList(4)
    >>> bonds.add_bond(0, 1)
    >>> bonds.add_bond(1, 2)
    >>> print(find_connected(bonds, 0))
    [0 1 2]
    >>> print(find_connected(bonds, 1))
    [0 1 2]
    >>> print(find_connected(bonds, 2))
    [0 1 2]
    >>> print(find_connected(bonds, 3))
    [3]
    """
    # Validate the root first,
    # so no bond table is built for an invalid request
    atom_count = bond_list.get_atom_count()
    if root >= atom_count:
        raise ValueError(
            f"Root atom index {root} is out of bounds for bond list "
            f"representing {atom_count} atoms"
        )

    all_bonds, _ = bond_list.get_all_bonds()

    # The result is allocated as genuine boolean array,
    # as the caller is promised a boolean mask.
    # The traversal writes into it via a 'uint8' view on the same
    # memory, since the memoryview requires an integer element type
    connected = np.zeros(atom_count, dtype=bool)
    cdef uint8[:] is_connected_mask = connected.view(np.uint8)
    # Mark all atoms that are reachable from the root via bonds
    _find_connected(bond_list, root, is_connected_mask, all_bonds)
    if as_mask:
        return connected
    return np.where(connected)[0]
1879
+
1880
+
1881
cdef _find_connected(bond_list,
                     int32 index,
                     uint8[:] is_connected_mask,
                     int32[:,:] all_bonds):
    """
    Mark all atoms that are reachable from the atom at `index` in
    `is_connected_mask`.

    The traversal uses an explicit stack instead of recursion, so its
    depth is not limited by the call stack, even if the connected
    molecule is very large.

    Parameters
    ----------
    bond_list
        Unused, kept for compatibility of the signature.
    index : int32
        The atom index to start the traversal from.
    is_connected_mask : memoryview of uint8, ndim=1
        Nonzero for each already visited atom.
        Updated in place.
    all_bonds : memoryview of int32, ndim=2
        Row ``i`` contains the indices of the atoms bonded to atom
        ``i``, padded with ``-1``.
    """
    cdef int32 current
    cdef int32 j
    cdef int32 connected_index
    cdef list pending

    if is_connected_mask[index]:
        # This atom has already been visited
        # -> nothing to do
        return
    is_connected_mask[index] = True

    # Atoms that are already marked, but whose bond partners still
    # need to be inspected
    pending = [index]
    while pending:
        current = pending.pop()
        for j in range(all_bonds.shape[1]):
            connected_index = all_bonds[current, j]
            if connected_index == -1:
                # Ignore padding values
                continue
            if not is_connected_mask[connected_index]:
                # Mark the atom directly when it is scheduled,
                # so each atom is put on the stack at most once
                is_connected_mask[connected_index] = True
                pending.append(connected_index)
1901
+
1902
+
1903
def find_rotatable_bonds(bonds):
    """
    find_rotatable_bonds(bonds)

    Find all rotatable bonds in a given :class:`BondList`.

    The following conditions must be true for a bond to be counted as
    rotatable:

        1. The bond must be a single bond (``BondType.SINGLE``)
        2. The connected atoms must not be within the same cycle/ring
        3. Both connected atoms must not be terminal, e.g. not a *C-H*
           bond, as rotation about such bonds would not change any
           coordinates

    Parameters
    ----------
    bonds : BondList
        The bonds to find the rotatable bonds in.

    Returns
    -------
    rotatable_bonds : BondList
        The subset of the input `bonds` that contains only rotatable
        bonds.

    Examples
    --------

    >>> molecule = residue("TYR")
    >>> for i, j, _ in find_rotatable_bonds(molecule.bonds).as_array():
    ...     print(molecule.atom_name[i], molecule.atom_name[j])
    N CA
    CA C
    CA CB
    C OXT
    CB CG
    CZ OH
    """
    cdef Py_ssize_t bond_i
    cdef uint32 i, j
    cdef uint32 bond_type
    # Convert the enum member once,
    # so the comparison in the loop is a plain C integer comparison
    cdef uint32 SINGLE = int(BondType.SINGLE)
    cdef bint in_same_cycle
    cdef int64[:] number_of_partners_v
    cdef uint32[:,:] bonds_v

    bond_graph = bonds.as_graph()
    # Sets allow a fast membership test for the atoms of each cycle
    rings = [
        frozenset(ring_atoms)
        for ring_atoms in nx.algorithms.cycles.cycle_basis(bond_graph)
    ]

    # Number of bond partners for each atom:
    # each row of the bond table is padded with -1
    number_of_partners_v = np.count_nonzero(
        bonds.get_all_bonds()[0] != -1,
        axis=1
    ).astype(np.int64, copy=False)

    rotatable_bonds = []
    bonds_v = bonds.as_array()
    for bond_i in range(bonds_v.shape[0]):
        i = bonds_v[bond_i, 0]
        j = bonds_v[bond_i, 1]
        bond_type = bonds_v[bond_i, 2]
        # Can only rotate about single bonds
        # Furthermore, it makes no sense to rotate about a bond,
        # that leads to a single atom
        if bond_type != SINGLE \
           or number_of_partners_v[i] <= 1 \
           or number_of_partners_v[j] <= 1:
            continue
        # Cannot rotate about a bond, if the two connected atoms
        # are in a cycle
        in_same_cycle = False
        for ring in rings:
            if i in ring and j in ring:
                in_same_cycle = True
                # One common cycle is sufficient
                break
        if not in_same_cycle:
            rotatable_bonds.append((i, j, bond_type))
    return BondList(bonds.get_atom_count(), np.array(rotatable_bonds))