biotite 0.41.1__cp310-cp310-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,1933 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides data structures and functions for handling
7
+ chemical bonds between atoms.
8
+ """
9
+
10
+ __name__ = "biotite.structure"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["BondList", "BondType",
13
+ "connect_via_distances", "connect_via_residue_names",
14
+ "find_connected", "find_rotatable_bonds"]
15
+
16
+ cimport cython
17
+ cimport numpy as np
18
+ from libc.stdlib cimport free, realloc
19
+
20
+ import itertools
21
+ import numbers
22
+ from enum import IntEnum
23
+ import networkx as nx
24
+ import numpy as np
25
+ from .error import BadStructureError
26
+ from ..copyable import Copyable
27
+
28
+ ctypedef np.uint64_t ptr
29
+ ctypedef np.uint8_t uint8
30
+ ctypedef np.uint16_t uint16
31
+ ctypedef np.uint32_t uint32
32
+ ctypedef np.uint64_t uint64
33
+ ctypedef np.int8_t int8
34
+ ctypedef np.int16_t int16
35
+ ctypedef np.int32_t int32
36
+ ctypedef np.int64_t int64
37
+
38
+
39
+ ctypedef fused IndexType:
40
+ uint8
41
+ uint16
42
+ uint32
43
+ uint64
44
+ int8
45
+ int16
46
+ int32
47
+ int64
48
+
49
+
50
class BondType(IntEnum):
    """
    This enum type represents the type of a chemical bond.

        - `ANY` - Used if the actual type is unknown
        - `SINGLE` - Single bond
        - `DOUBLE` - Double bond
        - `TRIPLE` - Triple bond
        - `QUADRUPLE` - A quadruple bond
        - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
        - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
        - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
    """
    ANY = 0
    SINGLE = 1
    DOUBLE = 2
    TRIPLE = 3
    QUADRUPLE = 4
    AROMATIC_SINGLE = 5
    AROMATIC_DOUBLE = 6
    AROMATIC_TRIPLE = 7


    def without_aromaticity(self):
        """
        Remove aromaticity from the bond type.

        :attr:`BondType.AROMATIC_{ORDER}` is converted into
        :attr:`BondType.{ORDER}`.
        Bond types that are not aromatic are returned unchanged.

        Returns
        -------
        new_bond_type : BondType
            The :class:`BondType` without aromaticity.

        Examples
        --------

        >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity())
        BondType.DOUBLE
        """
        if self < BondType.AROMATIC_SINGLE:
            # Not an aromatic type -> nothing to convert
            return self
        # The aromatic members are laid out in the same order as their
        # non-aromatic counterparts, so a constant offset maps
        # AROMATIC_{ORDER} onto {ORDER}
        difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
        return BondType(self - difference)
97
+
98
+
99
+ @cython.boundscheck(False)
100
+ @cython.wraparound(False)
101
+ class BondList(Copyable):
102
+ """
103
+ __init__(atom_count, bonds=None)
104
+
105
+ A bond list stores indices of atoms
106
+ (usually of an :class:`AtomArray` or :class:`AtomArrayStack`)
107
+ that form chemical bonds together with the type (or order) of the
108
+ bond.
109
+
110
+ Internally the bonds are stored as *n x 3* :class:`ndarray`.
111
+ For each row, the first column specifies the index of the first
112
+ atom, the second column the index of the second atom involved in the
113
+ bond.
114
+ The third column stores an integer that is interpreted as member
115
+ of the :class:`BondType` enum, that specifies the order of the
116
+ bond.
117
+
118
+ When indexing a :class:`BondList`, the index is not forwarded to the
119
+ internal :class:`ndarray`. Instead the indexing behavior is
120
+ consistent with indexing an :class:`AtomArray` or
121
+ :class:`AtomArrayStack`:
122
+ Bonds with at least one atom index that is not covered by the index
123
+ are removed, atom indices that occur after an uncovered atom index
124
+ move up.
125
+ Effectively, this means that after indexing an :class:`AtomArray`
126
+ and a :class:`BondList` with the same index, the atom indices in the
127
+ :class:`BondList` will still point to the same atoms in the
128
+ :class:`AtomArray`.
129
+ Indexing a :class:`BondList` with a single integer is equivalent
130
+ to calling :func:`get_bonds()`.
131
+
132
+ The same consistency applies to adding :class:`BondList` instances
133
+ via the '+' operator:
134
+ The atom indices of the second :class:`BondList` are increased by
135
+ the atom count of the first :class:`BondList` and then both
136
+ :class:`BondList` objects are merged.
137
+
138
+ Parameters
139
+ ----------
140
+ atom_count : int
141
+ A positive integer, that specifies the number of atoms the
142
+ :class:`BondList` refers to
143
+ (usually the length of an atom array (stack)).
144
+ Effectively, this value is the exclusive maximum for the indices
145
+ stored in the :class:`BondList`.
146
+ bonds : ndarray, shape=(n,2) or shape=(n,3), dtype=int, optional
147
+ This array contains the indices of atoms which are bonded:
148
+ For each row, the first column specifies the first atom,
149
+ the second column the second atom involved in a chemical bond.
150
+ If an *n x 3* array is provided, the additional column
151
+ specifies a :class:`BondType` instead of :attr:`BondType.ANY`.
152
+ By default, the created :class:`BondList` is empty.
153
+
154
+ Notes
155
+ -----
156
+ When initially providing the bonds as :class:`ndarray`, the input is
157
+ sanitized: Redundant bonds are removed, and each bond entry is
158
+ sorted so that the lower one of the two atom indices is in the first
159
+ column.
160
+ If a bond appears multiple times with different bond types, the
161
+ first bond takes precedence.
162
+
163
+ Examples
164
+ --------
165
+
166
+ Construct a :class:`BondList`, where a central atom (index 1) is
167
+ connected to three other atoms (index 0, 3 and 4):
168
+
169
+ >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
170
+ >>> print(bond_list)
171
+ [[0 1 0]
172
+ [1 3 0]
173
+ [1 4 0]]
174
+
175
+ Remove the first atom (index 0) via indexing:
176
+ The bond containing index 0 is removed, since the corresponding atom
177
+ does not exist anymore. Since all other atoms move up in their
178
+ position, the indices in the bond list are decreased by one:
179
+
180
+ >>> bond_list = bond_list[1:]
181
+ >>> print(bond_list)
182
+ [[0 2 0]
183
+ [0 3 0]]
184
+
185
+ :class:`BondList` objects can be associated to an :class:`AtomArray`
186
+ or :class:`AtomArrayStack`.
187
+ The following snippet shows this for a benzene molecule:
188
+
189
+ >>> benzene = AtomArray(12)
190
+ >>> # Omit filling most required annotation categories for brevity
191
+ >>> benzene.atom_name = np.array(
192
+ ... ["C1", "C2", "C3", "C4", "C5", "C6", "H1", "H2", "H3", "H4", "H5", "H6"]
193
+ ... )
194
+ >>> benzene.bonds = BondList(
195
+ ... benzene.array_length(),
196
+ ... np.array([
197
+ ... # Bonds between carbon atoms in the ring
198
+ ... (0, 1, BondType.AROMATIC_SINGLE),
199
+ ... (1, 2, BondType.AROMATIC_DOUBLE),
200
+ ... (2, 3, BondType.AROMATIC_SINGLE),
201
+ ... (3, 4, BondType.AROMATIC_DOUBLE),
202
+ ... (4, 5, BondType.AROMATIC_SINGLE),
203
+ ... (5, 0, BondType.AROMATIC_DOUBLE),
204
+ ... # Bonds between carbon and hydrogen
205
+ ... (0, 6, BondType.SINGLE),
206
+ ... (1, 7, BondType.SINGLE),
207
+ ... (2, 8, BondType.SINGLE),
208
+ ... (3, 9, BondType.SINGLE),
209
+ ... (4, 10, BondType.SINGLE),
210
+ ... (5, 11, BondType.SINGLE),
211
+ ... ])
212
+ ... )
213
+ >>> for i, j, bond_type in benzene.bonds.as_array():
214
+ ... print(
215
+ ... f"{str(BondType(bond_type))} bond between "
216
+ ... f"{benzene.atom_name[i]} and {benzene.atom_name[j]}"
217
+ ... )
218
+ BondType.AROMATIC_SINGLE bond between C1 and C2
219
+ BondType.AROMATIC_DOUBLE bond between C2 and C3
220
+ BondType.AROMATIC_SINGLE bond between C3 and C4
221
+ BondType.AROMATIC_DOUBLE bond between C4 and C5
222
+ BondType.AROMATIC_SINGLE bond between C5 and C6
223
+ BondType.AROMATIC_DOUBLE bond between C1 and C6
224
+ BondType.SINGLE bond between C1 and H1
225
+ BondType.SINGLE bond between C2 and H2
226
+ BondType.SINGLE bond between C3 and H3
227
+ BondType.SINGLE bond between C4 and H4
228
+ BondType.SINGLE bond between C5 and H5
229
+ BondType.SINGLE bond between C6 and H6
230
+
231
+ Obtain the bonded atoms for the :math:`C_1`:
232
+
233
+ >>> bonds, types = benzene.bonds.get_bonds(0)
234
+ >>> print(bonds)
235
+ [1 5 6]
236
+ >>> print(types)
237
+ [5 6 1]
238
+ >>> print(f"C1 is bonded to {', '.join(benzene.atom_name[bonds])}")
239
+ C1 is bonded to C2, C6, H1
240
+
241
+ Cut the benzene molecule in half.
242
+ Although the first half of the atoms is missing, the indices of
243
+ the cropped :class:`BondList` still represent the bonds of the
244
+ remaining atoms:
245
+
246
+ >>> half_benzene = benzene[
247
+ ... np.isin(benzene.atom_name, ["C4", "C5", "C6", "H4", "H5", "H6"])
248
+ ... ]
249
+ >>> for i, j, bond_type in half_benzene.bonds.as_array():
250
+ ... print(
251
+ ... f"{str(BondType(bond_type))} bond between "
252
+ ... f"{half_benzene.atom_name[i]} and {half_benzene.atom_name[j]}"
253
+ ... )
254
+ BondType.AROMATIC_DOUBLE bond between C4 and C5
255
+ BondType.AROMATIC_SINGLE bond between C5 and C6
256
+ BondType.SINGLE bond between C4 and H4
257
+ BondType.SINGLE bond between C5 and H5
258
+ BondType.SINGLE bond between C6 and H6
259
+ """
260
+
261
+ def __init__(self, uint32 atom_count, np.ndarray bonds=None):
262
+ self._atom_count = atom_count
263
+
264
+ if bonds is not None and len(bonds) > 0:
265
+ if bonds.ndim != 2:
266
+ raise ValueError("Expected a 2D-ndarray for input bonds")
267
+
268
+ self._bonds = np.zeros((bonds.shape[0], 3), dtype=np.uint32)
269
+ if bonds.shape[1] == 3:
270
+ # Input contains bonds (index 0 and 1)
271
+ # including the bond type value (index 2)
272
+ # Bond indices:
273
+ self._bonds[:,:2] = np.sort(
274
+ # Indices are sorted per bond
275
+ # so that the lower index is at the first position
276
+ _to_positive_index_array(bonds[:,:2], atom_count), axis=1
277
+ )
278
+ # Bond type:
279
+ if (bonds[:, 2] >= len(BondType)).any():
280
+ raise ValueError(
281
+ f"BondType {np.max(bonds[:, 2])} is invalid"
282
+ )
283
+ self._bonds[:,2] = bonds[:, 2]
284
+
285
+ # Indices are sorted per bond
286
+ # so that the lower index is at the first position
287
+ elif bonds.shape[1] == 2:
288
+ # Input contains the bonds without bond type
289
+ # -> Default: Set bond type ANY (0)
290
+ self._bonds[:,:2] = np.sort(
291
+ # Indices are sorted per bond
292
+ # so that the lower index is at the first position
293
+ _to_positive_index_array(bonds[:,:2], atom_count), axis=1
294
+ )
295
+ else:
296
+ raise ValueError(
297
+ "Input array containing bonds must be either of shape "
298
+ "(n,2) or (n,3)"
299
+ )
300
+ self._remove_redundant_bonds()
301
+ self._max_bonds_per_atom = self._get_max_bonds_per_atom()
302
+
303
+ else:
304
+ # Create empty bond list
305
+ self._bonds = np.zeros((0, 3), dtype=np.uint32)
306
+ self._max_bonds_per_atom = 0
307
+
308
+ def __copy_create__(self):
309
+ # Create empty bond list to prevent
310
+ # unnecessary removal of redundant bonds
311
+ # and calculation of maximum bonds per atom
312
+ return BondList(self._atom_count)
313
+
314
+ def __copy_fill__(self, clone):
315
+ # The bonds are added here
316
+ clone._bonds = self._bonds.copy()
317
+ clone._max_bonds_per_atom = self._max_bonds_per_atom
318
+
319
+ def offset_indices(self, int offset):
320
+ """
321
+ offset_indices(offset)
322
+
323
+ Increase all atom indices in the :class:`BondList` by the given
324
+ offset.
325
+
326
+ Implicitly this increases the atom count.
327
+
328
+ Parameters
329
+ ----------
330
+ offset : int
331
+ The atom indices are increased by this value.
332
+ Must not be negative.
333
+
334
+ Examples
335
+ --------
336
+
337
+ >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
338
+ >>> print(bond_list)
339
+ [[0 1 0]
340
+ [1 3 0]
341
+ [1 4 0]]
342
+ >>> bond_list.offset_indices(2)
343
+ >>> print(bond_list)
344
+ [[2 3 0]
345
+ [3 5 0]
346
+ [3 6 0]]
347
+ """
348
+ if offset < 0:
349
+ raise ValueError("Offest must be positive")
350
+ self._bonds[:,:2] += offset
351
+ self._atom_count += offset
352
+
353
+ def as_array(self):
354
+ """
355
+ as_array()
356
+
357
+ Obtain a copy of the internal :class:`ndarray`.
358
+
359
+ Returns
360
+ -------
361
+ array : ndarray, shape=(n,3), dtype=np.uint32
362
+ Copy of the internal :class:`ndarray`.
363
+ For each row, the first column specifies the index of the
364
+ first atom, the second column the index of the second atom
365
+ involved in the bond.
366
+ The third column stores the :class:`BondType`.
367
+ """
368
+ return self._bonds.copy()
369
+
370
def as_set(self):
    """
    as_set()

    Obtain a set representation of the :class:`BondList`.

    Returns
    -------
    bond_set : set of tuple(int, int, int)
        A set with one tuple per bond.
        The elements of each tuple are, in this order, the index of
        the first atom, the index of the second atom and the
        :class:`BondType` as integer.
    """
    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    cdef set unique_bonds = set()
    for row in range(bonds_v.shape[0]):
        # Each row of the bond array becomes one hashable tuple
        unique_bonds.add(
            (bonds_v[row,0], bonds_v[row,1], bonds_v[row,2])
        )
    return unique_bonds
393
+
394
def as_graph(self):
    """
    as_graph()

    Obtain a graph representation of the :class:`BondList`.

    Returns
    -------
    graph : Graph
        A *NetworkX* :class:`Graph`.
        The atom indices are nodes, the bonds are edges.
        Each edge has a ``"bond_type"`` attribute containing the
        :class:`BondType`.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0,2), (1,3,1), (1,4,1)]))
    >>> graph = bond_list.as_graph()
    >>> print(graph.nodes)
    [0, 1, 3, 4]
    >>> print(graph.edges)
    [(0, 1), (1, 3), (1, 4)]
    >>> for i, j in graph.edges:
    ...     print(i, j, graph.get_edge_data(i, j))
    0 1 {'bond_type': <BondType.DOUBLE: 2>}
    1 3 {'bond_type': <BondType.SINGLE: 1>}
    1 4 {'bond_type': <BondType.SINGLE: 1>}
    """
    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    cdef list edge_list = []

    # One (node, node, attributes) tuple per bond
    for row in range(bonds_v.shape[0]):
        edge_list.append((
            bonds_v[row,0], bonds_v[row,1],
            {"bond_type": BondType(bonds_v[row,2])}
        ))

    graph = nx.Graph()
    graph.add_edges_from(edge_list)
    return graph
436
+
437
def remove_aromaticity(self):
    """
    Remove aromaticity from the bond types.

    :attr:`BondType.AROMATIC_{ORDER}` is converted into
    :attr:`BondType.{ORDER}`.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_aromaticity()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type))
    0 1 BondType.SINGLE
    1 2 BondType.DOUBLE
    """
    bond_array = self._bonds
    # Constant distance between an aromatic bond type
    # and its non-aromatic counterpart
    shift = BondType.AROMATIC_SINGLE - BondType.SINGLE
    # Every bond type with a value of at least 'AROMATIC_SINGLE'
    # is treated as aromatic
    is_aromatic = bond_array[:, 2] >= BondType.AROMATIC_SINGLE
    bond_array[is_aromatic, 2] -= shift
459
+
460
def remove_bond_order(self):
    """
    Convert all bonds to :attr:`BondType.ANY`.
    """
    # The third column of the bond array holds the bond types;
    # the slice is a view, so the assignment edits the array in-place
    type_column = self._bonds[:,2]
    type_column[:] = BondType.ANY
465
+
466
def get_atom_count(self):
    """
    get_atom_count()

    Get the atom count.

    Returns
    -------
    atom_count : int
        The number of atoms this :class:`BondList` refers to.
    """
    atom_count = self._atom_count
    return atom_count
478
+
479
def get_bond_count(self):
    """
    get_bond_count()

    Get the amount of bonds.

    Returns
    -------
    bond_count : int
        The amount of bonds.
        This is the number of rows of the internal :class:`ndarray`
        containing the bonds.
    """
    bond_count = self._bonds.shape[0]
    return bond_count
492
+
493
def get_bonds(self, int32 atom_index):
    """
    get_bonds(atom_index)

    Obtain the indices of the atoms bonded to the atom with the
    given index as well as the corresponding bond types.

    Parameters
    ----------
    atom_index : int
        The index of the atom to get the bonds for.

    Returns
    -------
    bonds : np.ndarray, dtype=np.uint32, shape=(k,)
        The indices of connected atoms.
    bond_types : np.ndarray, dtype=np.uint8, shape=(k,)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the type (or order) of the bonds to
        the connected atoms.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> bonds, types = bond_list.get_bonds(1)
    >>> print(bonds)
    [0 3 4]
    """
    cdef int row=0, found=0
    cdef uint32 partner

    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    cdef uint32[:,:] bonds_v = self._bonds
    # The result arrays are allocated with the largest size that
    # may be required, i.e. the bond count of the atom with
    # the most bonds
    cdef np.ndarray partners = np.zeros(self._max_bonds_per_atom,
                                        dtype=np.uint32)
    cdef uint32[:] partners_v = partners
    cdef np.ndarray partner_types = np.zeros(self._max_bonds_per_atom,
                                             dtype=np.uint8)
    cdef uint8[:] partner_types_v = partner_types

    for row in range(bonds_v.shape[0]):
        # The requested atom may be at either position of a bond:
        # the atom at the respective other position is the partner
        if bonds_v[row,0] == index:
            partner = bonds_v[row,1]
        elif bonds_v[row,1] == index:
            partner = bonds_v[row,0]
        else:
            continue
        partners_v[found] = partner
        partner_types_v[found] = bonds_v[row,2]
        found += 1

    # Cut off the unused tail of the pessimistically sized arrays
    partners = partners[:found]
    partner_types = partner_types[:found]

    return partners, partner_types
555
+
556
+
557
def get_all_bonds(self):
    """
    get_all_bonds()

    For each atom index, give the indices of the atoms bonded to
    this atom as well as the corresponding bond types.

    Returns
    -------
    bonds : np.ndarray, dtype=np.int32, shape=(n,k)
        The indices of connected atoms.
        The first dimension represents the atoms,
        the second dimension represents the indices of atoms bonded
        to the respective atom.
        Atoms can have different numbers of atoms bonded to them.
        Therefore, the length of the second dimension *k* is equal
        to the maximum number of bonds for an atom in this
        :class:`BondList`.
        For atoms with less bonds, the corresponding entry in the
        array is padded with ``-1`` values.
    bond_types : np.ndarray, dtype=np.int8, shape=(n,k)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the bond type (or order) corresponding
        to the returned `bonds`.
        It uses the same ``-1``-padding.

    Examples
    --------

    >>> # BondList for benzene
    >>> bond_list = BondList(
    ...     12,
    ...     np.array([
    ...         # Bonds between the carbon atoms in the ring
    ...         (0,  1, BondType.AROMATIC_SINGLE),
    ...         (1,  2, BondType.AROMATIC_DOUBLE),
    ...         (2,  3, BondType.AROMATIC_SINGLE),
    ...         (3,  4, BondType.AROMATIC_DOUBLE),
    ...         (4,  5, BondType.AROMATIC_SINGLE),
    ...         (5,  0, BondType.AROMATIC_DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0,  6, BondType.SINGLE),
    ...         (1,  7, BondType.SINGLE),
    ...         (2,  8, BondType.SINGLE),
    ...         (3,  9, BondType.SINGLE),
    ...         (4, 10, BondType.SINGLE),
    ...         (5, 11, BondType.SINGLE),
    ...     ])
    ... )
    >>> bonds, types = bond_list.get_all_bonds()
    >>> print(bonds)
    [[ 1  5  6]
     [ 0  2  7]
     [ 1  3  8]
     [ 2  4  9]
     [ 3  5 10]
     [ 4  0 11]
     [ 0 -1 -1]
     [ 1 -1 -1]
     [ 2 -1 -1]
     [ 3 -1 -1]
     [ 4 -1 -1]
     [ 5 -1 -1]]
    >>> print(types)
    [[ 5  6  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]]
    >>> for i in range(bond_list.get_atom_count()):
    ...     bonds_for_atom = bonds[i]
    ...     # Remove trailing '-1' values
    ...     bonds_for_atom = bonds_for_atom[bonds_for_atom != -1]
    ...     print(f"{i}: {bonds_for_atom}")
    0: [1 5 6]
    1: [0 2 7]
    2: [1 3 8]
    3: [2 4 9]
    4: [ 3  5 10]
    5: [ 4  0 11]
    6: [0]
    7: [1]
    8: [2]
    9: [3]
    10: [4]
    11: [5]
    """
    cdef int row=0
    cdef uint32 first_atom, second_atom, bond_type
    cdef uint32 first_slot, second_slot

    cdef uint32[:,:] bonds_v = self._bonds
    # Each atom may have a different number of bonded atoms:
    # the second dimension is as large as the bond count of the atom
    # with the most bonds and unused entries keep the '-1' padding
    cdef np.ndarray partners = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int32
    )
    cdef int32[:,:] partners_v = partners
    cdef np.ndarray partner_types = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int8
    )
    cdef int8[:,:] partner_types_v = partner_types
    # For each atom, the number of partners written so far
    cdef np.ndarray fill_levels = np.zeros(self._atom_count, dtype=np.uint32)
    cdef uint32[:] fill_levels_v = fill_levels

    for row in range(bonds_v.shape[0]):
        first_atom = bonds_v[row,0]
        second_atom = bonds_v[row,1]
        bond_type = bonds_v[row,2]
        # The next free position for both atoms is looked up
        # before any of the fill levels is changed
        first_slot = fill_levels_v[first_atom]
        second_slot = fill_levels_v[second_atom]
        # A bond is registered for both involved atoms
        partners_v[first_atom, first_slot] = second_atom
        partner_types_v[first_atom, first_slot] = bond_type
        partners_v[second_atom, second_slot] = first_atom
        partner_types_v[second_atom, second_slot] = bond_type
        fill_levels_v[first_atom] += 1
        fill_levels_v[second_atom] += 1

    return partners, partner_types
688
+
689
+
690
def adjacency_matrix(self):
    r"""
    adjacency_matrix()

    Represent this :class:`BondList` as adjacency matrix.

    The adjacency matrix is a quadratic matrix with boolean values
    according to

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{True},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            \text{False}, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=bool, shape=(n,n)
        The created adjacency matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.adjacency_matrix())
    [[False  True  True  True]
     [ True False False False]
     [ True False False False]
     [ True False False False]]
    """
    first_atoms = self._bonds[:,0]
    second_atoms = self._bonds[:,1]
    adjacency = np.zeros(
        (self._atom_count, self._atom_count), dtype=bool
    )
    # Mark both directions to obtain a symmetric matrix
    adjacency[first_atoms, second_atoms] = True
    adjacency[second_atoms, first_atoms] = True
    return adjacency
738
+
739
+
740
def bond_type_matrix(self):
    r"""
    bond_type_matrix()

    Represent this :class:`BondList` as a matrix depicting the bond
    type.

    The matrix is a quadratic matrix:

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{BondType}_{ij},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            -1,                    & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=np.int8, shape=(n,n)
        The created bond type matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.bond_type_matrix())
    [[-1  2  1  1]
     [ 2 -1 -1 -1]
     [ 1 -1 -1 -1]
     [ 1 -1 -1 -1]]
    """
    first_atoms = self._bonds[:,0]
    second_atoms = self._bonds[:,1]
    bond_types = self._bonds[:,2]
    # '-1' marks atom pairs without a bond
    type_matrix = np.full(
        (self._atom_count, self._atom_count), -1, dtype=np.int8
    )
    # Write both directions to obtain a symmetric matrix
    type_matrix[first_atoms, second_atoms] = bond_types
    type_matrix[second_atoms, first_atoms] = bond_types
    return type_matrix
788
+
789
+
790
def add_bond(self, int32 atom_index1, int32 atom_index2,
             bond_type=BondType.ANY):
    """
    add_bond(atom_index1, atom_index2, bond_type=BondType.ANY)

    Add a bond to the :class:`BondList`.

    If the bond is already existent, only the bond type is updated.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms to create a bond for.
    bond_type : BondType or int, optional
        The type of the bond. Default is :attr:`BondType.ANY`.

    Raises
    ------
    ValueError
        If `bond_type` is negative or not smaller than the number of
        :class:`BondType` members.
    """
    # Negative values must be rejected as well:
    # the bond array is unsigned, so such a value cannot be stored
    if bond_type < 0 or bond_type >= len(BondType):
        raise ValueError(f"BondType {bond_type} is invalid")

    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    _sort(&index1, &index2)

    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Check if bond is already existent in list
    cdef bint in_list = False
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            in_list = True
            # If in list, update bond type
            all_bonds_v[i,2] = int(bond_type)
            break
    if not in_list:
        self._bonds = np.append(
            self._bonds,
            np.array(
                [(index1, index2, int(bond_type))], dtype=np.uint32
            ),
            axis=0
        )
        # A new bond may increase the maximum number of bonds per atom
        self._max_bonds_per_atom = self._get_max_bonds_per_atom()
834
+
835
def remove_bond(self, int32 atom_index1, int32 atom_index2):
    """
    remove_bond(atom_index1, atom_index2)

    Remove a bond from the :class:`BondList`.

    If the bond is not existent in the :class:`BondList`, nothing happens.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms whose bond should be removed.
    """
    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    _sort(&index1, &index2)

    # Find the bond in bond list
    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            self._bonds = np.delete(self._bonds, i, axis=0)
            # Stop here: 'all_bonds_v' still refers to the array
            # before the deletion, hence later row indices would
            # not be valid for the new 'self._bonds' anymore
            break
    # The maximum bonds per atom is not recalculated,
    # as the value can only be decreased on bond removal
    # Since this value is only used for pessimistic array allocation
    # in 'get_bonds()', the slightly larger memory usage is a better
    # option than the repetitive call of _get_max_bonds_per_atom()
865
+
866
def remove_bonds_to(self, int32 atom_index):
    """
    remove_bonds_to(atom_index)

    Remove all bonds from the :class:`BondList` where the given atom
    is involved.

    Parameters
    ----------
    atom_index : int
        The index of the atom whose bonds should be removed.

    """
    cdef uint32 target = _to_positive_index(atom_index, self._atom_count)

    # Marks the bonds that are kept:
    # 'uint8' is used instead of 'bool' for the memory view
    cdef np.ndarray keep = np.ones(len(self._bonds), dtype=np.uint8)
    cdef uint8[:] keep_v = keep

    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    for row in range(bonds_v.shape[0]):
        # The atom may appear at either position of the bond
        if (bonds_v[row,0] == target or bonds_v[row,1] == target):
            keep_v[row] = False

    kept_bonds = self._bonds[keep.astype(bool, copy=False)]
    self._bonds = kept_bonds
    # The maximum bonds per atom is deliberately not recalculated,
    # analogous to 'remove_bond()'
894
+
895
def remove_bonds(self, bond_list):
    """
    remove_bonds(bond_list)

    Remove multiple bonds from the :class:`BondList`.

    All bonds present in `bond_list` are removed from this instance.
    If a bond is not existent in this instance, nothing happens.
    Only the bond indices, not the bond types, are relevant for
    this.

    Parameters
    ----------
    bond_list : BondList
        The bonds in `bond_list` are removed from this instance.
    """
    cdef int own_row=0, rem_row=0

    # The bonds of this instance
    cdef uint32[:,:] own_bonds_v = self._bonds
    # The bonds that are requested to be removed
    cdef uint32[:,:] rem_bonds_v = bond_list._bonds
    # Marks the bonds that are kept:
    # 'uint8' is used instead of 'bool' for the memory view
    cdef np.ndarray keep = np.ones(own_bonds_v.shape[0], dtype=np.uint8)
    cdef uint8[:] keep_v = keep
    for own_row in range(own_bonds_v.shape[0]):
        for rem_row in range(rem_bonds_v.shape[0]):
            # Only the atom indices are compared, not the bond type
            if own_bonds_v[own_row,0] != rem_bonds_v[rem_row,0]:
                continue
            if own_bonds_v[own_row,1] == rem_bonds_v[rem_row,1]:
                keep_v[own_row] = False

    kept_bonds = self._bonds[keep.astype(bool, copy=False)]
    self._bonds = kept_bonds
    # The maximum bonds per atom is deliberately not recalculated,
    # analogous to 'remove_bond()'
929
+
930
def merge(self, bond_list):
    """
    merge(bond_list)

    Merge another :class:`BondList` with this instance into a new
    object.
    If a bond appears in both :class:`BondList`'s, the
    :class:`BondType` from the given `bond_list` takes precedence.

    The internal :class:`ndarray` instances containing the bonds are
    simply concatenated and the new atom count is the maximum of
    both bond lists.

    Parameters
    ----------
    bond_list : BondList
        This bond list is merged with this instance.

    Returns
    -------
    bond_list : BondList
        The merged :class:`BondList`.

    Notes
    -----
    This is not equal to using the `+` operator.

    Examples
    --------

    >>> bond_list1 = BondList(3, np.array([(0,1),(1,2)]))
    >>> bond_list2 = BondList(5, np.array([(2,3),(3,4)]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list.get_atom_count())
    5
    >>> print(merged_list)
    [[0 1 0]
     [1 2 0]
     [2 3 0]
     [3 4 0]]

    The BondList given as parameter takes precedence:

    >>> # Specify bond type to see where a bond is taken from
    >>> bond_list1 = BondList(4, np.array([
    ...     (0, 1, BondType.SINGLE),
    ...     (1, 2, BondType.SINGLE)
    ... ]))
    >>> bond_list2 = BondList(4, np.array([
    ...     (1, 2, BondType.DOUBLE),    # This one is a duplicate
    ...     (2, 3, BondType.DOUBLE)
    ... ]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list)
    [[0 1 1]
     [1 2 1]
     [2 3 2]]
    """
    merged_atom_count = max(self._atom_count, bond_list._atom_count)
    # The bonds of the given 'bond_list' are placed in front of the
    # bonds of this instance
    # NOTE(review): presumably the constructor keeps the first
    # occurrence of a duplicate bond, which gives 'bond_list'
    # precedence - confirm against the constructor
    stacked_bonds = np.concatenate(
        [bond_list.as_array(), self.as_array()],
        axis=0
    )
    return BondList(merged_atom_count, stacked_bonds)
995
+
996
def __add__(self, bond_list):
    # Concatenate two bond lists into a new one:
    # The atom indices of 'bond_list' are shifted by the atom count
    # of this instance and the atom counts are summed up
    cdef np.ndarray merged_bonds \
        = np.concatenate([self._bonds, bond_list._bonds])
    # Offset the indices of appended bonds list
    # (consistent with addition of AtomArray)
    merged_bonds[len(self._bonds):, :2] += self._atom_count
    cdef uint32 merged_count = self._atom_count + bond_list._atom_count
    cdef merged_bond_list = BondList(merged_count)
    # Array is not used in constructor to prevent unnecessary
    # maximum and redundant bond calculation
    merged_bond_list._bonds = merged_bonds
    # The atoms of both operands stay distinct atoms in the result,
    # hence the maximum number of bonds per atom is the larger value
    # of both operands
    # (the freshly created list has no bonds yet and therefore must
    # not be used as second operand here)
    merged_bond_list._max_bonds_per_atom = max(
        self._max_bonds_per_atom, bond_list._max_bonds_per_atom
    )
    return merged_bond_list
1011
+
1012
def __getitem__(self, index):
    # Index this bond list in the same way as an atom array:
    # - A single integer returns the result of 'get_bonds()'
    # - An integer array or any other index (handled as boolean mask)
    #   returns a new bond list, that contains only bonds between
    #   selected atoms, with atom indices adapted to the selection

    ## Variables for both, integer and boolean index arrays
    cdef uint32[:,:] all_bonds_v
    cdef int32 new_index
    cdef int i
    cdef uint32* index1_ptr
    cdef uint32* index2_ptr
    cdef np.ndarray removal_filter
    cdef uint8[:] removal_filter_v

    ## Variables for integer arrays
    # NOTE(review): 'index_v' and 'inverse_index' are declared but
    # not used below; the code uses the undeclared name
    # 'inverse_index_v' instead
    cdef int32[:] index_v, inverse_index
    cdef int32 new_index1, new_index2

    ## Variables for boolean mask
    # Boolean mask representation of the index
    cdef np.ndarray mask
    cdef uint8[:] mask_v
    # For each atom index, the number of atoms up to and including
    # this index that are not selected by the mask
    cdef np.ndarray offsets
    cdef uint32[:] offsets_v

    if isinstance(index, numbers.Integral):
        ## Handle single index
        return self.get_bonds(index)

    elif isinstance(index, np.ndarray) \
         and np.issubdtype(index.dtype, np.integer):
        ## Handle index array
        # All modifications are performed on a copy
        copy = self.copy()
        all_bonds_v = copy._bonds

        index = _to_positive_index_array(index, self._atom_count)
        # The inverse index is required to efficiently obtain
        # the new index of an atom in case of an unsorted index
        # array
        inverse_index_v = _invert_index(index, self._atom_count)
        # 'uint8' is used instead of 'bool' for the memory view
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            # '-1' means that the atom is not part of the index array
            new_index1 = inverse_index_v[index1_ptr[0]]
            new_index2 = inverse_index_v[index2_ptr[0]]
            if new_index1 != -1 and new_index2 != -1:
                # Both atoms involved in bond are included
                # by index array
                # -> assign new atom indices
                index1_ptr[0] = <int32>new_index1
                index2_ptr[0] = <int32>new_index2
            else:
                # At least one atom in bond is not included
                # -> remove bond
                removal_filter_v[i] = False

        copy._bonds = copy._bonds[
            removal_filter.astype(bool, copy=False)
        ]
        # Again, sort indices per bond
        # as the correct order is not guaranteed anymore
        # for unsorted index arrays
        copy._bonds[:,:2] = np.sort(copy._bonds[:,:2], axis=1)
        copy._atom_count = len(index)
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy

    else:
        ## Handle all other arrays as boolean mask
        # All modifications are performed on a copy
        copy = self.copy()
        all_bonds_v = copy._bonds

        mask = _to_bool_mask(index, length=copy._atom_count)
        # Each time an atom is missing in the mask,
        # the offset is increased by one
        offsets = np.cumsum(
            ~mask.astype(bool, copy=False), dtype=np.uint32
        )
        # 'uint8' is used instead of 'bool' for the memory view
        removal_filter = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
        removal_filter_v = removal_filter
        mask_v = mask
        offsets_v = offsets
        # If an atom in a bond is not masked,
        # the bond is removed from the list
        # If an atom is masked,
        # its index value is decreased by the respective offset
        # The offset is necessary, removing atoms in an AtomArray
        # decreases the index of the following atoms
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            if mask_v[index1_ptr[0]] and mask_v[index2_ptr[0]]:
                # Both atoms involved in bond are masked
                # -> decrease atom index by offset
                index1_ptr[0] -= offsets_v[index1_ptr[0]]
                index2_ptr[0] -= offsets_v[index2_ptr[0]]
            else:
                # At least one atom involved in bond is not masked
                # -> remove bond
                removal_filter_v[i] = False
        # Apply the bond removal filter
        copy._bonds = copy._bonds[removal_filter.astype(bool, copy=False)]
        # The new atom count is the number of selected atoms
        copy._atom_count = len(np.nonzero(mask)[0])
        copy._max_bonds_per_atom = copy._get_max_bonds_per_atom()
        return copy
1120
+
1121
def __iter__(self):
    # Iteration is explicitly disabled by always raising an exception
    message = "'BondList' object is not iterable"
    raise TypeError(message)
1123
+
1124
def __str__(self):
    # The string representation is the one of the bond array
    bond_array = self.as_array()
    return str(bond_array)
1126
+
1127
def __eq__(self, item):
    # Two bond lists are equal, if they have the same atom count
    # and contain the same bonds including the bond types,
    # irrespective of the order of the bonds
    if isinstance(item, BondList):
        same_atom_count = self._atom_count == item._atom_count
        return same_atom_count and self.as_set() == item.as_set()
    # Objects of other types are never equal to a bond list
    return False
1132
+
1133
def __contains__(self, item):
    # Check whether a bond between two atoms is part of this bond list
    # 'item' is a tuple of two atom indices in arbitrary order;
    # the bond type is not taken into account
    # A 'TypeError' is raised, if 'item' is not a tuple of length 2
    if not isinstance(item, tuple) or len(item) != 2:
        raise TypeError("Expected a tuple of atom indices")

    cdef int i=0

    cdef uint32 match_index1, match_index2
    # Sort indices for faster search in loop
    cdef uint32 atom_index1 = min(item)
    cdef uint32 atom_index2 = max(item)

    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        match_index1 = all_bonds_v[i,0]
        match_index2 = all_bonds_v[i,1]
        # Since the stored bonds have the atom indices sorted,
        # the comparison in reverse order is not required
        if atom_index1 == match_index1 and atom_index2 == match_index2:
            return True

    return False
1152
+
1153
+
1154
def _get_max_bonds_per_atom(self):
    """
    Find the largest number of bonds any single atom is involved in.

    For an atom count of 0, the result is 0.
    """
    if self._atom_count == 0:
        return 0

    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds
    # For each atom index, the number of bonds the atom appears in
    cdef np.ndarray occurrences = np.zeros(self._atom_count,
                                           dtype=np.uint32)
    cdef uint32[:] occurrences_v = occurrences
    for row in range(bonds_v.shape[0]):
        # A bond counts for both of its atoms
        occurrences_v[bonds_v[row,0]] += 1
        occurrences_v[bonds_v[row,1]] += 1
    return np.max(occurrences_v)
1169
+
1170
def _remove_redundant_bonds(self):
    """
    Remove duplicate bonds from the internal bond array.

    Two bonds are duplicates, if both of their atom indices are
    equal; the bond type is not compared.
    The first occurrence of a bond is kept, later ones are removed.
    """
    cdef int j
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Boolean mask for final removal of redundant atoms
    # Unfortunately views of boolean ndarrays are not supported
    # -> use uint8 array
    cdef np.ndarray redundancy_filter = np.ones(all_bonds_v.shape[0],
                                                dtype=np.uint8)
    cdef uint8[:] redundancy_filter_v = redundancy_filter
    # Array of pointers to C-arrays
    # The array is indexed with the atom indices in the bond list
    # The respective C-array contains the indices of bonded atoms
    # Initially all pointers are 0, i.e. NULL
    cdef ptr[:] ptrs_v = np.zeros(self._atom_count, dtype=np.uint64)
    # Stores the length of the C-arrays
    cdef int[:] array_len_v = np.zeros(self._atom_count, dtype=np.int32)
    # Iterate over bond list:
    # If bond is already listed in the array of pointers,
    # set filter to false at that position
    # Else add bond to array of pointers
    cdef uint32 i1, i2
    cdef uint32* array_ptr
    cdef int length

    try:
        for j in range(all_bonds_v.shape[0]):
            i1 = all_bonds_v[j,0]
            i2 = all_bonds_v[j,1]
            # Since the bonds have the atom indices sorted
            # the reverse check is omitted
            if _in_array(<uint32*>ptrs_v[i1], i2, array_len_v[i1]):
                redundancy_filter_v[j] = False
            else:
                # Append bond in respective C-array
                # and update C-array length
                length = array_len_v[i1] +1
                array_ptr = <uint32*>ptrs_v[i1]
                array_ptr = <uint32*>realloc(
                    array_ptr, length * sizeof(uint32)
                )
                if not array_ptr:
                    # 'ptrs_v[i1]' still holds the pointer from before
                    # the failed reallocation,
                    # so it is freed in the 'finally' block
                    raise MemoryError()
                array_ptr[length-1] = i2
                ptrs_v[i1] = <ptr>array_ptr
                array_len_v[i1] = length

    finally:
        # Free pointers
        # This is also reached, if an exception was raised above
        for i in range(ptrs_v.shape[0]):
            free(<int*>ptrs_v[i])

    # Eventually remove redundant bonds
    self._bonds = self._bonds[redundancy_filter.astype(bool, copy=False)]
1222
+
1223
+
1224
cdef uint32 _to_positive_index(int32 index, uint32 array_length) except -1:
    """
    Convert a potentially negative index into a positive index.

    A negative `index` counts from the end of an array with
    `array_length` elements.
    An :class:`IndexError` is raised, if the index is out of range,
    i.e. ``index >= array_length`` or ``index < -array_length``.
    """
    # The shifted index is computed as signed 64 bit integer:
    # In unsigned arithmetic a negative result would wrap around to
    # a large positive value and the range check could never fail
    cdef long long shifted_index
    if index < 0:
        shifted_index = <long long> array_length + index
        if shifted_index < 0:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> shifted_index
    else:
        if <uint32> index >= array_length:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> index
1244
+
1245
+
1246
def _to_positive_index_array(index_array, length):
    """
    Create a copy of the given index array, where negative indices
    are replaced by their positive counterparts.

    An :class:`IndexError` is raised, if any index is out of range
    for an array with `length` elements.
    The input array itself is not modified.
    """
    original_shape = index_array.shape
    # 'flatten()' always gives a copy -> the input stays untouched
    flat_indices = index_array.flatten()
    is_negative = flat_indices < 0
    flat_indices[is_negative] = length + flat_indices[is_negative]
    # Indices that are still negative were too small,
    # indices that reach the length are too large
    if (flat_indices < 0).any():
        raise IndexError(
            f"Index {np.min(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    if (flat_indices >= length).any():
        raise IndexError(
            f"Index {np.max(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    return flat_indices.reshape(original_shape)
1267
+
1268
+
1269
cdef inline bint _in_array(uint32* array, uint32 atom_index, int array_length):
    """
    Check via linear search, whether `atom_index` is one of the first
    `array_length` elements of the C-array `array`.
    A ``NULL`` pointer is handled like an empty array.
    """
    cdef int pos = 0
    if array != NULL:
        for pos in range(array_length):
            if array[pos] == atom_index:
                return True
    return False
1280
+
1281
+
1282
cdef inline void _sort(uint32* index1_ptr, uint32* index2_ptr):
    # Bring the two referenced indices into ascending order in-place
    cdef uint32 larger
    if index1_ptr[0] <= index2_ptr[0]:
        # Already in correct order
        return
    larger = index1_ptr[0]
    index1_ptr[0] = index2_ptr[0]
    index2_ptr[0] = larger
1289
+
1290
+
1291
@cython.wraparound(False)
# Do bounds check, as the input indices may be out of bounds
def _invert_index(IndexType[:] index_v, uint32 length):
    """
    Create the inverse of an index array:
    If *input[i] = j*, then *output[j] = i*.
    Each element *j*, that does not appear in *input*,
    gets *output[j]* = -1.

    Indices that appear more than once are not supported and lead to
    a :class:`NotImplementedError`.
    """
    cdef int32 pos
    cdef IndexType index_val
    inverse = np.full(length, -1, dtype=np.int32)
    cdef int32[:] inverse_v = inverse

    for pos in range(index_v.shape[0]):
        index_val = index_v[pos]
        if inverse_v[index_val] == -1:
            # First occurrence of this index -> remember its position
            inverse_v[index_val] = pos
        else:
            # The position for this index was already set before
            raise NotImplementedError(
                f"Duplicate indices are not supported, "
                f"but index {index_val} appeared multiple times"
            )

    return inverse
1317
+
1318
+
1319
def _to_bool_mask(object index, uint32 length):
    """
    Create a mask with `length` elements from an index of arbitrary
    type.

    The mask has the dtype ``uint8`` instead of ``bool``, in order to
    be usable in a memory view.
    An :class:`IndexError` is raised, if `index` is a boolean array
    whose length is not `length`.
    """
    is_bool_array = isinstance(index, np.ndarray) and index.dtype == bool
    if not is_bool_array:
        # Any other index type:
        # set the selected positions of an empty mask to 1 (-> True)
        mask = np.zeros(length, dtype=np.uint8)
        mask[index] = 1
        return mask

    # The index is already a boolean mask
    # -> only a length check and a dtype conversion are necessary
    if len(index) != length:
        raise IndexError(
            f"Boolean mask has length {len(index)}, expected {length}"
        )
    return index.astype(np.uint8, copy=False)
1338
+
1339
+
1340
+
1341
+
1342
# Default ranges of bond distances for pairs of chemical elements
# Each key is a tuple of two upper case element symbols,
# each value is a tuple (lower bound, upper bound) of the distance
# NOTE(review): values are presumably given in Angstrom - confirm
# NOTE(review): all keys have their elements in alphabetical order,
# except the last one ("SI", "SE") - verify against the key lookup
_DEFAULT_DISTANCE_RANGE = {
    # Taken from Allen et al.
    # The bounds are widened by two standard deviations:
    # min - 2*std                max + 2*std
    ("B", "C" ) : (1.556 - 2*0.015, 1.556 + 2*0.015),
    ("BR", "C" ) : (1.875 - 2*0.029, 1.966 + 2*0.029),
    ("BR", "O" ) : (1.581 - 2*0.007, 1.581 + 2*0.007),
    ("C", "C" ) : (1.174 - 2*0.011, 1.588 + 2*0.025),
    ("C", "CL") : (1.713 - 2*0.011, 1.849 + 2*0.011),
    ("C", "F" ) : (1.320 - 2*0.009, 1.428 + 2*0.009),
    ("C", "H" ) : (1.059 - 2*0.030, 1.099 + 2*0.007),
    ("C", "I" ) : (2.095 - 2*0.015, 2.162 + 2*0.015),
    ("C", "N" ) : (1.325 - 2*0.009, 1.552 + 2*0.023),
    ("C", "O" ) : (1.187 - 2*0.011, 1.477 + 2*0.008),
    ("C", "P" ) : (1.791 - 2*0.006, 1.855 + 2*0.019),
    ("C", "S" ) : (1.630 - 2*0.014, 1.863 + 2*0.015),
    ("C", "SE") : (1.893 - 2*0.013, 1.970 + 2*0.032),
    ("C", "SI") : (1.837 - 2*0.012, 1.888 + 2*0.023),
    ("CL", "O" ) : (1.414 - 2*0.026, 1.414 + 2*0.026),
    ("CL", "P" ) : (1.997 - 2*0.035, 2.008 + 2*0.035),
    ("CL", "S" ) : (2.072 - 2*0.023, 2.072 + 2*0.023),
    ("CL", "SI") : (2.072 - 2*0.009, 2.072 + 2*0.009),
    ("F", "N" ) : (1.406 - 2*0.016, 1.406 + 2*0.016),
    ("F", "P" ) : (1.495 - 2*0.016, 1.579 + 2*0.025),
    ("F", "S" ) : (1.640 - 2*0.011, 1.640 + 2*0.011),
    ("F", "SI") : (1.588 - 2*0.014, 1.694 + 2*0.013),
    ("H", "N" ) : (1.009 - 2*0.022, 1.033 + 2*0.022),
    ("H", "O" ) : (0.967 - 2*0.010, 1.015 + 2*0.017),
    ("I", "O" ) : (2.144 - 2*0.028, 2.144 + 2*0.028),
    ("N", "N" ) : (1.124 - 2*0.015, 1.454 + 2*0.021),
    ("N", "O" ) : (1.210 - 2*0.011, 1.463 + 2*0.012),
    ("N", "P" ) : (1.571 - 2*0.013, 1.697 + 2*0.015),
    ("N", "S" ) : (1.541 - 2*0.022, 1.710 + 2*0.019),
    ("N", "SI") : (1.711 - 2*0.019, 1.748 + 2*0.022),
    ("O", "P" ) : (1.449 - 2*0.007, 1.689 + 2*0.024),
    ("O", "S" ) : (1.423 - 2*0.008, 1.580 + 2*0.015),
    ("O", "SI") : (1.622 - 2*0.014, 1.680 + 2*0.008),
    ("P", "P" ) : (2.214 - 2*0.022, 2.214 + 2*0.022),
    ("P", "S" ) : (1.913 - 2*0.014, 1.954 + 2*0.005),
    ("P", "SE") : (2.093 - 2*0.019, 2.093 + 2*0.019),
    ("P", "SI") : (2.264 - 2*0.019, 2.264 + 2*0.019),
    ("S", "S" ) : (1.897 - 2*0.012, 2.070 + 2*0.022),
    ("S", "SE") : (2.193 - 2*0.015, 2.193 + 2*0.015),
    ("S", "SI") : (2.145 - 2*0.020, 2.145 + 2*0.020),
    ("SE", "SE") : (2.340 - 2*0.024, 2.340 + 2*0.024),
    ("SI", "SE") : (2.359 - 2*0.012, 2.359 + 2*0.012),
}
1388
+
1389
def connect_via_distances(atoms, dict distance_range=None, atom_mask=None,
                          bint inter_residue=True,
                          default_bond_type=BondType.ANY, bint periodic=False):
    """
    connect_via_distances(atoms, distance_range=None, atom_mask=None,
                          inter_residue=True, default_bond_type=BondType.ANY,
                          periodic=False)

    Create a :class:`BondList` for a given atom array, based on
    pairwise atom distances.

    A :attr:`BondType.ANY` bond is created for two atoms within the
    same residue, if the distance between them is within the expected
    bond distance range.
    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray
        The structure to create the :class:`BondList` for.
    distance_range : dict of tuple(str, str) -> tuple(float, float), optional
        Custom minimum and maximum bond distances.
        The dictionary keys are tuples of chemical elements representing
        the atoms to be potentially bonded.
        The order of elements within each tuple does not matter.
        The dictionary values are the minimum and maximum bond distance,
        respectively, for the given combination of elements.
        This parameter updates the default dictionary.
        Hence, the default bond distances for missing element pairs are
        still taken from the default dictionary.
        The default bond distances are taken from :footcite:`Allen1987`.
    atom_mask : ndarray, dtype=bool, shape=(n,), optional
        DEPRECATED: This option has no effect.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    default_bond_type : BondType or int, optional
        By default, all created bonds have :attr:`BondType.ANY`.
        An alternative :class:`BondType` can be given in this parameter.
    periodic : bool, optional
        If set to true, bonds can also be detected in periodic
        boundary conditions.
        The `box` attribute of `atoms` is required in this case.

    Returns
    -------
    BondList
        The created bond list.

    See also
    --------
    connect_via_residue_names

    Notes
    -----
    This method might miss bonds, if the bond distance is unexpectedly
    high or low, or it might create false bonds, if two atoms within a
    residue are accidentally in the right distance.
    A more accurate method for determining bonds is
    :func:`connect_via_residue_names()`.

    References
    ----------

    .. footbibliography::
    """
    from .atoms import AtomArray
    from .geometry import distance
    from .residues import get_residue_starts

    cdef list found_bonds = []
    cdef int res_i
    cdef int res_start, res_stop
    cdef np.ndarray all_coord = atoms.coord
    cdef np.ndarray res_coord
    cdef np.ndarray pair_distances
    cdef float pair_dist
    cdef np.ndarray all_elements = atoms.element
    cdef np.ndarray res_elements
    cdef int first, second
    cdef dict limits_for_elements = {}
    cdef tuple limits
    cdef float lower, upper

    if not isinstance(atoms, AtomArray):
        raise TypeError(f"Expected 'AtomArray', not '{type(atoms).__name__}'")
    # The box is only handed to the distance measurement
    # if periodicity is requested
    box = None
    if periodic:
        if atoms.box is None:
            raise BadStructureError("Atom array has no box")
        box = atoms.box

    # Build the lookup table of distance limits:
    # defaults come first, so custom entries overwrite them
    custom_ranges = {} if distance_range is None else distance_range
    for range_source in (_DEFAULT_DISTANCE_RANGE, custom_ranges):
        for (element1, element2), limit_pair in range_source.items():
            upper_element1 = element1.upper()
            upper_element2 = element2.upper()
            # Register both element orders,
            # so the lookup does not depend on the atom order
            limits_for_elements[(upper_element1, upper_element2)] = limit_pair
            limits_for_elements[(upper_element2, upper_element1)] = limit_pair

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last entry is the exclusive stop -> it does not begin a residue
    for res_i in range(len(residue_starts) - 1):
        res_start = residue_starts[res_i]
        res_stop = residue_starts[res_i + 1]

        res_elements = all_elements[res_start:res_stop]
        res_coord = all_coord[res_start:res_stop]
        # All-against-all distance matrix for the atoms of this residue
        pair_distances = distance(
            res_coord[:, np.newaxis, :],
            res_coord[np.newaxis, :, :],
            box
        )
        # Visit each unordered atom pair once (second < first)
        for first in range(len(res_elements)):
            for second in range(first):
                limits = limits_for_elements.get(
                    (res_elements[first], res_elements[second])
                )
                if limits is None:
                    # Unknown element combination -> no bond
                    continue
                lower, upper = limits
                pair_dist = pair_distances[first, second]
                if lower <= pair_dist and pair_dist <= upper:
                    found_bonds.append((
                        res_start + first,
                        res_start + second,
                        default_bond_type
                    ))

    intra_bond_list = BondList(atoms.array_length(), np.array(found_bonds))
    if not inter_residue:
        return intra_bond_list

    inter_bond_list = _connect_inter_residue(atoms, residue_starts)
    if default_bond_type == BondType.ANY:
        # Every bond is supposed to be of type ANY
        # -> drop the bond order of the inter-residue bonds as well
        inter_bond_list.remove_bond_order()
    return intra_bond_list.merge(inter_bond_list)
1541
+
1542
+
1543
+
1544
def connect_via_residue_names(atoms, atom_mask=None, bint inter_residue=True,
                              dict custom_bond_dict=None):
    """
    connect_via_residue_names(atoms, atom_mask=None, inter_residue=True,
                              custom_bond_dict=None)

    Create a :class:`BondList` for a given atom array (stack), based on
    the deposited bonds for each residue in the RCSB ``components.cif``
    dataset.

    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The structure to create the :class:`BondList` for.
    atom_mask : ndarray, dtype=bool, shape=(n,), optional
        DEPRECATED: This option has no effect.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    custom_bond_dict : dict (str -> dict ((str, str) -> int)), optional
        A dictionary of dictionaries:
        The outer dictionary maps residue names to inner dictionaries.
        The inner dictionary maps tuples of two atom names to their
        respective :class:`BondType` (represented as integer).
        If given, these bonds are used instead of the bonds read from
        ``components.cif``.

    Returns
    -------
    BondList
        The created bond list.
        No bonds are added for residues that are not found in
        ``components.cif``.

    See also
    --------
    connect_via_distances

    Notes
    -----
    This method can only find bonds for residues in the RCSB
    *Chemical Component Dictionary*, unless `custom_bond_dict` is set.
    Although this includes most molecules one encounters, this will fail
    for exotic molecules, e.g. specialized inhibitors.

    .. currentmodule:: biotite.structure.info

    To supplement `custom_bond_dict` with bonds for residues from the
    *Chemical Component Dictionary* you can use
    :meth:`bonds_in_residue()`.

    >>> import pprint
    >>> custom_bond_dict = {
    ...     "XYZ": {
    ...         ("A", "B"): BondType.SINGLE,
    ...         ("B", "C"): BondType.SINGLE
    ...     }
    ... }
    >>> # Supplement with bonds for common residues
    >>> custom_bond_dict["ALA"] = bonds_in_residue("ALA")
    >>> pp = pprint.PrettyPrinter(width=40)
    >>> pp.pprint(custom_bond_dict)
    {'ALA': {('C', 'O'): <BondType.DOUBLE: 2>,
             ('C', 'OXT'): <BondType.SINGLE: 1>,
             ('CA', 'C'): <BondType.SINGLE: 1>,
             ('CA', 'CB'): <BondType.SINGLE: 1>,
             ('CA', 'HA'): <BondType.SINGLE: 1>,
             ('CB', 'HB1'): <BondType.SINGLE: 1>,
             ('CB', 'HB2'): <BondType.SINGLE: 1>,
             ('CB', 'HB3'): <BondType.SINGLE: 1>,
             ('N', 'CA'): <BondType.SINGLE: 1>,
             ('N', 'H'): <BondType.SINGLE: 1>,
             ('N', 'H2'): <BondType.SINGLE: 1>,
             ('OXT', 'HXT'): <BondType.SINGLE: 1>},
     'XYZ': {('A', 'B'): <BondType.SINGLE: 1>,
             ('B', 'C'): <BondType.SINGLE: 1>}}

    """
    from .info.bonds import bonds_in_residue
    from .residues import get_residue_starts

    cdef list found_bonds = []
    cdef int res_i
    cdef int k, l
    cdef int res_start, res_stop
    cdef np.ndarray all_atom_names = atoms.atom_name
    cdef np.ndarray res_atom_names
    cdef np.ndarray all_res_names = atoms.res_name
    cdef str name1, name2
    cdef int64[:] positions1, positions2
    cdef dict res_bonds

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last entry is the exclusive stop -> it does not begin a residue
    for res_i in range(len(residue_starts) - 1):
        res_start = residue_starts[res_i]
        res_stop = residue_starts[res_i + 1]

        # The residue name is read from the first atom of the residue
        if custom_bond_dict is not None:
            res_bonds = custom_bond_dict.get(all_res_names[res_start], {})
        else:
            res_bonds = bonds_in_residue(all_res_names[res_start])

        res_atom_names = all_atom_names[res_start:res_stop]
        for (name1, name2), bond_type in res_bonds.items():
            # Positions (relative to the residue start) of the atoms
            # carrying the respective atom name
            matches1 = np.where(res_atom_names == name1)[0]
            matches2 = np.where(res_atom_names == name2)[0]
            positions1 = matches1.astype(np.int64, copy=False)
            positions2 = matches2.astype(np.int64, copy=False)
            # An atom name may rarely occur more than once in a residue
            # (e.g. in altlocs) -> bond every possible combination
            for k in range(positions1.shape[0]):
                for l in range(positions2.shape[0]):
                    found_bonds.append((
                        res_start + positions1[k],
                        res_start + positions2[l],
                        bond_type
                    ))

    intra_bond_list = BondList(atoms.array_length(), np.array(found_bonds))
    if not inter_residue:
        return intra_bond_list
    return intra_bond_list.merge(_connect_inter_residue(atoms, residue_starts))
1676
+
1677
+
1678
+
1679
# Link types that mark a residue as part of a peptide chain
_PEPTIDE_LINKS = [
    "PEPTIDE LINKING",
    "L-PEPTIDE LINKING",
    "D-PEPTIDE LINKING",
]
# Link types that mark a residue as part of a nucleic acid chain
_NUCLEIC_LINKS = [
    "RNA LINKING",
    "DNA LINKING",
]

def _connect_inter_residue(atoms, residue_starts):
    """
    Create a :class:`BondList` containing the bonds between adjacent
    amino acid or nucleotide residues.

    Two subsequent residues are bonded if they share the chain ID,
    their residue IDs differ by at most 1, both are peptide linking
    (``'C'`` -> ``'N'``) or both are nucleic acid linking
    (``"O3'"`` -> ``'P'``) and the connecting atoms are present.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to create the :class:`BondList` for.
    residue_starts : ndarray, dtype=int
        Return value of
        ``get_residue_starts(atoms, add_exclusive_stop=True)``.

    Returns
    -------
    BondList
        A bond list containing all inter residue bonds.
    """
    from .info.misc import link_type

    cdef list inter_bonds = []
    cdef int res_i
    cdef np.ndarray all_atom_names = atoms.atom_name
    cdef np.ndarray all_res_names = atoms.res_name
    cdef np.ndarray all_res_ids = atoms.res_id
    cdef np.ndarray all_chain_ids = atoms.chain_id
    cdef int this_start, following_start, following_stop
    cdef str this_connector, following_connector
    cdef np.ndarray this_candidates, following_candidates

    # Neither the last residue nor the exclusive stop of 'atoms'
    # has a following residue -> both are left out
    for res_i in range(len(residue_starts) - 2):
        this_start = residue_starts[res_i]
        following_start = residue_starts[res_i + 1]
        following_stop = residue_starts[res_i + 2]

        # Residues from different chains are never connected
        if all_chain_ids[following_start] != all_chain_ids[this_start]:
            continue
        # A gap in the residue IDs means that residues are missing
        # (An equal residue ID is accepted,
        # as it appears if an insertion code is used)
        if all_res_ids[following_start] - all_res_ids[this_start] > 1:
            continue

        # Link types are looked up by residue name
        # (RCSB components.cif)
        this_link = link_type(all_res_names[this_start])
        following_link = link_type(all_res_names[following_start])

        if this_link in _PEPTIDE_LINKS and following_link in _PEPTIDE_LINKS:
            this_connector = "C"
            following_connector = "N"
        elif this_link in _NUCLEIC_LINKS and following_link in _NUCLEIC_LINKS:
            this_connector = "O3'"
            following_connector = "P"
        else:
            # Incompatible link types -> no bond between these residues
            continue

        # 'np.where()' works on a slice of the atom names
        # -> shift the result by the start of the slice
        #    to get indices that point into the entire atom array
        this_candidates = this_start + np.where(
            all_atom_names[this_start:following_start] == this_connector
        )[0]
        following_candidates = following_start + np.where(
            all_atom_names[following_start:following_stop]
            == following_connector
        )[0]
        if len(this_candidates) == 0 or len(following_candidates) == 0:
            # At least one of the connecting atoms is missing
            # -> no bond between these residues
            continue

        # If a connector name appears multiple times,
        # only its first occurrence is bonded
        inter_bonds.append((
            this_candidates[0],
            following_candidates[0],
            BondType.SINGLE
        ))

    return BondList(
        atoms.array_length(), np.array(inter_bonds, dtype=np.uint32)
    )
1768
+
1769
+
1770
+
1771
def find_connected(bond_list, uint32 root, bint as_mask=False):
    """
    find_connected(bond_list, root, as_mask=False)

    Get indices to all atoms that are directly or indirectly connected
    to the root atom indicated by the given index.

    An atom is *connected* to the `root` atom, if that atom is reachable
    by traversing an arbitrary number of bonds, starting from the
    `root`.
    Effectively, this means that all atoms are *connected* to `root`,
    that are in the same molecule as `root`.
    Per definition `root` is also *connected* to itself.

    Parameters
    ----------
    bond_list : BondList
        The reference bond list.
    root : int
        The index of the root atom.
    as_mask : bool, optional
        If true, the connected atom indices are returned as boolean
        mask.
        By default, the connected atom indices are returned as integer
        array.

    Returns
    -------
    connected : ndarray, dtype=int or ndarray, dtype=bool
        Either a boolean mask or an integer array, representing the
        connected atoms.
        In case of a boolean mask: ``connected[i] == True``, if the atom
        with index ``i`` is connected.

    Raises
    ------
    ValueError
        If `root` is not smaller than the number of atoms represented
        by `bond_list`.

    Examples
    --------
    Consider a system with 4 atoms, where only the last atom is not
    bonded with the other ones (``0-1-2 3``):

    >>> bonds = BondList(4)
    >>> bonds.add_bond(0, 1)
    >>> bonds.add_bond(1, 2)
    >>> print(find_connected(bonds, 0))
    [0 1 2]
    >>> print(find_connected(bonds, 1))
    [0 1 2]
    >>> print(find_connected(bonds, 2))
    [0 1 2]
    >>> print(find_connected(bonds, 3))
    [3]
    """
    cdef uint8[:] is_connected_mask

    # Validate the root before the adjacency table is built
    atom_count = bond_list.get_atom_count()
    if root >= atom_count:
        raise ValueError(
            f"Root atom index {root} is out of bounds for bond list "
            f"representing {atom_count} atoms"
        )

    all_bonds, _ = bond_list.get_all_bonds()
    # 'uint8' is used instead of 'bool' for the memory view
    is_connected_mask = np.zeros(atom_count, dtype=np.uint8)
    # Mark all atoms that are reachable from the root via bonds
    _find_connected(bond_list, root, is_connected_mask, all_bonds)

    connected = np.asarray(is_connected_mask)
    if as_mask:
        # Return an actual boolean array instead of the raw 'uint8'
        # memory view: used as index, the latter would be interpreted
        # as the integer indices 0 and 1 instead of a mask
        return connected.astype(bool)
    else:
        return np.where(connected)[0]
1840
+
1841
+
1842
cdef _find_connected(bond_list,
                     int32 index,
                     uint8[:] is_connected_mask,
                     int32[:,:] all_bonds):
    """
    Mark all atoms reachable from the atom at `index` in
    `is_connected_mask`.

    The traversal uses an explicit list of pending atoms instead of
    recursion, so its depth is not limited by the size of the call
    stack, even for very large molecules.

    Parameters
    ----------
    bond_list : BondList
        Not used by the traversal; kept for interface compatibility.
    index : int32
        The index of the atom to start the traversal from.
    is_connected_mask : uint8[:]
        Modified in-place: The element of each visited atom is set to
        true.
        Atoms that are already marked are considered visited and are
        not traversed again.
    all_bonds : int32[:,:]
        For each atom (first dimension) the indices of its bonded
        atoms.
        Entries with the value ``-1`` are padding and are ignored.
    """
    cdef int32 current_index
    cdef int32 connected_index
    cdef int32 j
    cdef list pending = [index]

    while pending:
        current_index = pending.pop()
        if is_connected_mask[current_index]:
            # This atom has already been visited
            continue
        is_connected_mask[current_index] = True

        for j in range(all_bonds.shape[1]):
            connected_index = all_bonds[current_index, j]
            if connected_index == -1:
                # Ignore padding values
                continue
            if not is_connected_mask[connected_index]:
                pending.append(connected_index)
1862
+
1863
+
1864
def find_rotatable_bonds(bonds):
    """
    find_rotatable_bonds(bonds)

    Find all rotatable bonds in a given :class:`BondList`.

    The following conditions must be true for a bond to be counted as
    rotatable:

        1. The bond must be a single bond (``BondType.SINGLE``)
        2. The connected atoms must not be within the same cycle/ring
        3. Both connected atoms must not be terminal, e.g. not a *C-H*
           bond, as rotation about such bonds would not change any
           coordinates

    Parameters
    ----------
    bonds : BondList
        The bonds to find the rotatable bonds in.

    Returns
    -------
    rotatable_bonds : BondList
        The subset of the input `bonds` that contains only rotatable
        bonds.

    Examples
    --------

    >>> molecule = residue("TYR")
    >>> for i, j, _ in find_rotatable_bonds(molecule.bonds).as_array():
    ...     print(molecule.atom_name[i], molecule.atom_name[j])
    N CA
    CA C
    CA CB
    C OXT
    CB CG
    CZ OH
    """
    cdef Py_ssize_t bond_i
    cdef uint32 i, j
    cdef uint32 bond_type
    # C-level constant, so the bond type check in the loop
    # does not need to compare against the Python enum
    cdef uint32 SINGLE = int(BondType.SINGLE)
    cdef bint in_same_cycle
    cdef int64[:] number_of_partners_v
    cdef uint32[:,:] bonds_v

    bond_graph = bonds.as_graph()
    # Sets allow fast membership checks for the atoms of each cycle
    cycles = [
        set(cycle)
        for cycle in nx.algorithms.cycles.cycle_basis(bond_graph)
    ]

    # Number of bonded atoms for each atom:
    # the entries of the padded adjacency table that are not -1
    number_of_partners_v = np.count_nonzero(
        bonds.get_all_bonds()[0] != -1,
        axis=1
    ).astype(np.int64, copy=False)

    rotatable_bonds = []
    bonds_v = bonds.as_array()
    for bond_i in range(bonds_v.shape[0]):
        i = bonds_v[bond_i, 0]
        j = bonds_v[bond_i, 1]
        bond_type = bonds_v[bond_i, 2]
        # Can only rotate about single bonds
        if bond_type != SINGLE:
            continue
        # Furthermore, it makes no sense to rotate about a bond,
        # that leads to a single atom
        if number_of_partners_v[i] <= 1 or number_of_partners_v[j] <= 1:
            continue
        # Cannot rotate about a bond, if the two connected atoms
        # are in a cycle
        in_same_cycle = False
        for cycle in cycles:
            if i in cycle and j in cycle:
                in_same_cycle = True
                # One common cycle is sufficient
                break
        if not in_same_cycle:
            rotatable_bonds.append((i, j, bond_type))
    return BondList(bonds.get_atom_count(), np.array(rotatable_bonds))