biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,2036 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides the :class:`BondList` and :class:`BondType` classes
|
|
7
|
+
for representing chemical bonds, and functions for finding and analyzing them.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = ["BondList", "BondType",
|
|
13
|
+
"connect_via_distances", "connect_via_residue_names",
|
|
14
|
+
"find_connected", "find_rotatable_bonds"]
|
|
15
|
+
|
|
16
|
+
cimport cython
|
|
17
|
+
cimport numpy as np
|
|
18
|
+
from libc.stdlib cimport free, realloc
|
|
19
|
+
|
|
20
|
+
from collections.abc import Sequence
|
|
21
|
+
import itertools
|
|
22
|
+
import numbers
|
|
23
|
+
from enum import IntEnum
|
|
24
|
+
import networkx as nx
|
|
25
|
+
import numpy as np
|
|
26
|
+
from .error import BadStructureError
|
|
27
|
+
from ..copyable import Copyable
|
|
28
|
+
|
|
29
|
+
ctypedef np.uint64_t ptr
|
|
30
|
+
ctypedef np.uint8_t uint8
|
|
31
|
+
ctypedef np.uint16_t uint16
|
|
32
|
+
ctypedef np.uint32_t uint32
|
|
33
|
+
ctypedef np.uint64_t uint64
|
|
34
|
+
ctypedef np.int8_t int8
|
|
35
|
+
ctypedef np.int16_t int16
|
|
36
|
+
ctypedef np.int32_t int32
|
|
37
|
+
ctypedef np.int64_t int64
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
ctypedef fused IndexType:
|
|
41
|
+
uint8
|
|
42
|
+
uint16
|
|
43
|
+
uint32
|
|
44
|
+
uint64
|
|
45
|
+
int8
|
|
46
|
+
int16
|
|
47
|
+
int32
|
|
48
|
+
int64
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BondType(IntEnum):
    """
    Integer enumeration of the chemical bond types.

    - `ANY` - Used if the actual type is unknown
    - `SINGLE` - Single bond
    - `DOUBLE` - Double bond
    - `TRIPLE` - Triple bond
    - `QUADRUPLE` - A quadruple bond
    - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
    - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
    - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
    - `COORDINATION` - Coordination complex involving a metal atom
    - `AROMATIC` - Aromatic bond without specification of the formal bond
    """
    ANY = 0
    SINGLE = 1
    DOUBLE = 2
    TRIPLE = 3
    QUADRUPLE = 4
    AROMATIC_SINGLE = 5
    AROMATIC_DOUBLE = 6
    AROMATIC_TRIPLE = 7
    COORDINATION = 8
    AROMATIC = 9


    def without_aromaticity(self):
        """
        Get the non-aromatic counterpart of this bond type.

        :attr:`BondType.AROMATIC_{ORDER}` is converted into
        :attr:`BondType.{ORDER}` and :attr:`BondType.AROMATIC` is
        converted into :attr:`BondType.ANY`.
        Every other bond type is returned as it is.

        Returns
        -------
        new_bond_type : BondType
            The :class:`BondType` without aromaticity.

        Examples
        --------

        >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
        DOUBLE
        """
        # The table is built inside the method on purpose:
        # a dictionary assigned in the class body would be picked up
        # by the enum machinery as an additional member
        non_aromatic_counterpart = {
            BondType.AROMATIC_SINGLE: BondType.SINGLE,
            BondType.AROMATIC_DOUBLE: BondType.DOUBLE,
            BondType.AROMATIC_TRIPLE: BondType.TRIPLE,
            BondType.AROMATIC: BondType.ANY,
        }
        # Non-aromatic types are not part of the table -> keep them unchanged
        return non_aromatic_counterpart.get(self, self)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@cython.boundscheck(False)
|
|
109
|
+
@cython.wraparound(False)
|
|
110
|
+
class BondList(Copyable):
|
|
111
|
+
"""
|
|
112
|
+
__init__(atom_count, bonds=None)
|
|
113
|
+
|
|
114
|
+
A bond list stores indices of atoms
|
|
115
|
+
(usually of an :class:`AtomArray` or :class:`AtomArrayStack`)
|
|
116
|
+
that form chemical bonds together with the type (or order) of the
|
|
117
|
+
bond.
|
|
118
|
+
|
|
119
|
+
Internally the bonds are stored as *n x 3* :class:`ndarray`.
|
|
120
|
+
For each row, the first column specifies the index of the first
|
|
121
|
+
atom, the second column the index of the second atom involved in the
|
|
122
|
+
bond.
|
|
123
|
+
The third column stores an integer that is interpreted as member
|
|
124
|
+
of the :class:`BondType` enum, that specifies the order of the
|
|
125
|
+
bond.
|
|
126
|
+
|
|
127
|
+
When indexing a :class:`BondList`, the index is not forwarded to the
|
|
128
|
+
internal :class:`ndarray`. Instead the indexing behavior is
|
|
129
|
+
consistent with indexing an :class:`AtomArray` or
|
|
130
|
+
:class:`AtomArrayStack`:
|
|
131
|
+
Bonds with at least one atom index that is not covered by the index
|
|
132
|
+
are removed, atom indices that occur after an uncovered atom index
|
|
133
|
+
move up.
|
|
134
|
+
Effectively, this means that after indexing an :class:`AtomArray`
|
|
135
|
+
and a :class:`BondList` with the same index, the atom indices in the
|
|
136
|
+
:class:`BondList` will still point to the same atoms in the
|
|
137
|
+
:class:`AtomArray`.
|
|
138
|
+
Indexing a :class:`BondList` with a single integer is equivalent
|
|
139
|
+
to calling :func:`get_bonds()`.
|
|
140
|
+
|
|
141
|
+
The same consistency applies to adding :class:`BondList` instances
|
|
142
|
+
via the '+' operator:
|
|
143
|
+
The atom indices of the second :class:`BondList` are increased by
|
|
144
|
+
the atom count of the first :class:`BondList` and then both
|
|
145
|
+
:class:`BondList` objects are merged.
|
|
146
|
+
|
|
147
|
+
Parameters
|
|
148
|
+
----------
|
|
149
|
+
atom_count : int
|
|
150
|
+
A positive integer, that specifies the number of atoms the
|
|
151
|
+
:class:`BondList` refers to
|
|
152
|
+
(usually the length of an atom array (stack)).
|
|
153
|
+
Effectively, this value is the exclusive maximum for the indices
|
|
154
|
+
stored in the :class:`BondList`.
|
|
155
|
+
bonds : ndarray, shape=(n,2) or shape=(n,3), dtype=int, optional
|
|
156
|
+
This array contains the indices of atoms which are bonded:
|
|
157
|
+
For each row, the first column specifies the first atom,
|
|
158
|
+
the second column the second atom involved in a chemical bond.
|
|
159
|
+
If an *n x 3* array is provided, the additional column
|
|
160
|
+
specifies a :class:`BondType` instead of :attr:`BondType.ANY`.
|
|
161
|
+
By default, the created :class:`BondList` is empty.
|
|
162
|
+
|
|
163
|
+
Notes
|
|
164
|
+
-----
|
|
165
|
+
When initially providing the bonds as :class:`ndarray`, the input is
|
|
166
|
+
sanitized: Redundant bonds are removed, and each bond entry is
|
|
167
|
+
sorted so that the lower one of the two atom indices is in the first
|
|
168
|
+
column.
|
|
169
|
+
If a bond appears multiple times with different bond types, the
|
|
170
|
+
first bond takes precedence.
|
|
171
|
+
|
|
172
|
+
Examples
|
|
173
|
+
--------
|
|
174
|
+
|
|
175
|
+
Construct a :class:`BondList`, where a central atom (index 1) is
|
|
176
|
+
connected to three other atoms (index 0, 3 and 4):
|
|
177
|
+
|
|
178
|
+
>>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
|
|
179
|
+
>>> print(bond_list)
|
|
180
|
+
[[0 1 0]
|
|
181
|
+
[1 3 0]
|
|
182
|
+
[1 4 0]]
|
|
183
|
+
|
|
184
|
+
Remove the first atom (index 0) via indexing:
|
|
185
|
+
The bond containing index 0 is removed, since the corresponding atom
|
|
186
|
+
does not exist anymore. Since all other atoms move up in their
|
|
187
|
+
position, the indices in the bond list are decreased by one:
|
|
188
|
+
|
|
189
|
+
>>> bond_list = bond_list[1:]
|
|
190
|
+
>>> print(bond_list)
|
|
191
|
+
[[0 2 0]
|
|
192
|
+
[0 3 0]]
|
|
193
|
+
|
|
194
|
+
:class:`BondList` objects can be associated to an :class:`AtomArray`
|
|
195
|
+
or :class:`AtomArrayStack`.
|
|
196
|
+
The following snippet shows this for a benzene molecule:
|
|
197
|
+
|
|
198
|
+
>>> benzene = AtomArray(12)
|
|
199
|
+
>>> # Omit filling most required annotation categories for brevity
|
|
200
|
+
>>> benzene.atom_name = np.array(
|
|
201
|
+
... ["C1", "C2", "C3", "C4", "C5", "C6", "H1", "H2", "H3", "H4", "H5", "H6"]
|
|
202
|
+
... )
|
|
203
|
+
>>> benzene.bonds = BondList(
|
|
204
|
+
... benzene.array_length(),
|
|
205
|
+
... np.array([
|
|
206
|
+
... # Bonds between carbon atoms in the ring
|
|
207
|
+
... (0, 1, BondType.AROMATIC_SINGLE),
|
|
208
|
+
... (1, 2, BondType.AROMATIC_DOUBLE),
|
|
209
|
+
... (2, 3, BondType.AROMATIC_SINGLE),
|
|
210
|
+
... (3, 4, BondType.AROMATIC_DOUBLE),
|
|
211
|
+
... (4, 5, BondType.AROMATIC_SINGLE),
|
|
212
|
+
... (5, 0, BondType.AROMATIC_DOUBLE),
|
|
213
|
+
... # Bonds between carbon and hydrogen
|
|
214
|
+
... (0, 6, BondType.SINGLE),
|
|
215
|
+
... (1, 7, BondType.SINGLE),
|
|
216
|
+
... (2, 8, BondType.SINGLE),
|
|
217
|
+
... (3, 9, BondType.SINGLE),
|
|
218
|
+
... (4, 10, BondType.SINGLE),
|
|
219
|
+
... (5, 11, BondType.SINGLE),
|
|
220
|
+
... ])
|
|
221
|
+
... )
|
|
222
|
+
>>> for i, j, bond_type in benzene.bonds.as_array():
|
|
223
|
+
... print(
|
|
224
|
+
... f"{BondType(bond_type).name} bond between "
|
|
225
|
+
... f"{benzene.atom_name[i]} and {benzene.atom_name[j]}"
|
|
226
|
+
... )
|
|
227
|
+
AROMATIC_SINGLE bond between C1 and C2
|
|
228
|
+
AROMATIC_DOUBLE bond between C2 and C3
|
|
229
|
+
AROMATIC_SINGLE bond between C3 and C4
|
|
230
|
+
AROMATIC_DOUBLE bond between C4 and C5
|
|
231
|
+
AROMATIC_SINGLE bond between C5 and C6
|
|
232
|
+
AROMATIC_DOUBLE bond between C1 and C6
|
|
233
|
+
SINGLE bond between C1 and H1
|
|
234
|
+
SINGLE bond between C2 and H2
|
|
235
|
+
SINGLE bond between C3 and H3
|
|
236
|
+
SINGLE bond between C4 and H4
|
|
237
|
+
SINGLE bond between C5 and H5
|
|
238
|
+
SINGLE bond between C6 and H6
|
|
239
|
+
|
|
240
|
+
Obtain the bonded atoms for the :math:`C_1`:
|
|
241
|
+
|
|
242
|
+
>>> bonds, types = benzene.bonds.get_bonds(0)
|
|
243
|
+
>>> print(bonds)
|
|
244
|
+
[1 5 6]
|
|
245
|
+
>>> print(types)
|
|
246
|
+
[5 6 1]
|
|
247
|
+
>>> print(f"C1 is bonded to {', '.join(benzene.atom_name[bonds])}")
|
|
248
|
+
C1 is bonded to C2, C6, H1
|
|
249
|
+
|
|
250
|
+
Cut the benzene molecule in half.
|
|
251
|
+
Although the first half of the atoms are missing the indices of
|
|
252
|
+
the cropped :class:`BondList` still represents the bonds of the
|
|
253
|
+
remaining atoms:
|
|
254
|
+
|
|
255
|
+
>>> half_benzene = benzene[
|
|
256
|
+
... np.isin(benzene.atom_name, ["C4", "C5", "C6", "H4", "H5", "H6"])
|
|
257
|
+
... ]
|
|
258
|
+
>>> for i, j, bond_type in half_benzene.bonds.as_array():
|
|
259
|
+
... print(
|
|
260
|
+
... f"{BondType(bond_type).name} bond between "
|
|
261
|
+
... f"{half_benzene.atom_name[i]} and {half_benzene.atom_name[j]}"
|
|
262
|
+
... )
|
|
263
|
+
AROMATIC_DOUBLE bond between C4 and C5
|
|
264
|
+
AROMATIC_SINGLE bond between C5 and C6
|
|
265
|
+
SINGLE bond between C4 and H4
|
|
266
|
+
SINGLE bond between C5 and H5
|
|
267
|
+
SINGLE bond between C6 and H6
|
|
268
|
+
"""
|
|
269
|
+
|
|
270
|
+
def __init__(self, uint32 atom_count, np.ndarray bonds=None):
    """
    Set up the internal *n x 3* bond array from the given input.

    Parameters
    ----------
    atom_count : int
        The number of atoms the bond list refers to.
    bonds : ndarray, shape=(n,2) or shape=(n,3), optional
        The atom indices of each bond and optionally the bond type
        in the third column.
        If omitted or empty, an empty bond list is created.

    Raises
    ------
    ValueError
        If `bonds` is not two-dimensional, has neither two nor three
        columns, or contains a bond type value that is negative or
        not smaller than the number of :class:`BondType` members.
    """
    self._atom_count = atom_count

    if bonds is None or len(bonds) == 0:
        # Create empty bond list
        self._bonds = np.zeros((0, 3), dtype=np.uint32)
        self._max_bonds_per_atom = 0
        return

    if bonds.ndim != 2:
        raise ValueError("Expected a 2D-ndarray for input bonds")
    if bonds.shape[1] not in (2, 3):
        raise ValueError(
            "Input array containing bonds must be either of shape "
            "(n,2) or (n,3)"
        )

    # The third column stays at 0 (BondType.ANY),
    # if the input does not provide bond types
    self._bonds = np.zeros((bonds.shape[0], 3), dtype=np.uint32)
    # Indices are sorted per bond
    # so that the lower index is at the first position
    # NOTE(review): '_to_positive_index_array()' is defined elsewhere in
    # this module; presumably it resolves negative indices against
    # 'atom_count' - confirm
    self._bonds[:,:2] = np.sort(
        _to_positive_index_array(bonds[:,:2], atom_count), axis=1
    )

    if bonds.shape[1] == 3:
        # Input also contains the bond type value (index 2)
        bond_types = bonds[:, 2]
        if (bond_types >= len(BondType)).any():
            raise ValueError(
                f"BondType {np.max(bond_types)} is invalid"
            )
        # Negative values would be cast into the unsigned internal
        # array and end up as huge, meaningless bond types
        if (bond_types < 0).any():
            raise ValueError(
                f"BondType {np.min(bond_types)} is invalid"
            )
        self._bonds[:,2] = bond_types

    self._remove_redundant_bonds()
    self._max_bonds_per_atom = self._get_max_bonds_per_atom()
|
|
316
|
+
|
|
317
|
+
@staticmethod
def concatenate(bonds_lists):
    """
    Concatenate multiple :class:`BondList` objects into a single
    :class:`BondList`.

    The atom indices of each bond list are increased by the summed
    atom count of all bond lists preceding it.

    Parameters
    ----------
    bonds_lists : iterable object of BondList
        The bond lists to be concatenated.
        If it is empty, an empty :class:`BondList` with an atom count
        of 0 is returned.

    Returns
    -------
    concatenated_bonds : BondList
        The concatenated bond lists.

    Examples
    --------

    >>> bonds1 = BondList(2, np.array([(0, 1)]))
    >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
    >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
    >>> print(merged_bonds.get_atom_count())
    5
    >>> print(merged_bonds.as_array()[:, :2])
    [[0 1]
     [2 3]
     [2 4]]
    """
    # Ensure that the bonds_lists can be iterated over multiple times
    if not isinstance(bonds_lists, Sequence):
        bonds_lists = list(bonds_lists)

    # Neither 'np.concatenate()' nor 'max()' accept an empty input
    # -> the concatenation of nothing is an empty bond list
    if len(bonds_lists) == 0:
        return BondList(0)

    # 'np.concatenate()' creates a new array,
    # so the input bond lists are not altered by the offset below
    cdef np.ndarray merged_bonds = np.concatenate(
        [bond_list._bonds for bond_list in bonds_lists]
    )
    # Offset the indices of appended bonds list
    # (consistent with addition of AtomArray)
    cdef int start = 0, stop = 0
    cdef int cum_atom_count = 0
    for bond_list in bonds_lists:
        stop = start + bond_list._bonds.shape[0]
        merged_bonds[start : stop, :2] += cum_atom_count
        cum_atom_count += bond_list._atom_count
        start = stop

    cdef merged_bond_list = BondList(cum_atom_count)
    # Array is not used in constructor to prevent unnecessary
    # maximum and redundant bond calculation
    merged_bond_list._bonds = merged_bonds
    # Bonds are never shared between the input lists, so no atom can
    # have more bonds in the merged list than in its original list
    merged_bond_list._max_bonds_per_atom = max(
        [bond_list._max_bonds_per_atom for bond_list in bonds_lists]
    )
    return merged_bond_list
|
|
371
|
+
|
|
372
|
+
def __copy_create__(self):
    # First stage of copying: hand back a bond-free BondList
    # with the same atom count.
    # No bond array is given to the constructor on purpose, so the
    # removal of redundant bonds and the calculation of the maximum
    # number of bonds per atom are not triggered needlessly
    atom_count = self._atom_count
    return BondList(atom_count)
|
|
377
|
+
|
|
378
|
+
def __copy_fill__(self, clone):
    # Second stage of copying: transfer the actual content
    # into the (still empty) clone.
    # The cached maximum number of bonds per atom is taken over as is
    clone._max_bonds_per_atom = self._max_bonds_per_atom
    # The bond array is duplicated, so the clone does not share
    # memory with this instance
    clone._bonds = self._bonds.copy()
|
|
382
|
+
|
|
383
|
+
def offset_indices(self, int offset):
    """
    offset_indices(offset)

    Increase all atom indices in the :class:`BondList` by the given
    offset.

    Implicitly this increases the atom count by the same value.
    The operation is performed in-place.

    Parameters
    ----------
    offset : int
        The atom indices are increased by this value.
        Must not be negative.

    Raises
    ------
    ValueError
        If `offset` is negative.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> print(bond_list)
    [[0 1 0]
     [1 3 0]
     [1 4 0]]
    >>> bond_list.offset_indices(2)
    >>> print(bond_list)
    [[2 3 0]
     [3 5 0]
     [3 6 0]]
    """
    if offset < 0:
        raise ValueError("Offset must be positive")
    # Only the two atom index columns are shifted,
    # the bond type column stays untouched
    self._bonds[:,:2] += offset
    self._atom_count += offset
|
|
416
|
+
|
|
417
|
+
def as_array(self):
    """
    as_array()

    Obtain a copy of the internal :class:`ndarray`.

    Returns
    -------
    array : ndarray, shape=(n,3), dtype=np.uint32
        Copy of the internal :class:`ndarray`.
        Each row describes one bond:
        The first and second column contain the indices of the two
        bonded atoms, the third column contains the
        :class:`BondType`.
    """
    # A copy is returned, so changes by the caller
    # do not affect this BondList
    internal_bonds = self._bonds
    return internal_bonds.copy()
|
|
433
|
+
|
|
434
|
+
def as_set(self):
    """
    as_set()

    Obtain a set representation of the :class:`BondList`.

    Returns
    -------
    bond_set : set of tuple(int, int, int)
        A set of tuples, one tuple for each bond.
        The first two integers of a tuple are the indices of the
        bonded atoms, the third integer is the :class:`BondType`.
    """
    cdef uint32[:,:] bonds_v = self._bonds
    cdef int row
    cdef set unique_bonds = set()
    for row in range(bonds_v.shape[0]):
        # Each row of the bond array becomes one hashable tuple
        unique_bonds.add(
            (bonds_v[row,0], bonds_v[row,1], bonds_v[row,2])
        )
    return unique_bonds
|
|
457
|
+
|
|
458
|
+
def as_graph(self):
    """
    as_graph()

    Obtain a graph representation of the :class:`BondList`.

    Returns
    -------
    graph : Graph
        A *NetworkX* :class:`Graph`.
        The atom indices are nodes, the bonds are edges.
        Each edge has a ``"bond_type"`` attribute containing the
        :class:`BondType`.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0,2), (1,3,1), (1,4,1)]))
    >>> graph = bond_list.as_graph()
    >>> print(graph.nodes)
    [0, 1, 3, 4]
    >>> print(graph.edges)
    [(0, 1), (1, 3), (1, 4)]
    >>> for i, j in graph.edges:
    ...     print(i, j, graph.get_edge_data(i, j))
    0 1 {'bond_type': <BondType.DOUBLE: 2>}
    1 3 {'bond_type': <BondType.SINGLE: 1>}
    1 4 {'bond_type': <BondType.SINGLE: 1>}
    """
    cdef int row
    cdef uint32[:,:] bonds_v = self._bonds

    # Collect all edges first and add them to the graph in one call
    cdef list edge_list = []
    for row in range(bonds_v.shape[0]):
        edge_list.append((
            bonds_v[row,0],
            bonds_v[row,1],
            {"bond_type": BondType(bonds_v[row,2])},
        ))

    graph = nx.Graph()
    graph.add_edges_from(edge_list)
    return graph
|
|
500
|
+
|
|
501
|
+
def remove_aromaticity(self):
    """
    Remove aromaticity from the bond types.

    :attr:`BondType.AROMATIC_{ORDER}` is converted into
    :attr:`BondType.{ORDER}` and :attr:`BondType.AROMATIC` is
    converted into :attr:`BondType.ANY`.
    The conversion is performed in-place.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_aromaticity()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 SINGLE
    1 2 DOUBLE
    """
    # View on the bond type column:
    # assignments to it modify the internal bond array
    bond_types = self._bonds[:, 2]
    replacements = (
        (BondType.AROMATIC_SINGLE, BondType.SINGLE),
        (BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
        (BondType.AROMATIC_TRIPLE, BondType.TRIPLE),
        (BondType.AROMATIC, BondType.ANY),
    )
    for aromatic, non_aromatic in replacements:
        bond_types[bond_types == aromatic] = non_aromatic
|
|
528
|
+
|
|
529
|
+
def remove_kekulization(self):
    """
    Remove the bond order information from aromatic bonds, i.e.
    convert :attr:`BondType.AROMATIC_SINGLE`,
    :attr:`BondType.AROMATIC_DOUBLE` and
    :attr:`BondType.AROMATIC_TRIPLE` to :attr:`BondType.AROMATIC`.
    The conversion is performed in-place.

    Examples
    --------

    >>> bond_list = BondList(3)
    >>> bond_list.add_bond(0, 1, BondType.AROMATIC_SINGLE)
    >>> bond_list.add_bond(1, 2, BondType.AROMATIC_DOUBLE)
    >>> bond_list.remove_kekulization()
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 AROMATIC
    1 2 AROMATIC
    """
    # The aromatic bond types that carry a bond order
    kekulized_types = (
        BondType.AROMATIC_SINGLE,
        BondType.AROMATIC_DOUBLE,
        BondType.AROMATIC_TRIPLE,
    )
    # View on the bond type column of the internal bond array
    bond_types = self._bonds[:, 2]
    bond_types[np.isin(bond_types, kekulized_types)] = BondType.AROMATIC
|
|
555
|
+
|
|
556
|
+
def remove_bond_order(self):
    """
    Convert all bonds to :attr:`BondType.ANY`.

    The conversion is performed in-place.
    """
    # View on the bond type column of the internal bond array
    bond_types = self._bonds[:, 2]
    bond_types[:] = BondType.ANY
|
|
561
|
+
|
|
562
|
+
def convert_bond_type(self, original_bond_type, new_bond_type):
    """
    convert_bond_type(original_bond_type, new_bond_type)

    Convert all occurrences of a given bond type into another bond
    type.
    The conversion is performed in-place.

    Parameters
    ----------
    original_bond_type : BondType or int
        The bond type to convert.
    new_bond_type : BondType or int
        The new bond type.

    Examples
    --------

    >>> bond_list = BondList(4)
    >>> bond_list.add_bond(0, 1, BondType.DOUBLE)
    >>> bond_list.add_bond(1, 2, BondType.COORDINATION)
    >>> bond_list.add_bond(2, 3, BondType.COORDINATION)
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 DOUBLE
    1 2 COORDINATION
    2 3 COORDINATION
    >>> bond_list.convert_bond_type(BondType.COORDINATION, BondType.SINGLE)
    >>> for i, j, bond_type in bond_list.as_array():
    ...     print(i, j, BondType(bond_type).name)
    0 1 DOUBLE
    1 2 SINGLE
    2 3 SINGLE
    """
    # View on the bond type column:
    # the masked assignment modifies the internal bond array
    bond_types = self._bonds[:, 2]
    bond_types[bond_types == original_bond_type] = new_bond_type
|
|
596
|
+
|
|
597
|
+
def get_atom_count(self):
    """
    get_atom_count()

    Get the atom count.

    Returns
    -------
    atom_count : int
        The number of atoms this :class:`BondList` refers to.
    """
    atom_count = self._atom_count
    return atom_count
|
|
609
|
+
|
|
610
|
+
def get_bond_count(self):
    """
    get_bond_count()

    Get the amount of bonds.

    Returns
    -------
    bond_count : int
        The amount of bonds.
        This is equal to the number of rows of the internal
        :class:`ndarray` containing the bonds.
    """
    # Each row of the internal array represents one bond
    return self._bonds.shape[0]
|
|
623
|
+
|
|
624
|
+
def get_bonds(self, int32 atom_index):
    """
    get_bonds(atom_index)

    Obtain the indices of the atoms bonded to the atom with the
    given index as well as the corresponding bond types.

    Parameters
    ----------
    atom_index : int
        The index of the atom to get the bonds for.

    Returns
    -------
    bonds : np.ndarray, dtype=np.uint32, shape=(k,)
        The indices of connected atoms.
    bond_types : np.ndarray, dtype=np.uint8, shape=(k,)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the type (or order) of the bonds to
        the connected atoms.

    Examples
    --------

    >>> bond_list = BondList(5, np.array([(1,0),(1,3),(1,4)]))
    >>> bonds, types = bond_list.get_bonds(1)
    >>> print(bonds)
    [0 3 4]
    """
    cdef int row = 0, found = 0
    cdef uint32 partner

    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    cdef uint32[:,:] bonds_v = self._bonds
    # The result arrays are allocated with the largest size that
    # may be required, i.e. the bond count of the atom with
    # the most bonds
    cdef np.ndarray partners = np.zeros(
        self._max_bonds_per_atom, dtype=np.uint32
    )
    cdef uint32[:] partners_v = partners
    cdef np.ndarray partner_types = np.zeros(
        self._max_bonds_per_atom, dtype=np.uint8
    )
    cdef uint8[:] partner_types_v = partner_types

    for row in range(bonds_v.shape[0]):
        # The requested atom may appear at either position of a bond:
        # the atom at the respective other position is its partner
        if bonds_v[row,0] == index:
            partner = bonds_v[row,1]
        elif bonds_v[row,1] == index:
            partner = bonds_v[row,0]
        else:
            # The requested atom is not part of this bond
            continue
        partners_v[found] = partner
        partner_types_v[found] = bonds_v[row,2]
        found += 1

    # Cut off the unused part of the pessimistic allocation
    return partners[:found], partner_types[:found]
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def get_all_bonds(self):
    """
    get_all_bonds()

    For each atom index, give the indices of the atoms bonded to
    this atom as well as the corresponding bond types.

    Returns
    -------
    bonds : np.ndarray, dtype=np.int32, shape=(n,k)
        The indices of connected atoms.
        The first dimension represents the atoms,
        the second dimension represents the indices of atoms bonded
        to the respective atom.
        Atoms can have different numbers of atoms bonded to
        them.
        Therefore, the length of the second dimension *k* is equal
        to the maximum number of bonds for an atom in this
        :class:`BondList`.
        For atoms with less bonds, the corresponding entry in the
        array is padded with ``-1`` values.
    bond_types : np.ndarray, dtype=np.int8, shape=(n,k)
        Array of integers, interpreted as :class:`BondType`
        instances.
        This array specifies the bond type (or order) corresponding
        to the returned `bonds`.
        It uses the same ``-1``-padding.

    Examples
    --------

    >>> # BondList for benzene
    >>> bond_list = BondList(
    ...     12,
    ...     np.array([
    ...         # Bonds between the carbon atoms in the ring
    ...         (0,  1, BondType.AROMATIC_SINGLE),
    ...         (1,  2, BondType.AROMATIC_DOUBLE),
    ...         (2,  3, BondType.AROMATIC_SINGLE),
    ...         (3,  4, BondType.AROMATIC_DOUBLE),
    ...         (4,  5, BondType.AROMATIC_SINGLE),
    ...         (5,  0, BondType.AROMATIC_DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0,  6, BondType.SINGLE),
    ...         (1,  7, BondType.SINGLE),
    ...         (2,  8, BondType.SINGLE),
    ...         (3,  9, BondType.SINGLE),
    ...         (4, 10, BondType.SINGLE),
    ...         (5, 11, BondType.SINGLE),
    ...     ])
    ... )
    >>> bonds, types = bond_list.get_all_bonds()
    >>> print(bonds)
    [[ 1  5  6]
     [ 0  2  7]
     [ 1  3  8]
     [ 2  4  9]
     [ 3  5 10]
     [ 4  0 11]
     [ 0 -1 -1]
     [ 1 -1 -1]
     [ 2 -1 -1]
     [ 3 -1 -1]
     [ 4 -1 -1]
     [ 5 -1 -1]]
    >>> print(types)
    [[ 5  6  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 6  5  1]
     [ 5  6  1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]
     [ 1 -1 -1]]
    >>> for i in range(bond_list.get_atom_count()):
    ...     bonds_for_atom = bonds[i]
    ...     # Remove trailing '-1' values
    ...     bonds_for_atom = bonds_for_atom[bonds_for_atom != -1]
    ...     print(f"{i}: {bonds_for_atom}")
    0: [1 5 6]
    1: [0 2 7]
    2: [1 3 8]
    3: [2 4 9]
    4: [ 3  5 10]
    5: [ 4  0 11]
    6: [0]
    7: [1]
    8: [2]
    9: [3]
    10: [4]
    11: [5]
    """
    cdef int row = 0
    cdef uint32 atom_index_i, atom_index_j, bond_type

    cdef uint32[:,:] all_bonds_v = self._bonds
    # The size of the second dimension is given by the atom with
    # the most bonds.
    # As the number of bonded atoms differs between atoms, both
    # arrays are initialized with the padding value '-1';
    # this is also the reason for the signed data types
    cdef np.ndarray bonds = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int32
    )
    cdef int32[:,:] bonds_v = bonds
    cdef np.ndarray bond_types = np.full(
        (self._atom_count, self._max_bonds_per_atom), -1, dtype=np.int8
    )
    cdef int8[:,:] bond_types_v = bond_types
    # For each atom the number of bonds found so far,
    # i.e. the next free column in its row
    cdef np.ndarray lengths = np.zeros(self._atom_count, dtype=np.uint32)
    cdef uint32[:] lengths_v = lengths

    for row in range(all_bonds_v.shape[0]):
        atom_index_i = all_bonds_v[row,0]
        atom_index_j = all_bonds_v[row,1]
        bond_type = all_bonds_v[row,2]
        # Each bond is registered twice:
        # once for the first atom and once for the second atom
        # 'lengths_v' gives the column to write to
        bonds_v[atom_index_i, lengths_v[atom_index_i]] = atom_index_j
        bonds_v[atom_index_j, lengths_v[atom_index_j]] = atom_index_i
        bond_types_v[atom_index_i, lengths_v[atom_index_i]] = bond_type
        bond_types_v[atom_index_j, lengths_v[atom_index_j]] = bond_type
        # Advance to the next free column for both atoms
        lengths_v[atom_index_i] += 1
        lengths_v[atom_index_j] += 1

    return bonds, bond_types
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def adjacency_matrix(self):
    r"""
    adjacency_matrix()

    Represent this :class:`BondList` as adjacency matrix.

    The adjacency matrix is a quadratic matrix with boolean values
    according to

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{True},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            \text{False}, & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=bool, shape=(n,n)
        The created adjacency matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.adjacency_matrix())
    [[False  True  True  True]
     [ True False False False]
     [ True False False False]
     [ True False False False]]
    """
    atom_count = self._atom_count
    first_atoms = self._bonds[:, 0]
    second_atoms = self._bonds[:, 1]

    matrix = np.zeros((atom_count, atom_count), dtype=bool)
    # Set both triangles to obtain a symmetric matrix
    matrix[first_atoms, second_atoms] = True
    matrix[second_atoms, first_atoms] = True
    return matrix
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def bond_type_matrix(self):
    r"""
    bond_type_matrix()

    Represent this :class:`BondList` as a matrix depicting the bond
    type.

    The matrix is a quadratic matrix:

    .. math::

        M_{i,j} =
        \begin{cases}
            \text{BondType}_{ij},  & \text{if } \text{Atom}_i \text{ and } \text{Atom}_j \text{ form a bond} \\
            -1,                    & \text{otherwise}
        \end{cases}.

    Returns
    -------
    matrix : ndarray, dtype=np.int8, shape=(n,n)
        The created bond type matrix.

    Examples
    --------

    >>> # BondList for formaldehyde
    >>> bond_list = BondList(
    ...     4,
    ...     np.array([
    ...         # Bond between carbon and oxygen
    ...         (0, 1, BondType.DOUBLE),
    ...         # Bonds between carbon and hydrogen
    ...         (0, 2, BondType.SINGLE),
    ...         (0, 3, BondType.SINGLE),
    ...     ])
    ... )
    >>> print(bond_list.bond_type_matrix())
    [[-1  2  1  1]
     [ 2 -1 -1 -1]
     [ 1 -1 -1 -1]
     [ 1 -1 -1 -1]]
    """
    # A signed data type is used, as '-1' marks the absence of a bond
    matrix = np.full(
        (self._atom_count, self._atom_count), -1, dtype=np.int8
    )
    # Fill both triangles to obtain a symmetric matrix
    matrix[self._bonds[:,0], self._bonds[:,1]] = self._bonds[:,2]
    matrix[self._bonds[:,1], self._bonds[:,0]] = self._bonds[:,2]
    return matrix
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
def add_bond(self, int32 atom_index1, int32 atom_index2,
             bond_type=BondType.ANY):
    """
    add_bond(atom_index1, atom_index2, bond_type=BondType.ANY)

    Add a bond to the :class:`BondList`.

    If the bond is already existent, only the bond type is updated.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms to create a bond for.
    bond_type : BondType or int, optional
        The type of the bond. Default is :attr:`BondType.ANY`.

    Raises
    ------
    ValueError
        If `bond_type` is negative or not smaller than the number
        of :class:`BondType` members.
    """
    # Negative values are rejected as well, as they cannot be
    # represented in the unsigned bond array
    if bond_type < 0 or bond_type >= len(BondType):
        raise ValueError(f"BondType {bond_type} is invalid")

    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    _sort(&index1, &index2)

    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Check if the bond is already existent in the list
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            # The bond is already present -> only update its type
            # The number of bonds per atom does not change
            all_bonds_v[i,2] = int(bond_type)
            return

    # The bond is new -> append it and refresh the cached maximum
    # number of bonds per atom
    self._bonds = np.append(
        self._bonds,
        np.array(
            [(index1, index2, int(bond_type))], dtype=np.uint32
        ),
        axis=0
    )
    self._max_bonds_per_atom = self._get_max_bonds_per_atom()
|
|
965
|
+
|
|
966
|
+
def remove_bond(self, int32 atom_index1, int32 atom_index2):
    """
    remove_bond(atom_index1, atom_index2)

    Remove a bond from the :class:`BondList`.

    If the bond is not existent in the :class:`BondList`, nothing happens.

    Parameters
    ----------
    atom_index1, atom_index2 : int
        The indices of the atoms whose bond should be removed.
    """
    cdef uint32 index1 = _to_positive_index(atom_index1, self._atom_count)
    cdef uint32 index2 = _to_positive_index(atom_index2, self._atom_count)
    _sort(&index1, &index2)

    # Find the bond in bond list
    cdef int i
    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        # Since the bonds have the atom indices sorted
        # the reverse check is omitted
        if (all_bonds_v[i,0] == index1 and all_bonds_v[i,1] == index2):
            self._bonds = np.delete(self._bonds, i, axis=0)
            # Stop searching: 'all_bonds_v' still refers to the
            # array before the deletion, so its row indices do not
            # correspond to 'self._bonds' anymore
            break
    # The maximum bonds per atom is not recalculated,
    # as the value can only be decreased on bond removal
    # Since this value is only used for pessimistic array allocation
    # in 'get_bonds()', the slightly larger memory usage is a better
    # option than the repetitive call of _get_max_bonds_per_atom()
|
|
996
|
+
|
|
997
|
+
def remove_bonds_to(self, int32 atom_index):
    """
    remove_bonds_to(atom_index)

    Remove all bonds from the :class:`BondList` where the given atom
    is involved.

    Parameters
    ----------
    atom_index : int
        The index of the atom whose bonds should be removed.
    """
    cdef uint32 index = _to_positive_index(atom_index, self._atom_count)

    bonds = self._bonds
    # A bond is affected, if the atom appears at either position
    involved = (bonds[:, 0] == index) | (bonds[:, 1] == index)
    # Keep only the bonds the atom does not take part in
    self._bonds = bonds[~involved]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
|
|
1024
|
+
|
|
1025
|
+
def remove_bonds(self, bond_list):
    """
    remove_bonds(bond_list)

    Remove multiple bonds from the :class:`BondList`.

    All bonds present in `bond_list` are removed from this instance.
    If a bond is not existent in this instance, nothing happens.
    Only the bond indices, not the bond types, are relevant for
    this.

    Parameters
    ----------
    bond_list : BondList
        The bonds in `bond_list` are removed from this instance.
    """
    cdef int own = 0, other = 0

    # The bonds of this instance
    cdef uint32[:,:] own_bonds_v = self._bonds
    # The bonds to look for
    cdef uint32[:,:] rem_bonds_v = bond_list._bonds
    # One entry per own bond: nonzero means the bond is kept
    cdef np.ndarray keep = np.ones(own_bonds_v.shape[0], dtype=np.uint8)
    cdef uint8[:] keep_v = keep

    for own in range(own_bonds_v.shape[0]):
        for other in range(rem_bonds_v.shape[0]):
            if own_bonds_v[own,0] != rem_bonds_v[other,0] \
               or own_bonds_v[own,1] != rem_bonds_v[other,1]:
                # Atom indices differ -> not the same bond
                continue
            # The own bond is listed for removal;
            # further comparisons cannot change this
            keep_v[own] = False
            break

    # Drop the marked bonds
    self._bonds = self._bonds[keep.astype(bool, copy=False)]
    # The maximum bonds per atom is not recalculated
    # (see 'remove_bond()')
|
|
1059
|
+
|
|
1060
|
+
def merge(self, bond_list):
    """
    merge(bond_list)

    Merge another :class:`BondList` with this instance into a new
    object.
    If a bond appears in both :class:`BondList`'s, the
    :class:`BondType` from the given `bond_list` takes precedence.

    The internal :class:`ndarray` instances containing the bonds are
    simply concatenated and the new atom count is the maximum of
    both bond lists.

    Parameters
    ----------
    bond_list : BondList
        This bond list is merged with this instance.

    Returns
    -------
    bond_list : BondList
        The merged :class:`BondList`.

    Notes
    -----
    This is not equal to using the `+` operator.

    Examples
    --------

    >>> bond_list1 = BondList(3, np.array([(0,1),(1,2)]))
    >>> bond_list2 = BondList(5, np.array([(2,3),(3,4)]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list.get_atom_count())
    5
    >>> print(merged_list)
    [[0 1 0]
     [1 2 0]
     [2 3 0]
     [3 4 0]]

    The BondList given as parameter takes precedence:

    >>> # Specify bond type to see where a bond is taken from
    >>> bond_list1 = BondList(4, np.array([
    ...     (0, 1, BondType.SINGLE),
    ...     (1, 2, BondType.SINGLE)
    ... ]))
    >>> bond_list2 = BondList(4, np.array([
    ...     (1, 2, BondType.DOUBLE),    # This one is a duplicate
    ...     (2, 3, BondType.DOUBLE)
    ... ]))
    >>> merged_list = bond_list2.merge(bond_list1)
    >>> print(merged_list)
    [[0 1 1]
     [1 2 1]
     [2 3 2]]
    """
    merged_atom_count = max(self._atom_count, bond_list._atom_count)
    # The bonds of the given bond list are placed first
    # NOTE(review): presumably the constructor keeps the first
    # occurrence of a duplicate bond, which would give `bond_list`
    # its precedence - confirm in the constructor
    merged_bonds = np.concatenate(
        [bond_list.as_array(), self.as_array()],
        axis=0
    )
    return BondList(merged_atom_count, merged_bonds)
|
|
1125
|
+
|
|
1126
|
+
def __add__(self, bond_list):
    # The '+' operator is a shorthand for concatenating this
    # instance (first) with the given bond list (second)
    operands = [self, bond_list]
    return BondList.concatenate(operands)
|
|
1128
|
+
|
|
1129
|
+
def __getitem__(self, index):
    # Three kinds of indices are distinguished:
    # - a single integer gives the bonds of that atom
    #   (see 'get_bonds()')
    # - a boolean mask gives a new BondList restricted to the
    #   masked atoms
    # - anything else is converted into an index array and gives a
    #   new BondList restricted to (and reordered by) these indices

    ## Variables for both, boolean masks and index arrays
    cdef uint32[:,:] all_bonds_v
    cdef int i
    cdef uint32* index1_ptr
    cdef uint32* index2_ptr
    # Marks the bonds that are kept (nonzero) or removed (zero)
    cdef np.ndarray keep
    cdef uint8[:] keep_v

    ## Variables for index arrays
    cdef int32[:] inverse_index_v
    cdef int32 new_index1, new_index2

    ## Variables for boolean masks
    # 'uint8' representation of the boolean mask
    cdef np.ndarray mask
    cdef uint8[:] mask_v
    # For each atom the number of unmasked atoms up to this atom
    cdef np.ndarray offsets
    cdef uint32[:] offsets_v

    if isinstance(index, numbers.Integral):
        ## Handle single index
        return self.get_bonds(index)

    # Both remaining cases work on a copy of this instance
    subset = self.copy()
    all_bonds_v = subset._bonds
    keep = np.ones(all_bonds_v.shape[0], dtype=np.uint8)
    keep_v = keep

    if isinstance(index, np.ndarray) and index.dtype == bool:
        ## Handle boolean masks
        # Use 'uint8' instead of 'bool' for memory view
        mask = np.frombuffer(index, dtype=np.uint8)
        mask_v = mask

        # Each time an atom is missing in the mask,
        # the offset is increased by one
        offsets = np.cumsum(mask == 0, dtype=np.uint32)
        offsets_v = offsets

        # A bond is only kept, if both of its atoms are masked
        # The atom indices of a kept bond are decreased by the
        # respective offset, as removing atoms from an AtomArray
        # decreases the index of the following atoms
        for i in range(all_bonds_v.shape[0]):
            # Usage of pointer to increase performance
            # as redundant indexing is avoided
            index1_ptr = &all_bonds_v[i,0]
            index2_ptr = &all_bonds_v[i,1]
            if not (mask_v[index1_ptr[0]] and mask_v[index2_ptr[0]]):
                # At least one atom involved in bond is not masked
                # -> remove bond
                keep_v[i] = False
                continue
            # Both atoms involved in bond are masked
            # -> decrease atom index by offset
            index1_ptr[0] -= offsets_v[index1_ptr[0]]
            index2_ptr[0] -= offsets_v[index2_ptr[0]]

        # Apply the bond removal filter
        subset._bonds = subset._bonds[keep.astype(bool, copy=False)]
        # The new atom count is the number of masked atoms
        subset._atom_count = int(np.count_nonzero(mask))
        subset._max_bonds_per_atom = subset._get_max_bonds_per_atom()
        return subset

    ## Convert any other type of index into index array,
    ## as it preserves order
    index = _to_index_array(index, self._atom_count)
    index = _to_positive_index_array(index, self._atom_count)

    # The inverse index is required to efficiently obtain
    # the new index of an atom in case of an unsorted index
    # array
    inverse_index_v = _invert_index(index, self._atom_count)
    for i in range(all_bonds_v.shape[0]):
        # Usage of pointer to increase performance
        # as redundant indexing is avoided
        index1_ptr = &all_bonds_v[i,0]
        index2_ptr = &all_bonds_v[i,1]
        new_index1 = inverse_index_v[index1_ptr[0]]
        new_index2 = inverse_index_v[index2_ptr[0]]
        if new_index1 == -1 or new_index2 == -1:
            # At least one atom in bond is not included
            # by index array
            # -> remove bond
            keep_v[i] = False
            continue
        # Both atoms involved in bond are included
        # -> assign new atom indices
        index1_ptr[0] = <int32>new_index1
        index2_ptr[0] = <int32>new_index2

    # Apply the bond removal filter
    subset._bonds = subset._bonds[keep.astype(bool, copy=False)]
    # Again, sort indices per bond
    # as the correct order is not guaranteed anymore
    # for unsorted index arrays
    subset._bonds[:,:2] = np.sort(subset._bonds[:,:2], axis=1)
    subset._atom_count = len(index)
    subset._max_bonds_per_atom = subset._get_max_bonds_per_atom()
    return subset
|
|
1237
|
+
|
|
1238
|
+
def __iter__(self):
    # Iteration is deliberately unsupported:
    # always reject it with an explicit error
    message = "'BondList' object is not iterable"
    raise TypeError(message)
|
|
1240
|
+
|
|
1241
|
+
def __str__(self):
    # The string representation is the one of the array
    # returned by 'as_array()'
    bond_array = self.as_array()
    return str(bond_array)
|
|
1243
|
+
|
|
1244
|
+
def __eq__(self, item):
    # Only another 'BondList' can be equal
    if isinstance(item, BondList):
        if self._atom_count != item._atom_count:
            return False
        # Compare the bonds as sets, i.e. independent of their order
        return self.as_set() == item.as_set()
    return False
|
|
1249
|
+
|
|
1250
|
+
def __contains__(self, item):
    """
    Check whether a bond between two atoms is listed in this object.

    Parameters
    ----------
    item : tuple(int, int)
        The indices of the two atoms.
        The order of the two indices does not matter.

    Returns
    -------
    bool
        True, if a bond with these two atom indices is found.

    Raises
    ------
    TypeError
        If `item` is not a tuple with exactly two elements.
    """
    # Reject everything that is not a pair of indices
    # (the length of 'item' is checked, not the one of the type)
    if not isinstance(item, tuple) or len(item) != 2:
        raise TypeError("Expected a tuple of atom indices")

    cdef int i=0

    cdef uint32 match_index1, match_index2
    # Order the query the same way the indices in each stored bond
    # are expected to be ordered (smaller index first),
    # so that a single comparison per bond is sufficient
    cdef uint32 atom_index1 = min(item)
    cdef uint32 atom_index2 = max(item)

    cdef uint32[:,:] all_bonds_v = self._bonds
    for i in range(all_bonds_v.shape[0]):
        match_index1 = all_bonds_v[i,0]
        match_index2 = all_bonds_v[i,1]
        if atom_index1 == match_index1 and atom_index2 == match_index2:
            return True

    return False
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
def _get_max_bonds_per_atom(self):
    """
    Count for each atom index the number of bonds it appears in
    and return the highest of these counts.

    If the atom count is zero, ``0`` is returned.
    """
    cdef int bond_i
    cdef uint32[:,:] bonds_v
    cdef np.ndarray partner_count
    cdef uint32[:] partner_count_v

    if self._atom_count == 0:
        return 0

    bonds_v = self._bonds
    # One counter per atom index
    partner_count = np.zeros(self._atom_count, dtype=np.uint32)
    partner_count_v = partner_count
    for bond_i in range(bonds_v.shape[0]):
        # Both atoms of the bond gain one bond partner
        partner_count_v[bonds_v[bond_i,0]] += 1
        partner_count_v[bonds_v[bond_i,1]] += 1
    return np.max(partner_count_v)
|
|
1286
|
+
|
|
1287
|
+
def _remove_redundant_bonds(self):
    """
    Remove bonds from ``self._bonds`` whose pair of atom indices
    already appeared in an earlier row.

    The first occurrence of each pair is kept.
    Only the two atom index columns are compared.
    ``self._bonds`` is replaced by the filtered array.
    """
    cdef int j
    cdef uint32[:,:] all_bonds_v = self._bonds
    # Boolean mask for final removal of redundant bonds
    # Unfortunately views of boolean ndarrays are not supported
    # -> use uint8 array
    cdef np.ndarray redundancy_filter = np.ones(all_bonds_v.shape[0],
                                                dtype=np.uint8)
    cdef uint8[:] redundancy_filter_v = redundancy_filter
    # Array of pointers to C-arrays
    # The array is indexed with the atom indices in the bond list
    # The respective C-array contains the indices of bonded atoms
    # A zero entry is a NULL pointer, i.e. no C-array allocated yet
    cdef ptr[:] ptrs_v = np.zeros(self._atom_count, dtype=np.uint64)
    # Stores the length of the C-arrays
    cdef int[:] array_len_v = np.zeros(self._atom_count, dtype=np.int32)
    # Iterate over bond list:
    # If bond is already listed in the array of pointers,
    # set filter to false at that position
    # Else add bond to array of pointers
    cdef uint32 i1, i2
    cdef uint32* array_ptr
    cdef int length

    try:
        for j in range(all_bonds_v.shape[0]):
            i1 = all_bonds_v[j,0]
            i2 = all_bonds_v[j,1]
            # Since the bonds have the atom indices sorted
            # the reverse check is omitted
            if _in_array(<uint32*>ptrs_v[i1], i2, array_len_v[i1]):
                redundancy_filter_v[j] = False
            else:
                # Append bond in respective C-array
                # and update C-array length
                length = array_len_v[i1] +1
                array_ptr = <uint32*>ptrs_v[i1]
                array_ptr = <uint32*>realloc(
                    array_ptr, length * sizeof(uint32)
                )
                if not array_ptr:
                    # 'ptrs_v[i1]' still holds the previous pointer,
                    # which is released in the 'finally' clause
                    raise MemoryError()
                array_ptr[length-1] = i2
                ptrs_v[i1] = <ptr>array_ptr
                array_len_v[i1] = length

    finally:
        # Free pointers, regardless of whether an error occurred
        # (entries that were never allocated are still NULL)
        for i in range(ptrs_v.shape[0]):
            free(<int*>ptrs_v[i])

    # Eventually remove redundant bonds
    self._bonds = self._bonds[redundancy_filter.astype(bool, copy=False)]
|
|
1339
|
+
|
|
1340
|
+
|
|
1341
|
+
cdef uint32 _to_positive_index(int32 index, uint32 array_length) except -1:
    """
    Convert a potentially negative index into a positive index.

    Negative values count from the end, i.e. ``-1`` refers to
    ``array_length - 1``.
    An :class:`IndexError` is raised, if the index is not within
    ``-array_length <= index < array_length``.
    """
    cdef int64 pos_index
    if index < 0:
        # Perform the addition with signed 64 bit integers:
        # An unsigned 32 bit result can never be negative,
        # so an index below '-array_length' would wrap around
        # to a large positive value instead of being detected
        pos_index = <int64> array_length + <int64> index
        if pos_index < 0:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> pos_index
    else:
        if <uint32> index >= array_length:
            raise IndexError(
                f"Index {index} is out of range "
                f"for an atom count of {array_length}"
            )
        return <uint32> index
|
|
1361
|
+
|
|
1362
|
+
|
|
1363
|
+
def _to_positive_index_array(index_array, length):
    """
    Convert potentially negative values in an array into positive
    values and check for out-of-bounds values.

    Parameters
    ----------
    index_array : ndarray, dtype=int
        The indices to convert.
        Negative values count from the end.
        The input array is not modified.
    length : int
        The length of the indexed axis, i.e. the atom count.

    Returns
    -------
    ndarray
        A new array with the same shape as `index_array`, where each
        negative value ``i`` is replaced by ``length + i``.

    Raises
    ------
    IndexError
        If any index is smaller than ``-length`` or not smaller than
        `length`.
    """
    orig_shape = index_array.shape
    # 'flatten()' always returns a copy
    # -> the input array stays untouched without an additional copy
    flat_indices = index_array.flatten()
    negatives = flat_indices < 0
    flat_indices[negatives] = length + flat_indices[negatives]
    if (flat_indices < 0).any():
        # Values that are still negative were shifted by 'length' before
        # -> undo the shift to report the index the caller provided
        raise IndexError(
            f"Index {np.min(flat_indices) - length} is out of range "
            f"for an atom count of {length}"
        )
    if (flat_indices >= length).any():
        raise IndexError(
            f"Index {np.max(flat_indices)} is out of range "
            f"for an atom count of {length}"
        )
    return flat_indices.reshape(orig_shape)
|
|
1384
|
+
|
|
1385
|
+
|
|
1386
|
+
def _to_index_array(object index, uint32 length):
    """
    Convert an index of arbitrary type into an index array.

    An integer :class:`ndarray` is returned as it is.
    Any other index is applied to the array of all indices
    from ``0`` to ``length - 1``.
    """
    is_index_array = (
        isinstance(index, np.ndarray)
        and np.issubdtype(index.dtype, np.integer)
    )
    if is_index_array:
        return index
    # Let NumPy resolve the index by applying it to all valid indices
    return np.arange(length, dtype=np.uint32)[index]
|
|
1396
|
+
|
|
1397
|
+
|
|
1398
|
+
cdef inline bint _in_array(uint32* array, uint32 atom_index, int array_length):
    """
    Check whether `atom_index` is one of the first `array_length`
    elements of the C-array `array`.
    A ``NULL`` pointer is treated as empty array.
    """
    cdef int pos
    if array != NULL:
        for pos in range(array_length):
            if array[pos] == atom_index:
                return True
    return False
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
cdef inline void _sort(uint32* index1_ptr, uint32* index2_ptr):
    # Order the two referenced values in place,
    # so that the first one is not greater than the second one
    cdef uint32 first = index1_ptr[0]
    cdef uint32 second = index2_ptr[0]
    if first > second:
        # Write the values back in exchanged order
        index1_ptr[0] = second
        index2_ptr[0] = first
|
|
1418
|
+
|
|
1419
|
+
|
|
1420
|
+
@cython.wraparound(False)
# Do bounds check, as the input indices may be out of bounds
def _invert_index(IndexType[:] index_v, uint32 length):
    """
    Create the inverse of an index array:
    If *input[i] = j*, then *output[j] = i*.
    Each element *j*, that does not appear in *input*, gets
    *output[j]* = -1.

    A :class:`NotImplementedError` is raised, if a value appears
    more than once in the input.
    """
    cdef int32 pos
    cdef IndexType target
    inverse_index = np.full(length, -1, dtype=np.int32)
    cdef int32[:] inverse_index_v = inverse_index

    for pos in range(index_v.shape[0]):
        target = index_v[pos]
        if inverse_index_v[target] == -1:
            inverse_index_v[target] = pos
        else:
            # This position was already assigned
            # -> the value appears multiple times,
            # which is currently not supported
            raise NotImplementedError(
                f"Duplicate indices are not supported, "
                f"but index {target} appeared multiple times"
            )

    return inverse_index
|
|
1446
|
+
|
|
1447
|
+
|
|
1448
|
+
|
|
1449
|
+
|
|
1450
|
+
# fmt: off
|
|
1451
|
+
# Default bond distance ranges used by 'connect_via_distances()':
# Each key is a pair of upper case element symbols, each value is the
# '(minimum, maximum)' distance accepted as a bond between these elements.
# NOTE(review): distances are presumably given in Angstrom - confirm
_DEFAULT_DISTANCE_RANGE = {
    # Taken from Allen et al.
    # Lower bound: min - 2*std, upper bound: max + 2*std
    ("B", "C" ) : (1.556 - 2*0.015, 1.556 + 2*0.015),
    ("BR", "C" ) : (1.875 - 2*0.029, 1.966 + 2*0.029),
    ("BR", "O" ) : (1.581 - 2*0.007, 1.581 + 2*0.007),
    ("C", "C" ) : (1.174 - 2*0.011, 1.588 + 2*0.025),
    ("C", "CL") : (1.713 - 2*0.011, 1.849 + 2*0.011),
    ("C", "F" ) : (1.320 - 2*0.009, 1.428 + 2*0.009),
    ("C", "H" ) : (1.059 - 2*0.030, 1.099 + 2*0.007),
    ("C", "I" ) : (2.095 - 2*0.015, 2.162 + 2*0.015),
    ("C", "N" ) : (1.325 - 2*0.009, 1.552 + 2*0.023),
    ("C", "O" ) : (1.187 - 2*0.011, 1.477 + 2*0.008),
    ("C", "P" ) : (1.791 - 2*0.006, 1.855 + 2*0.019),
    ("C", "S" ) : (1.630 - 2*0.014, 1.863 + 2*0.015),
    ("C", "SE") : (1.893 - 2*0.013, 1.970 + 2*0.032),
    ("C", "SI") : (1.837 - 2*0.012, 1.888 + 2*0.023),
    ("CL", "O" ) : (1.414 - 2*0.026, 1.414 + 2*0.026),
    ("CL", "P" ) : (1.997 - 2*0.035, 2.008 + 2*0.035),
    ("CL", "S" ) : (2.072 - 2*0.023, 2.072 + 2*0.023),
    ("CL", "SI") : (2.072 - 2*0.009, 2.072 + 2*0.009),
    ("F", "N" ) : (1.406 - 2*0.016, 1.406 + 2*0.016),
    ("F", "P" ) : (1.495 - 2*0.016, 1.579 + 2*0.025),
    ("F", "S" ) : (1.640 - 2*0.011, 1.640 + 2*0.011),
    ("F", "SI") : (1.588 - 2*0.014, 1.694 + 2*0.013),
    ("H", "N" ) : (1.009 - 2*0.022, 1.033 + 2*0.022),
    ("H", "O" ) : (0.967 - 2*0.010, 1.015 + 2*0.017),
    ("I", "O" ) : (2.144 - 2*0.028, 2.144 + 2*0.028),
    ("N", "N" ) : (1.124 - 2*0.015, 1.454 + 2*0.021),
    ("N", "O" ) : (1.210 - 2*0.011, 1.463 + 2*0.012),
    ("N", "P" ) : (1.571 - 2*0.013, 1.697 + 2*0.015),
    ("N", "S" ) : (1.541 - 2*0.022, 1.710 + 2*0.019),
    ("N", "SI") : (1.711 - 2*0.019, 1.748 + 2*0.022),
    ("O", "P" ) : (1.449 - 2*0.007, 1.689 + 2*0.024),
    ("O", "S" ) : (1.423 - 2*0.008, 1.580 + 2*0.015),
    ("O", "SI") : (1.622 - 2*0.014, 1.680 + 2*0.008),
    ("P", "P" ) : (2.214 - 2*0.022, 2.214 + 2*0.022),
    ("P", "S" ) : (1.913 - 2*0.014, 1.954 + 2*0.005),
    ("P", "SE") : (2.093 - 2*0.019, 2.093 + 2*0.019),
    ("P", "SI") : (2.264 - 2*0.019, 2.264 + 2*0.019),
    ("S", "S" ) : (1.897 - 2*0.012, 2.070 + 2*0.022),
    ("S", "SE") : (2.193 - 2*0.015, 2.193 + 2*0.015),
    ("S", "SI") : (2.145 - 2*0.020, 2.145 + 2*0.020),
    ("SE", "SE") : (2.340 - 2*0.024, 2.340 + 2*0.024),
    ("SI", "SE") : (2.359 - 2*0.012, 2.359 + 2*0.012),
}
|
|
1497
|
+
# fmt: on
|
|
1498
|
+
|
|
1499
|
+
def connect_via_distances(atoms, dict distance_range=None, bint inter_residue=True,
                          default_bond_type=BondType.ANY, bint periodic=False):
    """
    connect_via_distances(atoms, distance_range=None, inter_residue=True,
                          default_bond_type=BondType.ANY, periodic=False)

    Create a :class:`BondList` for a given atom array, based on
    pairwise atom distances.

    A bond (by default of type :attr:`BondType.ANY`) is created for two
    atoms within the same residue, if the distance between them is
    within the expected bond distance range.
    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray
        The structure to create the :class:`BondList` for.
    distance_range : dict of tuple(str, str) -> tuple(float, float), optional
        Custom minimum and maximum bond distances.
        The dictionary keys are tuples of chemical elements representing
        the atoms to be potentially bonded.
        The order of elements within each tuple does not matter.
        The dictionary values are the minimum and maximum bond distance,
        respectively, for the given combination of elements.
        This parameter updates the default dictionary.
        Hence, the default bond distances for missing element pairs are
        still taken from the default dictionary.
        The default bond distances are taken from :footcite:`Allen1987`.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    default_bond_type : BondType or int, optional
        By default, all created bonds have :attr:`BondType.ANY`.
        An alternative :class:`BondType` can be given in this parameter.
    periodic : bool, optional
        If set to true, bonds can also be detected in periodic
        boundary conditions.
        The `box` attribute of `atoms` is required in this case.

    Returns
    -------
    BondList
        The created bond list.

    See Also
    --------
    connect_via_residue_names

    Notes
    -----
    This method might miss bonds, if the bond distance is unexpectedly
    high or low, or it might create false bonds, if two atoms within a
    residue are accidentally in the right distance.
    A more accurate method for determining bonds is
    :func:`connect_via_residue_names()`.

    References
    ----------

    .. footbibliography::
    """
    from .atoms import AtomArray
    from .geometry import distance
    from .residues import get_residue_starts

    cdef list found_bonds = []
    cdef int res_i
    cdef int res_start, res_stop
    cdef np.ndarray all_coord = atoms.coord
    cdef np.ndarray res_coord
    cdef np.ndarray dist_matrix
    cdef float dist
    cdef np.ndarray all_elements = atoms.element
    cdef np.ndarray res_elements
    cdef int first_i, second_i
    cdef dict dist_ranges = {}
    cdef tuple dist_range
    cdef float min_dist, max_dist

    if not isinstance(atoms, AtomArray):
        raise TypeError(f"Expected 'AtomArray', not '{type(atoms).__name__}'")
    # The box is only taken into account for periodic systems
    box = None
    if periodic:
        if atoms.box is None:
            raise BadStructureError("Atom array has no box")
        box = atoms.box

    # Combine the default distance ranges with the custom ones:
    # As custom entries come last, they override the default values
    if distance_range is None:
        distance_range = {}
    for (element1, element2), val in itertools.chain(
        _DEFAULT_DISTANCE_RANGE.items(), distance_range.items()
    ):
        upper1 = element1.upper()
        upper2 = element2.upper()
        # Register both element orders, so that a lookup
        # is independent of the order of the two atoms
        dist_ranges[(upper1, upper2)] = val
        dist_ranges[(upper2, upper1)] = val

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last element is the exclusive stop -> it starts no residue
    for res_i in range(len(residue_starts)-1):
        res_start = residue_starts[res_i]
        res_stop = residue_starts[res_i+1]

        res_elements = all_elements[res_start : res_stop]
        res_coord = all_coord[res_start : res_stop]
        # All pairwise atom distances within this residue
        dist_matrix = distance(
            res_coord[:, np.newaxis, :],
            res_coord[np.newaxis, :, :],
            box
        )
        # Visit each unordered atom pair exactly once
        for first_i in range(len(res_elements)):
            for second_i in range(first_i):
                dist_range = dist_ranges.get((
                    res_elements[first_i],
                    res_elements[second_i]
                ))
                # Element combinations without a distance entry
                # never form a bond
                if dist_range is not None:
                    min_dist, max_dist = dist_range
                    dist = dist_matrix[first_i, second_i]
                    if min_dist <= dist <= max_dist:
                        found_bonds.append((
                            res_start + first_i,
                            res_start + second_i,
                            default_bond_type
                        ))

    bond_list = BondList(atoms.array_length(), np.array(found_bonds))

    if not inter_residue:
        return bond_list

    inter_bonds = _connect_inter_residue(atoms, residue_starts)
    if default_bond_type == BondType.ANY:
        # All bonds are supposed to be of type ANY
        # -> this includes the inter-residue bonds
        inter_bonds.remove_bond_order()
    return bond_list.merge(inter_bonds)
|
|
1647
|
+
|
|
1648
|
+
|
|
1649
|
+
|
|
1650
|
+
def connect_via_residue_names(atoms, bint inter_residue=True,
                              dict custom_bond_dict=None):
    """
    connect_via_residue_names(atoms, inter_residue=True, custom_bond_dict=None)

    Create a :class:`BondList` for a given atom array (stack), based on
    the deposited bonds for each residue in the RCSB ``components.cif``
    dataset.

    Bonds between two adjacent residues are created for the atoms
    expected to connect these residues, i.e. ``'C'`` and ``'N'`` for
    peptides and ``"O3'"`` and ``'P'`` for nucleotides.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        The structure to create the :class:`BondList` for.
    inter_residue : bool, optional
        If true, connections between consecutive amino acids and
        nucleotides are also added.
    custom_bond_dict : dict (str -> dict ((str, str) -> int)), optional
        A dictionary of dictionaries:
        The outer dictionary maps residue names to inner dictionaries.
        The inner dictionary maps tuples of two atom names to their
        respective :class:`BondType` (represented as integer).
        If given, these bonds are used instead of the bonds read from
        ``components.cif``.

    Returns
    -------
    BondList
        The created bond list.
        No bonds are added for residues that are not found in
        ``components.cif``.

    See Also
    --------
    connect_via_distances

    Notes
    -----
    This method can only find bonds for residues in the RCSB
    *Chemical Component Dictionary*, unless `custom_bond_dict` is set.
    Although this includes most molecules one encounters, this will fail
    for exotic molecules, e.g. specialized inhibitors.

    .. currentmodule:: biotite.structure.info

    To supplement `custom_bond_dict` with bonds for residues from the
    *Chemical Component Dictionary* you can use
    :meth:`bonds_in_residue()`.

    >>> import pprint
    >>> custom_bond_dict = {
    ...     "XYZ": {
    ...         ("A", "B"): BondType.SINGLE,
    ...         ("B", "C"): BondType.SINGLE
    ...     }
    ... }
    >>> # Supplement with bonds for common residues
    >>> custom_bond_dict["ALA"] = bonds_in_residue("ALA")
    >>> pp = pprint.PrettyPrinter(width=40)
    >>> pp.pprint(custom_bond_dict)
    {'ALA': {('C', 'O'): <BondType.DOUBLE: 2>,
             ('C', 'OXT'): <BondType.SINGLE: 1>,
             ('CA', 'C'): <BondType.SINGLE: 1>,
             ('CA', 'CB'): <BondType.SINGLE: 1>,
             ('CA', 'HA'): <BondType.SINGLE: 1>,
             ('CB', 'HB1'): <BondType.SINGLE: 1>,
             ('CB', 'HB2'): <BondType.SINGLE: 1>,
             ('CB', 'HB3'): <BondType.SINGLE: 1>,
             ('N', 'CA'): <BondType.SINGLE: 1>,
             ('N', 'H'): <BondType.SINGLE: 1>,
             ('N', 'H2'): <BondType.SINGLE: 1>,
             ('OXT', 'HXT'): <BondType.SINGLE: 1>},
     'XYZ': {('A', 'B'): <BondType.SINGLE: 1>,
             ('B', 'C'): <BondType.SINGLE: 1>}}
    """
    from .info.bonds import bonds_in_residue
    from .residues import get_residue_starts

    cdef list found_bonds = []
    cdef int res_i
    cdef int first_i, second_i
    cdef int res_start, res_stop
    cdef np.ndarray all_atom_names = atoms.atom_name
    cdef np.ndarray res_atom_names
    cdef np.ndarray all_res_names = atoms.res_name
    cdef str atom_name1, atom_name2
    cdef int64[:] atom_indices1, atom_indices2
    cdef dict bond_dict_for_res

    residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
    # The last element is the exclusive stop -> it starts no residue
    for res_i in range(len(residue_starts)-1):
        res_start = residue_starts[res_i]
        res_stop = residue_starts[res_i+1]

        # The residue name is taken from the first atom of the residue
        if custom_bond_dict is not None:
            bond_dict_for_res = custom_bond_dict.get(
                all_res_names[res_start], {}
            )
        else:
            bond_dict_for_res = bonds_in_residue(all_res_names[res_start])

        res_atom_names = all_atom_names[res_start : res_stop]
        for (atom_name1, atom_name2), bond_type in bond_dict_for_res.items():
            # Positions of the two atom names, relative to the residue start
            atom_indices1 = np.where(res_atom_names == atom_name1)[0] \
                            .astype(np.int64, copy=False)
            atom_indices2 = np.where(res_atom_names == atom_name2)[0] \
                            .astype(np.int64, copy=False)
            # The same atom name might be present more than once
            # in a residue (e.g. due to altlocs)
            # -> bond each occurrence of the first name
            # to each occurrence of the second name
            for first_i in range(atom_indices1.shape[0]):
                for second_i in range(atom_indices2.shape[0]):
                    found_bonds.append((
                        res_start + atom_indices1[first_i],
                        res_start + atom_indices2[second_i],
                        bond_type
                    ))

    bond_list = BondList(atoms.array_length(), np.array(found_bonds))

    if not inter_residue:
        return bond_list

    inter_bonds = _connect_inter_residue(atoms, residue_starts)
    return bond_list.merge(inter_bonds)
|
|
1779
|
+
|
|
1780
|
+
|
|
1781
|
+
|
|
1782
|
+
# Link types, as returned by 'link_type()', for which '_connect_inter_residue()'
# bonds consecutive residues via the 'C' and 'N' atom
_PEPTIDE_LINKS = ["PEPTIDE LINKING", "L-PEPTIDE LINKING", "D-PEPTIDE LINKING"]
# Link types for which consecutive residues are bonded
# via the "O3'" and 'P' atom
_NUCLEIC_LINKS = ["RNA LINKING", "DNA LINKING"]
|
|
1784
|
+
|
|
1785
|
+
def _connect_inter_residue(atoms, residue_starts):
    """
    Create a :class:`BondList` containing the bonds between adjacent
    amino acid or nucleotide residues.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to create the :class:`BondList` for.
    residue_starts : ndarray, dtype=int
        Return value of
        ``get_residue_starts(atoms, add_exclusive_stop=True)``.

    Returns
    -------
    BondList
        A bond list containing all inter residue bonds.
    """
    from .info.misc import link_type

    cdef list inter_bonds = []
    cdef int res_i
    cdef np.ndarray atom_names = atoms.atom_name
    cdef np.ndarray res_names = atoms.res_name
    cdef np.ndarray res_ids = atoms.res_id
    cdef np.ndarray chain_ids = atoms.chain_id
    cdef int curr_start, next_start, next_stop
    cdef str curr_atom_name, next_atom_name
    cdef np.ndarray curr_candidates, next_candidates

    # The last two elements of 'residue_starts' are left out:
    # - the last residue has no successor to be bonded to and
    # - the exclusive stop starts no residue
    for res_i in range(len(residue_starts)-2):
        curr_start = residue_starts[res_i]
        next_start = residue_starts[res_i+1]
        next_stop = residue_starts[res_i+2]

        # Residues from different chains are not bonded
        if chain_ids[next_start] != chain_ids[curr_start]:
            continue
        # A gap in the residue IDs indicates missing residues
        # (Equal residue IDs are accepted,
        # as they appear if an insertion code is used)
        if res_ids[next_start] - res_ids[curr_start] > 1:
            continue

        # Link types of both residues according to RCSB components.cif
        curr_link = link_type(res_names[curr_start])
        next_link = link_type(res_names[next_start])

        if curr_link in _PEPTIDE_LINKS and next_link in _PEPTIDE_LINKS:
            curr_atom_name, next_atom_name = "C", "N"
        elif curr_link in _NUCLEIC_LINKS and next_link in _NUCLEIC_LINKS:
            curr_atom_name, next_atom_name = "O3'", "P"
        else:
            # The link types of the two residues do not fit together
            # -> no bond
            continue

        # 'np.where()' is applied to a slice of 'atom_names'
        # -> add the start of the slice to obtain
        # indices that refer to the entire atom array
        curr_candidates = curr_start + np.where(
            atom_names[curr_start : next_start] == curr_atom_name
        )[0]
        next_candidates = next_start + np.where(
            atom_names[next_start : next_stop] == next_atom_name
        )[0]
        if len(curr_candidates) == 0 or len(next_candidates) == 0:
            # At least one of the connecting atoms is missing
            # -> no bond
            continue

        # Only the first occurrence of each connecting atom is bonded
        inter_bonds.append((
            curr_candidates[0],
            next_candidates[0],
            BondType.SINGLE
        ))

    return BondList(atoms.array_length(), np.array(inter_bonds, dtype=np.uint32))
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
|
|
1874
|
+
def find_connected(bond_list, uint32 root, bint as_mask=False):
    """
    find_connected(bond_list, root, as_mask=False)

    Get indices to all atoms that are directly or indirectly connected
    to the root atom indicated by the given index.

    An atom is *connected* to the `root` atom, if that atom is reachable
    by traversing an arbitrary number of bonds, starting from the
    `root`.
    Effectively, this means that all atoms are *connected* to `root`,
    that are in the same molecule as `root`.
    Per definition `root` is also *connected* to itself.

    Parameters
    ----------
    bond_list : BondList
        The reference bond list.
    root : int
        The index of the root atom.
    as_mask : bool, optional
        If true, the connected atom indices are returned as boolean
        mask.
        By default, the connected atom indices are returned as integer
        array.

    Returns
    -------
    connected : ndarray, dtype=int or ndarray, dtype=bool
        Either a boolean mask or an integer array, representing the
        connected atoms.
        In case of a boolean mask: ``connected[i] == True``, if the atom
        with index ``i`` is connected.

    Raises
    ------
    ValueError
        If `root` is not smaller than the atom count of `bond_list`.

    Examples
    --------
    Consider a system with 4 atoms, where only the last atom is not
    bonded with the other ones (``0-1-2 3``):

    >>> bonds = BondList(4)
    >>> bonds.add_bond(0, 1)
    >>> bonds.add_bond(1, 2)
    >>> print(find_connected(bonds, 0))
    [0 1 2]
    >>> print(find_connected(bonds, 1))
    [0 1 2]
    >>> print(find_connected(bonds, 2))
    [0 1 2]
    >>> print(find_connected(bonds, 3))
    [3]
    """
    all_bonds, _ = bond_list.get_all_bonds()

    if root >= bond_list.get_atom_count():
        raise ValueError(
            f"Root atom index {root} is out of bounds for bond list "
            f"representing {bond_list.get_atom_count()} atoms"
        )

    # Views of boolean arrays are not supported -> use uint8 array
    cdef uint8[:] is_connected_mask = np.zeros(
        bond_list.get_atom_count(), dtype=np.uint8
    )
    # Mark all atoms that are reachable from the root via bonds
    _find_connected(bond_list, root, is_connected_mask, all_bonds)
    # Convert the uint8 view into an actual boolean array:
    # Returning the view itself would not give the documented boolean
    # mask and indexing with it would select the elements 0 and 1
    # instead of masking
    connected_mask = np.asarray(is_connected_mask).astype(bool, copy=False)
    if as_mask:
        return connected_mask
    else:
        return np.where(connected_mask)[0]
|
|
1943
|
+
|
|
1944
|
+
|
|
1945
|
+
cdef _find_connected(bond_list,
                     int32 index,
                     uint8[:] is_connected_mask,
                     int32[:,:] all_bonds):
    """
    Mark all atoms in `is_connected_mask` that are reachable from the
    atom at `index` by traversing bonds, including the atom itself.

    `all_bonds` contains in row ``i`` the indices of the atoms bonded
    to atom ``i``, where ``-1`` values are padding.
    Atoms that are already marked in `is_connected_mask` are treated
    as visited and are not traversed again.
    """
    cdef int32 j
    cdef int32 current_index
    cdef int32 connected_index
    # Use an explicit stack instead of recursive calls:
    # The recursion depth would grow with the length of the traversed
    # bond path, which may exhaust the call stack for large molecules
    cdef list pending = [index]

    while pending:
        current_index = pending.pop()
        if is_connected_mask[current_index]:
            # This atom has already been visited
            continue
        is_connected_mask[current_index] = True

        for j in range(all_bonds.shape[1]):
            connected_index = all_bonds[current_index, j]
            if connected_index == -1:
                # Ignore padding values
                continue
            if not is_connected_mask[connected_index]:
                pending.append(connected_index)
|
|
1965
|
+
|
|
1966
|
+
|
|
1967
|
+
def find_rotatable_bonds(bonds):
    """
    find_rotatable_bonds(bonds)

    Find all rotatable bonds in a given :class:`BondList`.

    The following conditions must be true for a bond to be counted as
    rotatable:

        1. The bond must be a single bond (``BondType.SINGLE``)
        2. The connected atoms must not be within the same cycle/ring
        3. Both connected atoms must not be terminal, e.g. not a *C-H*
           bond, as rotation about such bonds would not change any
           coordinates

    Parameters
    ----------
    bonds : BondList
        The bonds to find the rotatable bonds in.

    Returns
    -------
    rotatable_bonds : BondList
        The subset of the input `bonds` that contains only rotatable
        bonds.

    Examples
    --------

    >>> molecule = residue("TYR")
    >>> for i, j, _ in find_rotatable_bonds(molecule.bonds).as_array():
    ...     print(molecule.atom_name[i], molecule.atom_name[j])
    N CA
    CA C
    CA CB
    C OXT
    CB CG
    CZ OH
    """
    cdef uint32 i, j
    cdef uint32 bond_type
    # Convert the enum member once,
    # so the comparison in the loop is a plain integer comparison
    cdef uint32 SINGLE = int(BondType.SINGLE)
    cdef bint in_same_cycle

    bond_graph = bonds.as_graph()
    # Convert each cycle into a set once,
    # as membership is tested for every candidate bond
    cycles = [
        set(cycle)
        for cycle in nx.algorithms.cycles.cycle_basis(bond_graph)
    ]

    # The number of bonded atoms for each atom:
    # count the entries that are not padding values (-1)
    cdef int64[:] number_of_partners_v = np.count_nonzero(
        bonds.get_all_bonds()[0] != -1,
        axis=1
    ).astype(np.int64, copy=False)

    rotatable_bonds = []
    cdef uint32[:,:] bonds_v = bonds.as_array()
    for i, j, bond_type in bonds_v:
        # Can only rotate about single bonds
        # Furthermore, it makes no sense to rotate about a bond,
        # that leads to a single atom
        if bond_type == SINGLE \
            and number_of_partners_v[i] > 1 \
            and number_of_partners_v[j] > 1:
                # Cannot rotate about a bond, if the two connected atoms
                # are in a cycle
                in_same_cycle = False
                for cycle in cycles:
                    if i in cycle and j in cycle:
                        in_same_cycle = True
                        # One common cycle is sufficient
                        break
                if not in_same_cycle:
                    rotatable_bonds.append((i, j, bond_type))
    return BondList(bonds.get_atom_count(), np.array(rotatable_bonds))
|