biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.interface.rdkit"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Simon Mathis"
|
|
7
|
+
__all__ = ["to_mol", "from_mol"]
|
|
8
|
+
|
|
9
|
+
import copy
|
|
10
|
+
import numbers
|
|
11
|
+
import warnings
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
import numpy as np
|
|
14
|
+
import rdkit.Chem.AllChem as Chem
|
|
15
|
+
from rdkit.Chem import SanitizeFlags
|
|
16
|
+
from rdkit.rdBase import BlockLogs
|
|
17
|
+
from biotite.interface.version import requires_version
|
|
18
|
+
from biotite.interface.warning import LossyConversionWarning
|
|
19
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack
|
|
20
|
+
from biotite.structure.bonds import BondList, BondType
|
|
21
|
+
from biotite.structure.error import BadStructureError
|
|
22
|
+
|
|
23
|
+
# Maps each non-aromatic (kekulized) bond order to the aromatic bond type
# of the same order.
_KEKULIZED_TO_AROMATIC_BOND_TYPE = {
    BondType.SINGLE: BondType.AROMATIC_SINGLE,
    BondType.DOUBLE: BondType.AROMATIC_DOUBLE,
    BondType.TRIPLE: BondType.AROMATIC_TRIPLE,
}
# Biotite -> RDKit bond type translation used by `to_mol()`.
# All aromatic Biotite variants collapse into the single RDKit AROMATIC type.
_BIOTITE_TO_RDKIT_BOND_TYPE = {
    BondType.ANY: Chem.BondType.UNSPECIFIED,
    BondType.SINGLE: Chem.BondType.SINGLE,
    BondType.DOUBLE: Chem.BondType.DOUBLE,
    BondType.TRIPLE: Chem.BondType.TRIPLE,
    BondType.QUADRUPLE: Chem.BondType.QUADRUPLE,
    BondType.AROMATIC_SINGLE: Chem.BondType.AROMATIC,
    BondType.AROMATIC_DOUBLE: Chem.BondType.AROMATIC,
    BondType.AROMATIC_TRIPLE: Chem.BondType.AROMATIC,
    BondType.AROMATIC: Chem.BondType.AROMATIC,
    # Dative bonds may lead to a KekulizeException and may potentially be deprecated
    # in the future (https://github.com/rdkit/rdkit/discussions/6995)
    BondType.COORDINATION: Chem.BondType.SINGLE,
}
# RDKit -> Biotite bond type translation.
# Note that `Chem.BondType.AROMATIC` has no entry in this table.
_RDKIT_TO_BIOTITE_BOND_TYPE = {
    Chem.BondType.UNSPECIFIED: BondType.ANY,
    Chem.BondType.SINGLE: BondType.SINGLE,
    Chem.BondType.DOUBLE: BondType.DOUBLE,
    Chem.BondType.TRIPLE: BondType.TRIPLE,
    Chem.BondType.QUADRUPLE: BondType.QUADRUPLE,
    Chem.BondType.DATIVE: BondType.COORDINATION,
}
# Annotation categories that `to_mol()` never stores as extra atom-level
# properties: they are subtracted from `include_extra_annotations`.
_STANDARD_ANNOTATIONS = frozenset(
    {
        "chain_id",
        "res_id",
        "ins_code",
        "res_name",
        "hetero",
        "atom_name",
        "element",
        "charge",
        "b_factor",
        "occupancy",
        "altloc_id",
    }
)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# `Conformer.SetPositions()` was added in RDKit 2024.09.1
|
|
68
|
+
@requires_version("rdkit", ">=2024.09.1")
def to_mol(
    atoms,
    kekulize=False,
    use_dative_bonds=False,
    include_extra_annotations=(),
    explicit_hydrogen=None,
):
    """
    Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
    :class:`rdkit.Chem.rdchem.Mol`.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The molecule to be converted.
        Must have an associated :class:`BondList`.
    kekulize : bool, optional
        If set to true, aromatic bonds are represented by single, double and triple
        bonds.
        By default, aromatic bond types are converted to
        :attr:`rdkit.rdchem.BondType.AROMATIC`.
    use_dative_bonds : bool, optional
        If set to true, :attr:`BondType.COORDINATION` bonds are translated to
        :attr:`rdkit.rdchem.BondType.DATIVE` bonds instead of
        :attr:`rdkit.rdchem.BondType.SINGLE` bonds.
        This may have the undesired side effect that a
        :class:`rdkit.Chem.rdchem.KekulizeException` is raised for some molecules, when
        the returned :class:`rdkit.Chem.rdchem.Mol` is kekulized.
    include_extra_annotations : list of str, optional
        Names of annotation arrays in `atoms` that are added as atom-level property with
        the same name to the returned :class:`rdkit.Chem.rdchem.Mol`.
        These properties can be accessed with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`.
        Note that standard annotations (e.g. ``'chain_id', 'atom_name', 'res_name'``)
        are always included per default. These standard annotations can be accessed
        with :meth:`rdkit.Chem.rdchem.Atom.GetPDBResidueInfo()` for each atom in the
        returned :class:`rdkit.Chem.rdchem.Mol`.
    explicit_hydrogen : bool, optional
        If set to true, the conversion process expects that all hydrogen atoms are
        explicit, i.e. each hydrogen atom must be part of the :class:`AtomArray`.
        If set to false, the conversion process treats all hydrogen atoms as implicit.
        By default, explicit hydrogen atoms are only assumed if any hydrogen atoms are
        present in `atoms`.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        The *RDKit* molecule.
        If the input `atoms` is an :class:`AtomArrayStack`, all models are included
        as conformers with conformer IDs starting from ``0``.

    Raises
    ------
    BadStructureError
        If the input `atoms` does not have an associated :class:`BondList`.
        Also raises a :class:`BadStructureError`, if `explicit_hydrogen` is set to
        ``False`` despite hydrogen atoms being present in `atoms`.

    Warns
    -----
    UserWarning
        If `explicit_hydrogen` is set to ``True``, but `atoms` contains no hydrogen
        atoms.

    Notes
    -----
    The atoms in the return value are in the same order as the input `atoms`,
    i.e. indices pointing to the :class:`rdkit.Chem.rdchem.Mol` can be used to point to
    the same atoms in the :class:`.AtomArray`.

    Examples
    --------

    >>> from rdkit.Chem import MolToSmiles
    >>> alanine_atom_array = residue("ALA")
    >>> mol = to_mol(alanine_atom_array)
    >>> print(MolToSmiles(mol))
    [H]OC(=O)C([H])(N([H])[H])C([H])([H])[H]

    By default, ``'atom_name'`` is stored in RDKit's PDBResidueInfo grouping
    for each atom. We can access it manually as below

    >>> for atom in mol.GetAtoms():
    ...     print(atom.GetPDBResidueInfo().GetName())
    N
    CA
    C
    O
    CB
    OXT
    H
    H2
    HA
    HB1
    HB2
    HB3
    HXT
    """
    hydrogen_mask = atoms.element == "H"
    _has_hydrogen = hydrogen_mask.any()
    if explicit_hydrogen is None:
        # Infer the hydrogen handling from the input itself
        explicit_hydrogen = _has_hydrogen
    elif explicit_hydrogen:
        if not _has_hydrogen:
            warnings.warn(
                "No hydrogen found, although 'explicit_hydrogen' is 'True'. "
                "This may lead to radicals after sanitization in RDKit.",
                UserWarning,
            )
    else:
        if _has_hydrogen:
            raise BadStructureError(
                "Hydrogen atoms are present in the input, although 'explicit_hydrogen' "
                "is set to 'False'"
            )
        # No hydrogen is left at this point, so this filter keeps every atom
        atoms = atoms[..., ~hydrogen_mask]

    # Fail fast: check for the required BondList before any RDKit atom is built
    if atoms.bonds is None:
        raise BadStructureError("An AtomArray with associated BondList is required")

    mol = Chem.EditableMol(Chem.Mol())

    has_annot = frozenset(atoms.get_annotation_categories())
    extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS

    for i in range(atoms.array_length()):
        rdkit_atom = Chem.Atom(atoms.element[i].capitalize())
        if explicit_hydrogen:
            # ... tell RDKit to not assume any implicit hydrogens
            rdkit_atom.SetNoImplicit(True)
        if "charge" in has_annot:
            rdkit_atom.SetFormalCharge(atoms.charge[i].item())

        # add standard pdb annotations
        rdkit_atom_res_info = Chem.AtomPDBResidueInfo(
            atomName=atoms.atom_name[i].item(),
            residueName=atoms.res_name[i].item(),
            chainId=atoms.chain_id[i].item(),
            residueNumber=atoms.res_id[i].item(),
            isHeteroAtom=atoms.hetero[i].item(),
            insertionCode=atoms.ins_code[i].item(),
        )
        if "occupancy" in has_annot:
            rdkit_atom_res_info.SetOccupancy(atoms.occupancy[i].item())
        if "b_factor" in has_annot:
            rdkit_atom_res_info.SetTempFactor(atoms.b_factor[i].item())
        if "altloc_id" in has_annot:
            rdkit_atom_res_info.SetAltLoc(atoms.altloc_id[i].item())
        rdkit_atom.SetPDBResidueInfo(rdkit_atom_res_info)

        # add extra annotations
        for annot_name in extra_annot:
            _set_property(
                rdkit_atom, annot_name, atoms.get_annotation(annot_name)[i].item()
            )

        # add atom to molecule
        mol.AddAtom(rdkit_atom)

    if kekulize:
        # Work on a copy, so the BondList of the input is not altered
        bonds = atoms.bonds.copy()
        bonds.remove_aromaticity()
    else:
        bonds = atoms.bonds
    for atom_i, atom_j, bond_type in bonds.as_array():
        if bond_type == BondType.COORDINATION:
            # The generic lookup table maps coordination bonds to single bonds,
            # so the dative bond type must be selected explicitly here;
            # otherwise 'use_dative_bonds' would have no effect
            # NOTE(review): RDKit dative bonds are directional; the atom order
            # from the BondList is used as-is -- confirm this is intended
            if use_dative_bonds:
                rdkit_bond_type = Chem.BondType.DATIVE
            else:
                rdkit_bond_type = Chem.BondType.SINGLE
        else:
            rdkit_bond_type = _BIOTITE_TO_RDKIT_BOND_TYPE[bond_type]
        mol.AddBond(atom_i.item(), atom_j.item(), rdkit_bond_type)

    # Create a proper 'frozen' Mol object
    mol = mol.GetMol()
    coord = atoms.coord
    if coord.ndim == 2:
        # Handle AtomArray and AtomArrayStack consistently
        coord = coord[None, :, :]
    for model_coord in coord:
        conformer = Chem.Conformer(mol.GetNumAtoms())
        # RDKit silently expects the data to be in C-contiguous order
        # Otherwise the coordinates would be completely misassigned
        # (https://github.com/rdkit/rdkit/issues/8221)
        conformer.SetPositions(np.ascontiguousarray(model_coord, dtype=np.float64))
        conformer.Set3D(True)
        mol.AddConformer(conformer)

    return mol
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@requires_version("rdkit", ">=2020")
def from_mol(mol, conformer_id=None, add_hydrogen=None):
    """
    Convert a :class:`rdkit.Chem.rdchem.Mol` into an :class:`.AtomArray` or
    :class:`.AtomArrayStack`.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The molecule to be converted.
    conformer_id : int or {"2D", "3D"}, optional
        The ID of the conformer to be converted.
        If set to "2D" or "3D", an :class:`AtomArrayStack` with only the 2D or 3D
        conformer is returned, respectively.
        By default, an :class:`AtomArrayStack` with all conformers (2D and 3D) is
        returned.
    add_hydrogen : bool, optional
        If set to true, explicit hydrogen atoms are always added.
        If set to false, explicit hydrogen atoms are never added.
        By default, explicit hydrogen atoms are only added, if hydrogen atoms are not
        already present.

    Returns
    -------
    atoms : AtomArray or AtomArrayStack
        The converted atoms.
        An :class:`AtomArray` is returned if an integer `conformer_id` is given.
        Otherwise, an :class:`AtomArrayStack` is returned.
        If the input `mol` does not have a conformer, an `AtomArrayStack` with a
        single model, where all coordinates are *NaN*, is returned.

    Notes
    -----
    The atoms in the return value are in the same order as the input `mol`,
    i.e. indices pointing to the :class:`rdkit.Chem.rdchem.Mol` can be used to point to
    the same atoms in the :class:`.AtomArray`.

    All atom-level properties of `mol`
    (obtainable with :meth:`rdkit.Chem.rdchem.Atom.GetProp()`) are added as annotation
    array with the same name.
    ``element`` and ``charge`` are not inferred from properties but from the
    dedicated attributes in the :class:`rdkit.Chem.rdchem.Mol` object.

    A hydrogen atom without residue information inherits the residue information
    of its first bonded neighbor, if that neighbor has any.

    Examples
    --------

    >>> from rdkit.Chem import MolFromSmiles
    >>> from rdkit.Chem.rdDistGeom import EmbedMolecule
    >>> from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
    >>> from rdkit.Chem.rdmolops import AddHs
    >>> mol = MolFromSmiles("C[C@@H](C(=O)O)N")
    >>> mol = AddHs(mol)
    >>> # Create a 3D conformer
    >>> conformer_id = EmbedMolecule(mol)
    >>> UFFOptimizeMolecule(mol)
    0
    >>> alanine_atom_array = from_mol(mol, conformer_id)
    >>> # RDKit does not assign atom names -> for convenience, do this in Biotite
    >>> alanine_atom_array.atom_name = create_atom_names(alanine_atom_array)
    >>> print(alanine_atom_array)
    0 C1 C -1.076 1.102 -0.094
    0 C2 C -0.363 -0.246 -0.218
    0 C3 C 1.129 -0.073 -0.109
    0 O1 O 1.644 0.373 0.952
    0 O2 O 1.943 -0.405 -1.187
    0 N1 N -0.861 -1.175 0.798
    0 H1 H -0.724 1.795 -0.888
    0 H2 H -2.171 0.960 -0.212
    0 H3 H -0.881 1.561 0.899
    0 H4 H -0.600 -0.664 -1.221
    0 H5 H 2.949 -0.295 -1.132
    0 H6 H -0.595 -0.830 1.750
    0 H7 H -0.395 -2.102 0.660
    """
    if add_hydrogen is None:
        add_hydrogen = not _has_explicit_hydrogen(mol)
    if add_hydrogen:
        # Avoid modifying the input molecule
        mol = copy.deepcopy(mol)
        with BlockLogs():
            Chem.SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
            mol = Chem.AddHs(mol, addCoords=False, addResidueInfo=False)

    rdkit_atoms = mol.GetAtoms()
    if rdkit_atoms is None:
        raise BadStructureError("Could not obtain atoms from Mol")

    if conformer_id in (None, "2D", "3D"):
        conformers = list(mol.GetConformers())
        if conformer_id == "2D":
            conformers = [conf for conf in conformers if not conf.Is3D()]
        elif conformer_id == "3D":
            conformers = [conf for conf in conformers if conf.Is3D()]
        if len(conformers) == 0:
            # No conformer in 'Mol' that fulfills the criteria
            # -> create a single model with all coordinates set to NaN
            atoms = AtomArrayStack(1, len(rdkit_atoms))
            atoms.coord = np.full((1, len(rdkit_atoms), 3), np.nan)
        else:
            atoms = AtomArrayStack(len(conformers), len(rdkit_atoms))
            for i, conformer in enumerate(conformers):
                atoms.coord[i] = np.array(conformer.GetPositions(), dtype=np.float32)
    else:
        conformer = mol.GetConformer(conformer_id)
        atoms = AtomArray(len(rdkit_atoms))
        atoms.coord = np.array(conformer.GetPositions(), dtype=np.float32)

    extra_annotations = defaultdict(
        # The dtype of each annotation array is inferred later
        lambda: [None] * atoms.array_length()
    )
    atoms.add_annotation("charge", int)
    atoms.add_annotation("b_factor", float)
    atoms.add_annotation("occupancy", float)
    atoms.add_annotation("altloc_id", str)

    for rdkit_atom in rdkit_atoms:
        _atom_idx = rdkit_atom.GetIdx()

        # ... add standard annotations
        element = rdkit_atom.GetSymbol().upper().strip()
        atoms.element[_atom_idx] = element
        atoms.charge[_atom_idx] = rdkit_atom.GetFormalCharge()

        # ... add PDB related annotations
        residue_info = rdkit_atom.GetPDBResidueInfo()
        if residue_info is None:
            # ... default values for atoms with missing residue information
            residue_info = Chem.AtomPDBResidueInfo(
                atomName="",
                occupancy=0.0,
                tempFactor=float("nan"),
                altLoc=".",
            )
            if element == "H":
                # ... attempt inferring residue information from the bonded atom
                # in case of a hydrogen atom without explicit residue information
                neighbors = rdkit_atom.GetNeighbors()
                # A hydrogen atom without any bond (e.g. a lone proton) has no
                # neighbor to inherit from -> keep the default values
                if len(neighbors) > 0:
                    neighbor_res_info = neighbors[0].GetPDBResidueInfo()
                else:
                    neighbor_res_info = None
                if neighbor_res_info is not None:
                    residue_info.SetChainId(neighbor_res_info.GetChainId())
                    residue_info.SetResidueName(neighbor_res_info.GetResidueName())
                    residue_info.SetResidueNumber(
                        neighbor_res_info.GetResidueNumber()
                    )
                    residue_info.SetInsertionCode(
                        neighbor_res_info.GetInsertionCode()
                    )
                    residue_info.SetIsHeteroAtom(neighbor_res_info.GetIsHeteroAtom())
                    residue_info.SetAltLoc(neighbor_res_info.GetAltLoc())

        atoms.chain_id[_atom_idx] = residue_info.GetChainId()
        atoms.res_id[_atom_idx] = residue_info.GetResidueNumber()
        atoms.ins_code[_atom_idx] = residue_info.GetInsertionCode()
        atoms.res_name[_atom_idx] = residue_info.GetResidueName()
        atoms.altloc_id[_atom_idx] = residue_info.GetAltLoc()
        atoms.hetero[_atom_idx] = residue_info.GetIsHeteroAtom()
        atoms.b_factor[_atom_idx] = residue_info.GetTempFactor()
        atoms.occupancy[_atom_idx] = residue_info.GetOccupancy()
        atoms.atom_name[_atom_idx] = residue_info.GetName().strip()

        # ... add extra annotations
        for annot, value in rdkit_atom.GetPropsAsDict(includePrivate=False).items():
            extra_annotations[annot][_atom_idx] = value

    for annot, array in extra_annotations.items():
        # Handle special case of implicit hydrogen atom flags,
        # that is set by 'AddHs()' to hydrogen atoms
        if annot == "isImplicit":
            # Atoms lacking the flag hold 'None', which becomes 'False' here
            annotation_array = np.array(array, dtype=bool)
        else:
            annotation_array = np.array(array)
        atoms.set_annotation(annot, annotation_array)

    rdkit_bonds = list(mol.GetBonds())
    # Remember which bonds are aromatic before the kekulization below
    # replaces their bond type
    is_aromatic = np.array(
        [bond.GetBondType() == Chem.BondType.AROMATIC for bond in rdkit_bonds]
    )
    if np.any(is_aromatic):
        # Determine the kekulized order of aromatic bonds
        # Copy as 'Kekulize()' modifies the molecule in-place
        mol = Chem.Mol(mol)
        try:
            with BlockLogs():
                Chem.Kekulize(mol)
        except Chem.KekulizeException:
            warnings.warn(
                "Kekulization failed, "
                "using 'BondType.AROMATIC' for aromatic bonds instead",
                LossyConversionWarning,
            )
        rdkit_bonds = list(mol.GetBonds())
    bond_array = np.full((len(rdkit_bonds), 3), BondType.ANY, dtype=np.uint32)
    for i, bond in enumerate(rdkit_bonds):
        bond_type = _RDKIT_TO_BIOTITE_BOND_TYPE.get(bond.GetBondType())
        if bond_type is None:
            warnings.warn(
                f"Bond type '{bond.GetBondType().name}' cannot be mapped to Biotite, "
                "using 'BondType.ANY' instead",
                LossyConversionWarning,
            )
            bond_type = BondType.ANY
        if is_aromatic[i]:
            try:
                bond_type = _KEKULIZED_TO_AROMATIC_BOND_TYPE[bond_type]
            except KeyError:
                bond_type = BondType.AROMATIC
                warnings.warn(
                    "Kekulization returned invalid bond type, "
                    "using generic 'BondType.AROMATIC' instead",
                    LossyConversionWarning,
                )
        bond_array[i, 0] = bond.GetBeginAtomIdx()
        bond_array[i, 1] = bond.GetEndAtomIdx()
        bond_array[i, 2] = bond_type
    atoms.bonds = BondList(atoms.array_length(), bond_array)

    return atoms
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _has_explicit_hydrogen(mol):
|
|
475
|
+
return mol.GetNumAtoms() > mol.GetNumHeavyAtoms()
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _set_property(atom, annot_name, value):
|
|
479
|
+
if isinstance(value, bool):
|
|
480
|
+
atom.SetBoolProp(annot_name, value)
|
|
481
|
+
elif isinstance(value, numbers.Integral):
|
|
482
|
+
atom.SetIntProp(annot_name, value)
|
|
483
|
+
elif isinstance(value, numbers.Real):
|
|
484
|
+
atom.SetDoubleProp(annot_name, value)
|
|
485
|
+
elif isinstance(value, str):
|
|
486
|
+
atom.SetProp(annot_name, value)
|
|
487
|
+
else:
|
|
488
|
+
raise TypeError(
|
|
489
|
+
f"Unsupported dtype '{type(value).__name__}' for annotation '{annot_name}'"
|
|
490
|
+
)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.interface"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["VersionError", "requires_version"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
import functools
|
|
11
|
+
import importlib.metadata
|
|
12
|
+
from packaging.specifiers import SpecifierSet
|
|
13
|
+
from packaging.version import Version
|
|
14
|
+
|
|
15
|
+
# Stores the variant of interface functions
|
|
16
|
+
# compatible with the respective installed package version
|
|
17
|
+
_functions_for_version = {}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class VersionError(Exception):
    """
    Raised if none of the implemented variants of a function is compatible with
    the installed version of the interfaced package.
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def require_package(package):
    """
    Check if the given package is installed and raise an exception if not.

    Parameters
    ----------
    package : str
        The name of the package to be checked.

    Raises
    ------
    ImportError
        If the package is not installed.

    Notes
    -----
    It is useful to call this function in the ``__init__.py`` of each ``interface``
    subpackage, to obtain clear error messages about missing dependencies.
    """
    # Importing 'importlib.metadata' does not guarantee that the
    # 'importlib.util' submodule is loaded as well -> import it explicitly
    import importlib.util

    if importlib.util.find_spec(package) is None:
        raise ImportError(f"'{package}' is not installed")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def requires_version(package, version_specifier):
    """
    Declare a function variant that is compatible with a specific version range of the
    interfaced package.

    Parameters
    ----------
    package : str
        The name of the interfaced package.
    version_specifier : str or list of str
        The :pep:`440` version specifier(s) for the interfaced package that are
        compatible with the function.
        Multiple constraints can be either given as a list of strings or as a single
        comma-separated string.

    Returns
    -------
    decorator : callable
        The decorator to be applied to the function variant.

    Raises
    ------
    ImportError
        Raised when the decorator is applied, if `package` is not installed.
    VersionError
        Raised when the decorated function is called, if no variant of the function
        has been registered for the installed version of `package`.

    Notes
    -----
    Variants are registered under the name of the decorated function.
    If the installed version matches multiple variants with the same name,
    the variant that is decorated last is used.
    """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            # Look up the variant at call time, as a compatible variant may be
            # registered after this wrapper has been created
            function_for_version = _functions_for_version.get(function.__name__)
            if function_for_version is None:
                raise VersionError(
                    f"No variant of '{function.__name__}()' "
                    f"found for installed '{package}=={package_version}'"
                )
            return function_for_version(*args, **kwargs)

        if isinstance(version_specifier, str):
            specifier = SpecifierSet(version_specifier)
        else:
            # A 'SpecifierSet' requires all of its comma-separated constraints to
            # be fulfilled -> joining the constraints gives their intersection
            specifier = SpecifierSet(",".join(version_specifier))
        try:
            package_version = Version(importlib.metadata.version(package))
        except importlib.metadata.PackageNotFoundError as err:
            raise ImportError(
                f"'{function.__name__}()' requires the '{package}' package"
            ) from err
        if package_version in specifier:
            _functions_for_version[function.__name__] = function

        return wrapper

    return decorator
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.interface"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["LossyConversionWarning"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LossyConversionWarning(UserWarning):
    """
    Warning raised when some information is lost during conversion.

    Most conversion functions are inherently lossy to some extent.
    Hence, this warning is only raised if the loss of information is limited to
    some edge case.
    """
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for handling sequences.
|
|
7
|
+
|
|
8
|
+
A :class:`Sequence` can be seen as a succession of symbols.
|
|
9
|
+
The set of symbols, that can occur in a sequence, is defined by an
|
|
10
|
+
:class:`Alphabet`.
|
|
11
|
+
For example, an unambiguous DNA sequence has an :class:`Alphabet`, that
|
|
12
|
+
includes the 4 letters (strings) ``'A'``, ``'C'``, ``'G'`` and ``'T'``.
|
|
13
|
+
But furthermore, an :class:`Alphabet` can also contain any immutable and
|
|
14
|
+
hashable Python object like :class:`int`, :class:`tuple`, etc.
|
|
15
|
+
If a :class:`Sequence` is created with at least one symbol,
|
|
16
|
+
that is not in the given :class:`Alphabet`, an :class:`AlphabetError` is
|
|
17
|
+
raised.
|
|
18
|
+
|
|
19
|
+
Internally, a :class:`Sequence` is saved as a *NumPy* :class:`ndarray`
|
|
20
|
+
of integer values, where each integer represents a symbol in the
|
|
21
|
+
:class:`Alphabet`.
|
|
22
|
+
For example, ``'A'``, ``'C'``, ``'G'`` and ``'T'`` would be encoded into
|
|
23
|
+
0, 1, 2 and 3, respectively.
|
|
24
|
+
These integer values are called *symbol code*, the encoding of an entire
|
|
25
|
+
sequence of symbols is called *sequence code*.
|
|
26
|
+
|
|
27
|
+
.. figure:: /static/assets/figures/symbol_encoding.png
|
|
28
|
+
:alt: Symbol encoding in Biotite
|
|
29
|
+
:scale: 50%
|
|
30
|
+
|
|
31
|
+
Taken from
|
|
32
|
+
`Kunzmann & Hamacher 2018 <https://doi.org/10.1186/s12859-018-2367-z>`_
|
|
33
|
+
licensed under `CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_.
|
|
34
|
+
|
|
35
|
+
The size of the symbol code type in the array is determined by the
|
|
36
|
+
size of the :class:`Alphabet`:
|
|
37
|
+
If the :class:`Alphabet` contains 256 symbols or less, one byte is used
|
|
38
|
+
per array element, between 257 and 65536 symbols, two bytes are used,
|
|
39
|
+
and so on.
|
|
40
|
+
|
|
41
|
+
This approach has multiple advantages:
|
|
42
|
+
|
|
43
|
+
- Wider spectrum of what kind of objects can be represented by
|
|
44
|
+
:class:`Sequence` objects
|
|
45
|
+
- Efficient memory usage and faster calculations due to
|
|
46
|
+
alphabet-tailored *symbol code* type size
|
|
47
|
+
- C-acceleration due to usage of :class:`ndarray` objects
|
|
48
|
+
- Most functions applied on :class:`Sequence` objects are
|
|
49
|
+
indifferent to the actual type of sequence.
|
|
50
|
+
- Symbol codes are directly indices for substitution matrices in
|
|
51
|
+
alignments
|
|
52
|
+
- *k-mers* can be computed fast
|
|
53
|
+
|
|
54
|
+
The abstract :class:`Sequence` superclass cannot be instantiated
|
|
55
|
+
directly, as it does not define an :class:`Alphabet` by itself.
|
|
56
|
+
Instead usually the concrete subclasses :class:`NucleotideSequence`
|
|
57
|
+
(for DNA and RNA sequences) and :class:`ProteinSequence`
|
|
58
|
+
(for amino acid sequences) are used.
|
|
59
|
+
These classes have defined alphabets and provide additional sequence
|
|
60
|
+
type specific methods.
|
|
61
|
+
The class :class:`GeneralSequence` allows the usage of a custom
|
|
62
|
+
:class:`Alphabet` without the need to subclass :class:`Sequence`.
|
|
63
|
+
|
|
64
|
+
Additionally, this subpackage provides support for sequence features,
|
|
65
|
+
as used in e.g. GenBank or GFF files.
|
|
66
|
+
A :class:`Feature` stores its key name, its qualifiers and locations.
|
|
67
|
+
An :class:`Annotation` is a group of multiple :class:`Feature` objects
|
|
68
|
+
and offers convenient location based indexing.
|
|
69
|
+
An :class:`AnnotatedSequence` combines an :class:`Annotation` and a
|
|
70
|
+
:class:`Sequence`.
|
|
71
|
+
|
|
72
|
+
Sequence profiles can be created with the :class:`SequenceProfile` class.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
__name__ = "biotite.sequence"
|
|
76
|
+
__author__ = "Patrick Kunzmann"
|
|
77
|
+
|
|
78
|
+
from .alphabet import *
|
|
79
|
+
from .annotation import *
|
|
80
|
+
from .codon import *
|
|
81
|
+
from .profile import *
|
|
82
|
+
from .search import *
|
|
83
|
+
from .seqtypes import *
|
|
84
|
+
from .sequence import *
|