biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,940 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.io.mol"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Benjamin Mayer"
|
|
7
|
+
__all__ = ["SDFile", "SDRecord", "Metadata"]
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
import warnings
|
|
11
|
+
from collections.abc import Mapping, MutableMapping
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
import numpy as np
|
|
14
|
+
from biotite.file import (
|
|
15
|
+
DeserializationError,
|
|
16
|
+
File,
|
|
17
|
+
InvalidFileError,
|
|
18
|
+
SerializationError,
|
|
19
|
+
is_open_compatible,
|
|
20
|
+
is_text,
|
|
21
|
+
)
|
|
22
|
+
from biotite.structure.atoms import AtomArray
|
|
23
|
+
from biotite.structure.bonds import BondList, BondType
|
|
24
|
+
from biotite.structure.io.mol.ctab import (
|
|
25
|
+
read_structure_from_ctab,
|
|
26
|
+
write_structure_to_ctab,
|
|
27
|
+
)
|
|
28
|
+
from biotite.structure.io.mol.header import Header
|
|
29
|
+
|
|
30
|
+
# Number of header lines
_N_HEADER = 3
# Line that marks the end of a record (see the ``$$$$`` line in the
# serialized example of ``SDRecord``)
_RECORD_DELIMITER = "$$$$"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Metadata(MutableMapping):
|
|
36
|
+
r"""
|
|
37
|
+
Additional nonstructural data in an SD record.
|
|
38
|
+
|
|
39
|
+
The metadata is stored as key-value pairs.
|
|
40
|
+
As SDF allows multiple different identifiers for keys,
|
|
41
|
+
the keys are represented by :class:`Metadata.Key`.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
metadata : dict, optional
|
|
46
|
+
The metadata as key-value pairs.
|
|
47
|
+
Keys are instances of :class:`Metadata.Key`.
|
|
48
|
+
Alternatively, keys can be given as strings, in which case the
|
|
49
|
+
string is used as the :attr:`Metadata.Key.name`.
|
|
50
|
+
Values are strings.
|
|
51
|
+
Line breaks in values are allowed.
|
|
52
|
+
|
|
53
|
+
Notes
|
|
54
|
+
-----
|
|
55
|
+
Key names may only contain alphanumeric characters, underscores and
|
|
56
|
+
periods.
|
|
57
|
+
|
|
58
|
+
Examples
|
|
59
|
+
--------
|
|
60
|
+
|
|
61
|
+
>>> metadata = Metadata({
|
|
62
|
+
... "foo": "Lorem ipsum",
|
|
63
|
+
... Metadata.Key(number=42, name="bar"): "dolor sit amet,\nconsectetur"
|
|
64
|
+
... })
|
|
65
|
+
>>> print(metadata)
|
|
66
|
+
> <foo>
|
|
67
|
+
Lorem ipsum
|
|
68
|
+
<BLANKLINE>
|
|
69
|
+
> DT42 <bar>
|
|
70
|
+
dolor sit amet,
|
|
71
|
+
consectetur
|
|
72
|
+
<BLANKLINE>
|
|
73
|
+
>>> print(metadata["foo"])
|
|
74
|
+
Lorem ipsum
|
|
75
|
+
>>> # Strings can be only used for access, if the key contains only a name
|
|
76
|
+
>>> print("bar" in metadata)
|
|
77
|
+
False
|
|
78
|
+
>>> print(metadata[Metadata.Key(number=42, name="bar")])
|
|
79
|
+
dolor sit amet,
|
|
80
|
+
consectetur
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
@dataclass(frozen=True, kw_only=True)
|
|
84
|
+
class Key:
|
|
85
|
+
"""
|
|
86
|
+
A metadata key.
|
|
87
|
+
|
|
88
|
+
Parameters
|
|
89
|
+
----------
|
|
90
|
+
number : int, optional
|
|
91
|
+
number of the field in the database.
|
|
92
|
+
name : str, optional
|
|
93
|
+
Name of the field.
|
|
94
|
+
May only contain alphanumeric characters, underscores and
|
|
95
|
+
periods.
|
|
96
|
+
registry_internal : int, optional
|
|
97
|
+
Internal registry number.
|
|
98
|
+
registry_external : str, optional
|
|
99
|
+
External registry number.
|
|
100
|
+
|
|
101
|
+
Attributes
|
|
102
|
+
----------
|
|
103
|
+
number, name, registry_internal, registry_external
|
|
104
|
+
The same as the parameters.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
# The characters that can be given as input to `name`
|
|
108
|
+
# First character must be alphanumeric,
|
|
109
|
+
# following characters may include underscores and periods
|
|
110
|
+
# Although the V3000 specification forbids the use of periods,
|
|
111
|
+
# they are still used in practice and therefore allowed here
|
|
112
|
+
_NAME_INPUT_REGEX = re.compile(r"^[a-zA-Z0-9][\w.]*$")
|
|
113
|
+
# These regexes are used to parse the key from a line
|
|
114
|
+
_COMPONENT_REGEX = {
|
|
115
|
+
"number": re.compile(r"^DT(\d+)$"),
|
|
116
|
+
"name": re.compile(r"^<([a-zA-Z0-9][\w.]*)>$"),
|
|
117
|
+
"registry_internal": re.compile(r"^(\d+)$"),
|
|
118
|
+
"registry_external": re.compile(r"^\(([\w.-]*)\)$"),
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
number: ... = None
|
|
122
|
+
name: ... = None
|
|
123
|
+
registry_internal: ... = None
|
|
124
|
+
registry_external: ... = None
|
|
125
|
+
|
|
126
|
+
def __post_init__(self):
|
|
127
|
+
if self.name is None and self.number is None:
|
|
128
|
+
raise ValueError("At least the field number or name must be set")
|
|
129
|
+
if self.name is not None:
|
|
130
|
+
if not Metadata.Key._NAME_INPUT_REGEX.match(self.name):
|
|
131
|
+
raise ValueError(
|
|
132
|
+
f"Invalid name '{self.name}', must only contains "
|
|
133
|
+
"alphanumeric characters, underscores and periods"
|
|
134
|
+
)
|
|
135
|
+
if self.number is not None:
|
|
136
|
+
# Cannot set field directly as 'frozen=True'
|
|
137
|
+
object.__setattr__(self, "number", int(self.number))
|
|
138
|
+
if self.registry_internal is not None:
|
|
139
|
+
object.__setattr__(
|
|
140
|
+
self, "registry_internal", int(self.registry_internal)
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
@staticmethod
|
|
144
|
+
def deserialize(text):
|
|
145
|
+
"""
|
|
146
|
+
Create a :class:`Metadata.Key` object by deserializing the given text
|
|
147
|
+
content.
|
|
148
|
+
|
|
149
|
+
Parameters
|
|
150
|
+
----------
|
|
151
|
+
text : str
|
|
152
|
+
The content to be deserialized.
|
|
153
|
+
|
|
154
|
+
Returns
|
|
155
|
+
-------
|
|
156
|
+
key : Metadata.Key
|
|
157
|
+
The parsed key.
|
|
158
|
+
"""
|
|
159
|
+
# Omit the leading '>'
|
|
160
|
+
key_components = text[1:].split()
|
|
161
|
+
parsed_component_dict = {}
|
|
162
|
+
for component in key_components:
|
|
163
|
+
# For each component in each the key,
|
|
164
|
+
# try to match it with each of the regexes
|
|
165
|
+
for attr_name, regex in Metadata.Key._COMPONENT_REGEX.items():
|
|
166
|
+
pattern_match = regex.match(component)
|
|
167
|
+
if pattern_match is None:
|
|
168
|
+
# Try next pattern
|
|
169
|
+
continue
|
|
170
|
+
if attr_name in parsed_component_dict:
|
|
171
|
+
raise DeserializationError(
|
|
172
|
+
f"Duplicate key component for '{attr_name}'"
|
|
173
|
+
)
|
|
174
|
+
value = pattern_match.group(1)
|
|
175
|
+
parsed_component_dict[attr_name] = value
|
|
176
|
+
break
|
|
177
|
+
else:
|
|
178
|
+
# There is no matching pattern
|
|
179
|
+
raise DeserializationError(f"Invalid key component '{component}'")
|
|
180
|
+
return Metadata.Key(**parsed_component_dict)
|
|
181
|
+
|
|
182
|
+
def serialize(self):
|
|
183
|
+
"""
|
|
184
|
+
Convert this object into text content.
|
|
185
|
+
|
|
186
|
+
Returns
|
|
187
|
+
-------
|
|
188
|
+
content : str
|
|
189
|
+
The serialized content.
|
|
190
|
+
"""
|
|
191
|
+
key_string = "> "
|
|
192
|
+
if self.number is not None:
|
|
193
|
+
key_string += f"DT{self.number} "
|
|
194
|
+
if self.name is not None:
|
|
195
|
+
key_string += f"<{self.name}> "
|
|
196
|
+
if self.registry_internal is not None:
|
|
197
|
+
key_string += f"{self.registry_internal} "
|
|
198
|
+
if self.registry_external is not None:
|
|
199
|
+
key_string += f"({self.registry_external}) "
|
|
200
|
+
return key_string
|
|
201
|
+
|
|
202
|
+
def __str__(self):
|
|
203
|
+
return self.serialize()
|
|
204
|
+
|
|
205
|
+
def __init__(self, metadata=None):
|
|
206
|
+
if metadata is None:
|
|
207
|
+
metadata = {}
|
|
208
|
+
self._metadata = {}
|
|
209
|
+
for key, value in metadata.items():
|
|
210
|
+
self._metadata[_to_metadata_key(key)] = value
|
|
211
|
+
|
|
212
|
+
@staticmethod
|
|
213
|
+
def deserialize(text):
|
|
214
|
+
"""
|
|
215
|
+
Create a :class:`Metadata` objtect by deserializing the given text content.
|
|
216
|
+
|
|
217
|
+
Parameters
|
|
218
|
+
----------
|
|
219
|
+
text : str
|
|
220
|
+
The content to be deserialized.
|
|
221
|
+
|
|
222
|
+
Returns
|
|
223
|
+
-------
|
|
224
|
+
metadata : Metadata
|
|
225
|
+
The parsed metadata.
|
|
226
|
+
"""
|
|
227
|
+
metadata = {}
|
|
228
|
+
current_key = None
|
|
229
|
+
current_value = None
|
|
230
|
+
for line in text.splitlines():
|
|
231
|
+
line = line.strip()
|
|
232
|
+
if len(line) == 0:
|
|
233
|
+
# Skip empty lines
|
|
234
|
+
continue
|
|
235
|
+
if line.startswith(">"):
|
|
236
|
+
_add_key_value_pair(metadata, current_key, current_value)
|
|
237
|
+
current_key = Metadata.Key.deserialize(line)
|
|
238
|
+
current_value = None
|
|
239
|
+
else:
|
|
240
|
+
if current_key is None:
|
|
241
|
+
raise DeserializationError("Value found before metadata key")
|
|
242
|
+
if current_value is None:
|
|
243
|
+
current_value = line
|
|
244
|
+
else:
|
|
245
|
+
current_value += "\n" + line
|
|
246
|
+
# Add final pair
|
|
247
|
+
_add_key_value_pair(metadata, current_key, current_value)
|
|
248
|
+
return Metadata(metadata)
|
|
249
|
+
|
|
250
|
+
def serialize(self):
|
|
251
|
+
"""
|
|
252
|
+
Convert this object into text content.
|
|
253
|
+
|
|
254
|
+
Returns
|
|
255
|
+
-------
|
|
256
|
+
content : str
|
|
257
|
+
The serialized content.
|
|
258
|
+
"""
|
|
259
|
+
text_blocks = []
|
|
260
|
+
for key, value in self._metadata.items():
|
|
261
|
+
text_blocks.append(key.serialize())
|
|
262
|
+
# Add empty line after value
|
|
263
|
+
text_blocks.append(value + "\n")
|
|
264
|
+
return _join_with_terminal_newline(text_blocks)
|
|
265
|
+
|
|
266
|
+
def __getitem__(self, key):
|
|
267
|
+
return self._metadata[_to_metadata_key(key)]
|
|
268
|
+
|
|
269
|
+
def __setitem__(self, key, value):
|
|
270
|
+
if len(value) == 0:
|
|
271
|
+
raise ValueError("Metadata value must not be empty")
|
|
272
|
+
self._metadata[_to_metadata_key(key)] = value
|
|
273
|
+
|
|
274
|
+
def __delitem__(self, key):
|
|
275
|
+
del self._metadata[_to_metadata_key(key)]
|
|
276
|
+
|
|
277
|
+
def __iter__(self):
|
|
278
|
+
return iter(self._metadata)
|
|
279
|
+
|
|
280
|
+
def __len__(self):
|
|
281
|
+
return len(self._metadata)
|
|
282
|
+
|
|
283
|
+
def __eq__(self, other):
|
|
284
|
+
if not isinstance(other, type(self)):
|
|
285
|
+
return False
|
|
286
|
+
if set(self.keys()) != set(other.keys()):
|
|
287
|
+
return False
|
|
288
|
+
for key in self.keys():
|
|
289
|
+
if self[key] != other[key]:
|
|
290
|
+
return False
|
|
291
|
+
return True
|
|
292
|
+
|
|
293
|
+
def __str__(self):
|
|
294
|
+
return self.serialize()
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class SDRecord:
|
|
298
|
+
"""
|
|
299
|
+
A record in a SD file.
|
|
300
|
+
|
|
301
|
+
Parameters
|
|
302
|
+
----------
|
|
303
|
+
header : Header, optional
|
|
304
|
+
The header of the record.
|
|
305
|
+
By default, an empty header is created.
|
|
306
|
+
ctab : str, optional
|
|
307
|
+
The connection table (atoms and bonds) in the record.
|
|
308
|
+
By default, an empty structure is created.
|
|
309
|
+
metadata : Metadata, Mapping or str, optional
|
|
310
|
+
The metadata of the record.
|
|
311
|
+
Can be given as dictionary mapping :attr:`Metadata.Key.name`
|
|
312
|
+
to the respective metadata value.
|
|
313
|
+
By default, no metadata is appended to the record.
|
|
314
|
+
|
|
315
|
+
Attributes
|
|
316
|
+
----------
|
|
317
|
+
header, ctab, metadata
|
|
318
|
+
The same as the parameters.
|
|
319
|
+
|
|
320
|
+
Examples
|
|
321
|
+
--------
|
|
322
|
+
|
|
323
|
+
>>> atoms = residue("ALA")
|
|
324
|
+
>>> record = SDRecord(header=Header(mol_name="ALA", dimensions="3D"))
|
|
325
|
+
>>> record.set_structure(atoms)
|
|
326
|
+
>>> print(record.get_structure())
|
|
327
|
+
0 N -0.966 0.493 1.500
|
|
328
|
+
0 C 0.257 0.418 0.692
|
|
329
|
+
0 C -0.094 0.017 -0.716
|
|
330
|
+
0 O -1.056 -0.682 -0.923
|
|
331
|
+
0 C 1.204 -0.620 1.296
|
|
332
|
+
0 O 0.661 0.439 -1.742
|
|
333
|
+
0 H -1.383 -0.425 1.482
|
|
334
|
+
0 H -0.676 0.661 2.452
|
|
335
|
+
0 H 0.746 1.392 0.682
|
|
336
|
+
0 H 1.459 -0.330 2.316
|
|
337
|
+
0 H 0.715 -1.594 1.307
|
|
338
|
+
0 H 2.113 -0.676 0.697
|
|
339
|
+
0 H 0.435 0.182 -2.647
|
|
340
|
+
>>> # Add the record to an SD file
|
|
341
|
+
>>> file = SDFile()
|
|
342
|
+
>>> file["ALA"] = record
|
|
343
|
+
>>> print(file)
|
|
344
|
+
ALA
|
|
345
|
+
3D
|
|
346
|
+
<BLANKLINE>
|
|
347
|
+
13 12 0 0 0 0 0 0 0 1 V2000
|
|
348
|
+
-0.9660 0.4930 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
349
|
+
0.2570 0.4180 0.6920 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
350
|
+
-0.0940 0.0170 -0.7160 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
351
|
+
-1.0560 -0.6820 -0.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
352
|
+
1.2040 -0.6200 1.2960 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
353
|
+
0.6610 0.4390 -1.7420 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
354
|
+
-1.3830 -0.4250 1.4820 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
355
|
+
-0.6760 0.6610 2.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
356
|
+
0.7460 1.3920 0.6820 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
357
|
+
1.4590 -0.3300 2.3160 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
358
|
+
0.7150 -1.5940 1.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
359
|
+
2.1130 -0.6760 0.6970 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
360
|
+
0.4350 0.1820 -2.6470 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
361
|
+
1 2 1 0 0 0 0
|
|
362
|
+
1 7 1 0 0 0 0
|
|
363
|
+
1 8 1 0 0 0 0
|
|
364
|
+
2 3 1 0 0 0 0
|
|
365
|
+
2 5 1 0 0 0 0
|
|
366
|
+
2 9 1 0 0 0 0
|
|
367
|
+
3 4 2 0 0 0 0
|
|
368
|
+
3 6 1 0 0 0 0
|
|
369
|
+
5 10 1 0 0 0 0
|
|
370
|
+
5 11 1 0 0 0 0
|
|
371
|
+
5 12 1 0 0 0 0
|
|
372
|
+
6 13 1 0 0 0 0
|
|
373
|
+
M END
|
|
374
|
+
$$$$
|
|
375
|
+
<BLANKLINE>
|
|
376
|
+
"""
|
|
377
|
+
|
|
378
|
+
def __init__(self, header=None, ctab=None, metadata=None):
    """
    Set up the three sections of the record.

    Parameters
    ----------
    header : Header or str, optional
        The header section.
        If omitted, a default :class:`Header` is created.
    ctab : str, optional
        The connection table text, stored as given.
    metadata : Metadata or Mapping or str, optional
        The metadata section.
        A mapping is wrapped into :class:`Metadata`, a string is kept
        in serialized form.
        If omitted, an empty :class:`Metadata` is created.

    Raises
    ------
    TypeError
        If `metadata` is of an unsupported type.
    """
    self._header = Header() if header is None else header
    self._ctab = ctab

    if metadata is None:
        resolved = Metadata()
    elif isinstance(metadata, Metadata):
        resolved = metadata
    elif isinstance(metadata, Mapping):
        resolved = Metadata(metadata)
    elif isinstance(metadata, str):
        # Serialized form -> will be lazily deserialized
        resolved = metadata
    else:
        raise TypeError(
            "Expected 'Metadata', Mapping or str, "
            f"but got '{type(metadata).__name__}'"
        )
    self._metadata = resolved
|
|
400
|
+
|
|
401
|
+
@property
|
|
402
|
+
def header(self):
|
|
403
|
+
if isinstance(self._header, str):
|
|
404
|
+
try:
|
|
405
|
+
self._header = Header.deserialize(self._header)
|
|
406
|
+
except Exception:
|
|
407
|
+
raise DeserializationError("Failed to deserialize header")
|
|
408
|
+
return self._header
|
|
409
|
+
|
|
410
|
+
@header.setter
|
|
411
|
+
def header(self, header):
|
|
412
|
+
self._header = header
|
|
413
|
+
|
|
414
|
+
@property
|
|
415
|
+
def ctab(self):
|
|
416
|
+
# CTAB string cannot be changed directly -> no setter
|
|
417
|
+
return self._ctab
|
|
418
|
+
|
|
419
|
+
@property
|
|
420
|
+
def metadata(self):
|
|
421
|
+
if isinstance(self._metadata, str):
|
|
422
|
+
try:
|
|
423
|
+
self._metadata = Metadata.deserialize(self._metadata)
|
|
424
|
+
except Exception:
|
|
425
|
+
raise DeserializationError("Failed to deserialize metadata")
|
|
426
|
+
return self._metadata
|
|
427
|
+
|
|
428
|
+
@metadata.setter
def metadata(self, metadata):
    """
    Replace the metadata of the record.

    Parameters
    ----------
    metadata : Metadata or Mapping
        The new metadata.
        A plain mapping is wrapped into :class:`Metadata`.

    Raises
    ------
    TypeError
        If `metadata` is neither a :class:`Metadata` nor a mapping.
    """
    if not isinstance(metadata, (Metadata, Mapping)):
        raise TypeError(
            f"Expected 'Metadata' or Mapping, but got '{type(metadata).__name__}'"
        )
    if isinstance(metadata, Metadata):
        self._metadata = metadata
    else:
        self._metadata = Metadata(metadata)
|
|
438
|
+
|
|
439
|
+
@staticmethod
def deserialize(text):
    """
    Build an :class:`SDRecord` from its text representation.

    The text is only split into its header, CTAB and metadata
    sections here; each section is passed on in serialized form.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    record : SDRecord
        The parsed record.
    """
    all_lines = text.splitlines()
    # Index of the first line after the connection table
    metadata_start = _get_ctab_stop(all_lines)

    sections = (
        all_lines[:_N_HEADER],
        all_lines[_N_HEADER:metadata_start],
        all_lines[metadata_start:],
    )
    header, ctab, metadata = (
        _join_with_terminal_newline(section) for section in sections
    )
    return SDRecord(header, ctab, metadata)
|
|
461
|
+
|
|
462
|
+
def serialize(self):
|
|
463
|
+
"""
|
|
464
|
+
Convert this object into text content.
|
|
465
|
+
|
|
466
|
+
Returns
|
|
467
|
+
-------
|
|
468
|
+
content : str
|
|
469
|
+
The serialized content.
|
|
470
|
+
"""
|
|
471
|
+
if isinstance(self._header, str):
|
|
472
|
+
header_string = self._header
|
|
473
|
+
else:
|
|
474
|
+
header_string = self._header.serialize()
|
|
475
|
+
|
|
476
|
+
if self._ctab is None:
|
|
477
|
+
ctab_string = _empty_ctab()
|
|
478
|
+
else:
|
|
479
|
+
ctab_string = self._ctab
|
|
480
|
+
|
|
481
|
+
if isinstance(self._metadata, str):
|
|
482
|
+
metadata_string = self._metadata
|
|
483
|
+
else:
|
|
484
|
+
metadata_string = self._metadata.serialize()
|
|
485
|
+
|
|
486
|
+
return header_string + ctab_string + metadata_string
|
|
487
|
+
|
|
488
|
+
def get_structure(self):
    """
    Parse the structural data in the SD record.

    Returns
    -------
    array : AtomArray
        This :class:`AtomArray` contains the optional ``charge``
        annotation and has an associated :class:`BondList`.
        All other annotation categories, except ``element`` are
        empty.

    Raises
    ------
    InvalidFileError
        If the record holds no connection table, i.e. none was set
        yet or the stored one is empty.
    """
    # A record created without a CTAB stores None:
    # report it like an empty CTAB instead of failing on 'None.splitlines()'
    if self._ctab is None:
        raise InvalidFileError("File does not contain structure data")
    ctab_lines = self._ctab.splitlines()
    if len(ctab_lines) == 0:
        raise InvalidFileError("File does not contain structure data")
    return read_structure_from_ctab(ctab_lines)
|
|
504
|
+
|
|
505
|
+
def set_structure(self, atoms, default_bond_type=BondType.ANY, version=None):
    """
    Store a structure as the connection table of this SD record.

    Parameters
    ----------
    atoms : AtomArray
        The array to be saved into this file.
        Must have an associated :class:`BondList`.
    default_bond_type : BondType, optional
        Bond type fallback for the *Bond block*, if a
        :class:`BondType` has no CTAB counterpart.
        By default, each such bond is treated as
        :attr:`BondType.ANY`.
    version : {"V2000", "V3000"}, optional
        The version of the CTAB format.
        ``"V2000"`` uses the *Atom* and *Bond* block, while
        ``"V3000"`` uses the *Properties* block.
        By default, ``"V2000"`` is used, unless the number of atoms
        or bonds exceeds 999, in which case ``"V3000"`` is used.
    """
    ctab_lines = write_structure_to_ctab(atoms, default_bond_type, version)
    # The CTAB is kept as a single newline-terminated string
    self._ctab = _join_with_terminal_newline(ctab_lines)
|
|
529
|
+
|
|
530
|
+
def __eq__(self, other):
|
|
531
|
+
if not isinstance(other, type(self)):
|
|
532
|
+
return False
|
|
533
|
+
if not self.header == other.header:
|
|
534
|
+
return False
|
|
535
|
+
if not self.ctab == other.ctab:
|
|
536
|
+
return False
|
|
537
|
+
if not self.metadata == other.metadata:
|
|
538
|
+
return False
|
|
539
|
+
return True
|
|
540
|
+
|
|
541
|
+
def __str__(self):
|
|
542
|
+
return self.serialize()
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
class SDFile(File, MutableMapping):
|
|
546
|
+
"""
|
|
547
|
+
This class represents an SD file for storing small molecule
|
|
548
|
+
structures.
|
|
549
|
+
|
|
550
|
+
The records for each molecule in the file can be accessed and
|
|
551
|
+
modified like a dictionary.
|
|
552
|
+
The structures can be parsed and written from/to each
|
|
553
|
+
:class:`SDRecord` object via :func:`get_structure()` or
|
|
554
|
+
:func:`set_structure()`, respectively.
|
|
555
|
+
|
|
556
|
+
Parameters
|
|
557
|
+
----------
|
|
558
|
+
records : dict (str -> SDRecord), optional
|
|
559
|
+
The initial records of the file.
|
|
560
|
+
Maps the record names to the corresponding :class:`SDRecord` objects.
|
|
561
|
+
By default no initial records are added.
|
|
562
|
+
|
|
563
|
+
Attributes
|
|
564
|
+
----------
|
|
565
|
+
record : SDRecord
|
|
566
|
+
The sole record of the file.
|
|
567
|
+
If the file contains multiple records, an exception is raised.
|
|
568
|
+
|
|
569
|
+
Examples
|
|
570
|
+
--------
|
|
571
|
+
Read a SD file and parse the molecular structure:
|
|
572
|
+
|
|
573
|
+
>>> import os.path
|
|
574
|
+
>>> file = SDFile.read(os.path.join(path_to_structures, "molecules", "TYR.sdf"))
|
|
575
|
+
>>> molecule = file.record.get_structure()
|
|
576
|
+
>>> print(molecule)
|
|
577
|
+
0 N 1.320 0.952 1.428
|
|
578
|
+
0 C -0.018 0.429 1.734
|
|
579
|
+
0 C -0.103 0.094 3.201
|
|
580
|
+
0 O 0.886 -0.254 3.799
|
|
581
|
+
0 C -0.274 -0.831 0.907
|
|
582
|
+
0 C -0.189 -0.496 -0.559
|
|
583
|
+
0 C 1.022 -0.589 -1.219
|
|
584
|
+
0 C -1.324 -0.102 -1.244
|
|
585
|
+
0 C 1.103 -0.282 -2.563
|
|
586
|
+
0 C -1.247 0.210 -2.587
|
|
587
|
+
0 C -0.032 0.118 -3.252
|
|
588
|
+
0 O 0.044 0.420 -4.574
|
|
589
|
+
0 O -1.279 0.184 3.842
|
|
590
|
+
0 H 1.977 0.225 1.669
|
|
591
|
+
0 H 1.365 1.063 0.426
|
|
592
|
+
0 H -0.767 1.183 1.489
|
|
593
|
+
0 H 0.473 -1.585 1.152
|
|
594
|
+
0 H -1.268 -1.219 1.134
|
|
595
|
+
0 H 1.905 -0.902 -0.683
|
|
596
|
+
0 H -2.269 -0.031 -0.727
|
|
597
|
+
0 H 2.049 -0.354 -3.078
|
|
598
|
+
0 H -2.132 0.523 -3.121
|
|
599
|
+
0 H -0.123 -0.399 -5.059
|
|
600
|
+
0 H -1.333 -0.030 4.784
|
|
601
|
+
|
|
602
|
+
Note that important atom annotations may be missing.
|
|
603
|
+
These can be set afterwards:
|
|
604
|
+
|
|
605
|
+
>>> molecule.res_name[:] = "TYR"
|
|
606
|
+
>>> molecule.atom_name[:] = create_atom_names(molecule)
|
|
607
|
+
>>> print(molecule)
|
|
608
|
+
0 TYR N1 N 1.320 0.952 1.428
|
|
609
|
+
0 TYR C1 C -0.018 0.429 1.734
|
|
610
|
+
0 TYR C2 C -0.103 0.094 3.201
|
|
611
|
+
0 TYR O1 O 0.886 -0.254 3.799
|
|
612
|
+
0 TYR C3 C -0.274 -0.831 0.907
|
|
613
|
+
0 TYR C4 C -0.189 -0.496 -0.559
|
|
614
|
+
0 TYR C5 C 1.022 -0.589 -1.219
|
|
615
|
+
0 TYR C6 C -1.324 -0.102 -1.244
|
|
616
|
+
0 TYR C7 C 1.103 -0.282 -2.563
|
|
617
|
+
0 TYR C8 C -1.247 0.210 -2.587
|
|
618
|
+
0 TYR C9 C -0.032 0.118 -3.252
|
|
619
|
+
0 TYR O2 O 0.044 0.420 -4.574
|
|
620
|
+
0 TYR O3 O -1.279 0.184 3.842
|
|
621
|
+
0 TYR H1 H 1.977 0.225 1.669
|
|
622
|
+
0 TYR H2 H 1.365 1.063 0.426
|
|
623
|
+
0 TYR H3 H -0.767 1.183 1.489
|
|
624
|
+
0 TYR H4 H 0.473 -1.585 1.152
|
|
625
|
+
0 TYR H5 H -1.268 -1.219 1.134
|
|
626
|
+
0 TYR H6 H 1.905 -0.902 -0.683
|
|
627
|
+
0 TYR H7 H -2.269 -0.031 -0.727
|
|
628
|
+
0 TYR H8 H 2.049 -0.354 -3.078
|
|
629
|
+
0 TYR H9 H -2.132 0.523 -3.121
|
|
630
|
+
0 TYR H10 H -0.123 -0.399 -5.059
|
|
631
|
+
0 TYR H11 H -1.333 -0.030 4.784
|
|
632
|
+
|
|
633
|
+
Create a SD file and write it to disk:
|
|
634
|
+
|
|
635
|
+
>>> another_molecule = residue("ALA")
|
|
636
|
+
>>> file = SDFile()
|
|
637
|
+
>>> record = SDRecord()
|
|
638
|
+
>>> record.set_structure(molecule)
|
|
639
|
+
>>> file["TYR"] = record
|
|
640
|
+
>>> record = SDRecord()
|
|
641
|
+
>>> record.set_structure(another_molecule)
|
|
642
|
+
>>> file["ALA"] = record
|
|
643
|
+
>>> file.write(os.path.join(path_to_directory, "some_file.sdf"))
|
|
644
|
+
>>> print(file)
|
|
645
|
+
TYR
|
|
646
|
+
<BLANKLINE>
|
|
647
|
+
<BLANKLINE>
|
|
648
|
+
24 24 0 0 0 0 0 0 0 1 V2000
|
|
649
|
+
1.3200 0.9520 1.4280 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
650
|
+
-0.0180 0.4290 1.7340 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
651
|
+
-0.1030 0.0940 3.2010 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
652
|
+
0.8860 -0.2540 3.7990 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
653
|
+
-0.2740 -0.8310 0.9070 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
654
|
+
-0.1890 -0.4960 -0.5590 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
655
|
+
1.0220 -0.5890 -1.2190 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
656
|
+
-1.3240 -0.1020 -1.2440 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
657
|
+
1.1030 -0.2820 -2.5630 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
658
|
+
-1.2470 0.2100 -2.5870 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
659
|
+
-0.0320 0.1180 -3.2520 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
660
|
+
0.0440 0.4200 -4.5740 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
661
|
+
-1.2790 0.1840 3.8420 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
662
|
+
1.9770 0.2250 1.6690 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
663
|
+
1.3650 1.0630 0.4260 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
664
|
+
-0.7670 1.1830 1.4890 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
665
|
+
0.4730 -1.5850 1.1520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
666
|
+
-1.2680 -1.2190 1.1340 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
667
|
+
1.9050 -0.9020 -0.6830 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
668
|
+
-2.2690 -0.0310 -0.7270 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
669
|
+
2.0490 -0.3540 -3.0780 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
670
|
+
-2.1320 0.5230 -3.1210 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
671
|
+
-0.1230 -0.3990 -5.0590 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
672
|
+
-1.3330 -0.0300 4.7840 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
673
|
+
1 2 1 0 0 0 0
|
|
674
|
+
1 14 1 0 0 0 0
|
|
675
|
+
1 15 1 0 0 0 0
|
|
676
|
+
2 3 1 0 0 0 0
|
|
677
|
+
2 5 1 0 0 0 0
|
|
678
|
+
2 16 1 0 0 0 0
|
|
679
|
+
3 4 2 0 0 0 0
|
|
680
|
+
3 13 1 0 0 0 0
|
|
681
|
+
5 6 1 0 0 0 0
|
|
682
|
+
5 17 1 0 0 0 0
|
|
683
|
+
5 18 1 0 0 0 0
|
|
684
|
+
6 7 2 0 0 0 0
|
|
685
|
+
6 8 1 0 0 0 0
|
|
686
|
+
7 9 1 0 0 0 0
|
|
687
|
+
7 19 1 0 0 0 0
|
|
688
|
+
8 10 2 0 0 0 0
|
|
689
|
+
8 20 1 0 0 0 0
|
|
690
|
+
9 11 2 0 0 0 0
|
|
691
|
+
9 21 1 0 0 0 0
|
|
692
|
+
10 11 1 0 0 0 0
|
|
693
|
+
10 22 1 0 0 0 0
|
|
694
|
+
11 12 1 0 0 0 0
|
|
695
|
+
12 23 1 0 0 0 0
|
|
696
|
+
13 24 1 0 0 0 0
|
|
697
|
+
M END
|
|
698
|
+
$$$$
|
|
699
|
+
ALA
|
|
700
|
+
<BLANKLINE>
|
|
701
|
+
<BLANKLINE>
|
|
702
|
+
13 12 0 0 0 0 0 0 0 1 V2000
|
|
703
|
+
-0.9660 0.4930 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
704
|
+
0.2570 0.4180 0.6920 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
705
|
+
-0.0940 0.0170 -0.7160 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
706
|
+
-1.0560 -0.6820 -0.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
707
|
+
1.2040 -0.6200 1.2960 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
708
|
+
0.6610 0.4390 -1.7420 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
709
|
+
-1.3830 -0.4250 1.4820 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
710
|
+
-0.6760 0.6610 2.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
711
|
+
0.7460 1.3920 0.6820 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
712
|
+
1.4590 -0.3300 2.3160 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
713
|
+
0.7150 -1.5940 1.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
714
|
+
2.1130 -0.6760 0.6970 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
715
|
+
0.4350 0.1820 -2.6470 H 0 0 0 0 0 0 0 0 0 0 0 0
|
|
716
|
+
1 2 1 0 0 0 0
|
|
717
|
+
1 7 1 0 0 0 0
|
|
718
|
+
1 8 1 0 0 0 0
|
|
719
|
+
2 3 1 0 0 0 0
|
|
720
|
+
2 5 1 0 0 0 0
|
|
721
|
+
2 9 1 0 0 0 0
|
|
722
|
+
3 4 2 0 0 0 0
|
|
723
|
+
3 6 1 0 0 0 0
|
|
724
|
+
5 10 1 0 0 0 0
|
|
725
|
+
5 11 1 0 0 0 0
|
|
726
|
+
5 12 1 0 0 0 0
|
|
727
|
+
6 13 1 0 0 0 0
|
|
728
|
+
M END
|
|
729
|
+
$$$$
|
|
730
|
+
<BLANKLINE>
|
|
731
|
+
"""
|
|
732
|
+
|
|
733
|
+
def __init__(self, records=None):
    """
    Create the file, optionally filled with initial records.

    Parameters
    ----------
    records : dict (str -> SDRecord), optional
        The initial records, keyed by record name.
        Values that are not :class:`SDRecord` objects are stored
        unchanged.
    """
    self._records = {}
    if records is None:
        return
    for name, entry in records.items():
        if isinstance(entry, SDRecord):
            # Keep the molecule name in the header in sync with the key
            entry.header.mol_name = name
        self._records[name] = entry
|
|
740
|
+
|
|
741
|
+
@property
def lines(self):
    """
    The serialized file content, split into individual lines.
    """
    content = self.serialize()
    return content.splitlines()
|
|
744
|
+
|
|
745
|
+
@property
def record(self):
    """
    The sole record of the file.

    Raises
    ------
    ValueError
        If the file contains no record or more than one record.
    """
    n_records = len(self)
    if n_records == 0:
        raise ValueError("There are no records in the file")
    if n_records > 1:
        raise ValueError("There are multiple records in the file")
    only_name = next(iter(self))
    return self[only_name]
|
|
752
|
+
|
|
753
|
+
@staticmethod
def deserialize(text):
    """
    Create an :class:`SDFile` by deserializing the given text content.

    The text is only split into records here; each record is kept in
    serialized form under the name found in its first line.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    file_object : SDFile
        The parsed file.
        Empty `text` gives a file without records.

    Warns
    -----
    UserWarning
        If non-empty `text` contains no record delimiter.
        The entire text is then treated as a single record.
    """
    lines = text.splitlines()
    if not lines:
        # Nothing to split: an empty file serializes to empty text,
        # so empty text deserializes to an empty file
        return SDFile()

    record_ends = np.array(
        [i for i, line in enumerate(lines) if line.startswith(_RECORD_DELIMITER)],
        dtype=int,
    )
    if len(record_ends) == 0:
        warnings.warn(
            "Final record delimiter missing, "
            "maybe this is a MOL file instead of a SD file"
        )
        # The ends are used as exclusive slice stops below.
        # As there is no delimiter line to leave out, the stop must be
        # one past the last line, otherwise that line would be dropped
        record_ends = np.array([len(lines)], dtype=int)
    # The first record starts at the first line and the last
    # delimiter is at the end of the file
    # Records in the middle start directly after the delimiter
    record_starts = np.concatenate(([0], record_ends[:-1] + 1), dtype=int)
    record_names = [lines[start].strip() for start in record_starts]
    return SDFile(
        {
            # Do not include the delimiter
            # -> stop at end (instead of end + 1)
            name: _join_with_terminal_newline(lines[start:end])
            for name, start, end in zip(record_names, record_starts, record_ends)
        }
    )
|
|
792
|
+
|
|
793
|
+
def serialize(self):
    """
    Convert this object into text content.

    Each record is followed by a record delimiter line.

    Returns
    -------
    content : str
        The serialized content.

    Raises
    ------
    SerializationError
        If a record cannot be serialized.
        The underlying error is attached as the cause.
    """
    text_blocks = []
    for record_name, record in self._records.items():
        if isinstance(record, str):
            # Record is already stored as text
            record_text = record
        else:
            try:
                record_text = record.serialize()
            except Exception as err:
                # Chain explicitly so the root cause is kept in the traceback
                raise SerializationError(
                    f"Failed to serialize record '{record_name}'"
                ) from err
        text_blocks.append(record_text)
        text_blocks.append(_RECORD_DELIMITER + "\n")
    return "".join(text_blocks)
|
|
816
|
+
|
|
817
|
+
@classmethod
def read(cls, file):
    """
    Load a SD file from a path or an open text file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : SDFile
        The parsed file.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if not is_open_compatible(file):
        # File object
        if not is_text(file):
            raise TypeError("A file opened in 'text' mode is required")
        return SDFile.deserialize(file.read())

    # File name
    with open(file, "r") as handle:
        content = handle.read()
    return SDFile.deserialize(content)
|
|
843
|
+
|
|
844
|
+
def write(self, file):
    """
    Write the contents of this object into a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be written to.
        Alternatively a file path can be supplied.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if is_open_compatible(file):
        # Serialize before opening: opening in "w" mode truncates the
        # target, so a serialization failure must happen first to
        # leave an existing file untouched
        content = self.serialize()
        with open(file, "w") as f:
            f.write(content)
    else:
        if not is_text(file):
            raise TypeError("A file opened in 'text' mode is required")
        file.write(self.serialize())
|
|
861
|
+
|
|
862
|
+
def __getitem__(self, key):
    """
    Get the record stored under the given name.

    A record that is still held in serialized form is deserialized on
    first access and the parsed :class:`SDRecord` replaces the text.

    Raises
    ------
    KeyError
        If there is no record with the given name.
    DeserializationError
        If the stored text cannot be parsed into a record.
        The underlying error is attached as the cause.
    """
    record = self._records[key]
    if isinstance(record, str):
        # Element is stored in serialized form
        # -> must be deserialized first
        try:
            record = SDRecord.deserialize(record)
        except Exception as err:
            # Chain explicitly so the root cause is kept in the traceback
            raise DeserializationError(
                f"Failed to deserialize record '{key}'"
            ) from err
        # Update with deserialized object
        self._records[key] = record
    return record
|
|
874
|
+
|
|
875
|
+
def __setitem__(self, key, record):
    """
    Store a record under the given name.

    The molecule name in the header of `record` is overwritten with
    `key`.

    Raises
    ------
    TypeError
        If `record` is not an :class:`SDRecord`.
    """
    if not isinstance(record, SDRecord):
        raise TypeError(f"Expected 'SDRecord', but got '{type(record).__name__}'")
    # The molecule name in the header is unique across the file
    record_header = record.header
    record_header.mol_name = key
    self._records[key] = record
|
|
881
|
+
|
|
882
|
+
def __delitem__(self, key):
    """
    Remove the record stored under the given name.
    """
    # A missing key raises KeyError, the removed value is discarded
    self._records.pop(key)
|
|
884
|
+
|
|
885
|
+
def __iter__(self):
    """
    Iterate over the names of the records in the file.
    """
    record_names = self._records.keys()
    return iter(record_names)
|
|
887
|
+
|
|
888
|
+
def __len__(self):
    """
    Return the number of records in the file.
    """
    n_records = len(self._records)
    return n_records
|
|
890
|
+
|
|
891
|
+
def __eq__(self, other):
    """
    Two files are equal if `other` is an instance of this file's
    type, both hold the same record names and the records under each
    name do not differ.
    """
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    # Stops at the first pair of records that differ
    return not any(self[name] != other[name] for name in self.keys())
|
|
900
|
+
|
|
901
|
+
def __str__(self):
    """
    Return the serialized text of this file.
    """
    serialized = self.serialize()
    return serialized
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
def _join_with_terminal_newline(text_blocks):
|
|
906
|
+
if len(text_blocks) == 0:
|
|
907
|
+
return ""
|
|
908
|
+
else:
|
|
909
|
+
return "\n".join(text_blocks) + "\n"
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
def _empty_ctab():
    """
    Return the CTAB text written for a structure without atoms and
    bonds.
    """
    no_atoms = AtomArray(0)
    no_atoms.bonds = BondList(0)
    ctab_lines = write_structure_to_ctab(no_atoms)
    return _join_with_terminal_newline(ctab_lines)
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
def _to_metadata_key(key):
    """
    Convert the given key into a :class:`Metadata.Key`.

    A :class:`Metadata.Key` is returned unchanged, a string is used
    as the ``name`` of a new key.

    Raises
    ------
    TypeError
        If `key` is neither a :class:`Metadata.Key` nor a string.
    """
    if isinstance(key, Metadata.Key):
        return key
    if isinstance(key, str):
        return Metadata.Key(name=key)
    raise TypeError(
        f"Expected 'Metadata.Key' or str, but got '{type(key).__name__}'"
    )
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
def _add_key_value_pair(metadata, key, value):
|
|
930
|
+
if key is not None:
|
|
931
|
+
if value is None:
|
|
932
|
+
raise DeserializationError(f"No value found for metadata key {key}")
|
|
933
|
+
metadata[key] = value
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
def _get_ctab_stop(lines):
    """
    Find the exclusive stop index of the connection table.

    Parameters
    ----------
    lines : list of str
        The lines of a record, starting with the header lines.

    Returns
    -------
    stop : int
        The index directly after the first CTAB terminator line found
        behind the header.
        If there is no such line, the number of lines is returned.
    """
    for i, line in enumerate(lines[_N_HEADER:], start=_N_HEADER):
        # The CTfile terminator is written as 'M  END' (two spaces):
        # compare tokens so the match does not depend on the exact
        # amount of whitespace between 'M' and 'END'
        if line.split()[:2] == ["M", "END"]:
            return i + 1
    return len(lines)
|