biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,819 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.pubchem"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Query",
|
|
9
|
+
"NameQuery",
|
|
10
|
+
"SmilesQuery",
|
|
11
|
+
"InchiQuery",
|
|
12
|
+
"InchiKeyQuery",
|
|
13
|
+
"FormulaQuery",
|
|
14
|
+
"SuperstructureQuery",
|
|
15
|
+
"SubstructureQuery",
|
|
16
|
+
"SimilarityQuery",
|
|
17
|
+
"IdentityQuery",
|
|
18
|
+
"search",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
import abc
|
|
22
|
+
import collections
|
|
23
|
+
import copy
|
|
24
|
+
import requests
|
|
25
|
+
from biotite.database.error import RequestError
|
|
26
|
+
from biotite.database.pubchem.error import parse_error_details
|
|
27
|
+
from biotite.database.pubchem.throttle import ThrottleStatus
|
|
28
|
+
from biotite.structure.io.mol.mol import MOLFile
|
|
29
|
+
|
|
30
|
+
_base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Query(metaclass=abc.ABCMeta):
    """
    Base class of every search query for the *PubChem* REST API.

    In contrast to most other database interfaces in *Biotite*,
    *PubChem* queries cannot be combined with logical operators.

    Subclasses must implement :meth:`get_input_url_path()` and may
    override :meth:`get_params()` and :meth:`get_files()`, which
    return empty payloads by default.
    """

    @abc.abstractmethod
    def get_input_url_path(self):
        """
        Get the *input* part of the request URL.

        Returns
        -------
        get_input_url_path : str
            The *input* part of the request URL.
            The string must neither start nor end with a slash
            character.
        """

    def get_params(self):
        """
        Get the POST payload for this query.

        Returns
        -------
        params : dict (str -> object)
            The payload.
            Empty, unless overridden by a subclass.
        """
        return dict()

    def get_files(self):
        """
        Get the POST file payload for this query.

        Returns
        -------
        files : dict (str -> object)
            The file payload.
            Empty, unless overridden by a subclass.
        """
        return dict()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class NameQuery(Query):
    """
    Search for compounds by their name.

    A compound only matches, if the given name is equal to its
    complete name.
    However, synonyms of the compound name are taken into account
    as well.

    Parameters
    ----------
    name : str
        The compound name to be searched.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """

    def __init__(self, name):
        self._name = name

    def get_input_url_path(self):
        # The input namespace is fixed for name searches
        return "compound/name"

    def get_params(self):
        # The searched name is passed as the 'name' request parameter
        return dict(name=self._name)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class SmilesQuery(Query):
    """
    Search for compounds by a
    *Simplified Molecular Input Line Entry Specification* (*SMILES*)
    string.

    Parameters
    ----------
    smiles : str
        The *SMILES* string.

    Examples
    --------

    >>> print(search(SmilesQuery("CCCC")))
    [7843]
    """

    def __init__(self, smiles):
        self._smiles = smiles

    def get_input_url_path(self):
        # The input namespace is fixed for SMILES searches
        return "compound/smiles"

    def get_params(self):
        # The SMILES string is passed as the 'smiles' request parameter
        return dict(smiles=self._smiles)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class InchiQuery(Query):
    """
    Search for compounds by an
    *International Chemical Identifier* (*InChI*) string.

    Parameters
    ----------
    inchi : str
        The *InChI* string.

    Examples
    --------

    >>> print(search(InchiQuery("InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3")))
    [7843]
    """

    def __init__(self, inchi):
        self._inchi = inchi

    def get_input_url_path(self):
        # The input namespace is fixed for InChI searches
        return "compound/inchi"

    def get_params(self):
        # The InChI string is passed as the 'inchi' request parameter
        return dict(inchi=self._inchi)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class InchiKeyQuery(Query):
    """
    Search for compounds by an
    *International Chemical Identifier* (*InChI*) key.

    Parameters
    ----------
    inchi_key : str
        The *InChI* key.

    Examples
    --------

    >>> print(search(InchiKeyQuery("IJDNQMDRQITEOD-UHFFFAOYSA-N")))
    [7843]
    """

    def __init__(self, inchi_key):
        self._inchi_key = inchi_key

    def get_input_url_path(self):
        # The input namespace is fixed for InChI key searches
        return "compound/inchikey"

    def get_params(self):
        # The InChI key is passed as the 'inchikey' request parameter
        return dict(inchikey=self._inchi_key)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class FormulaQuery(Query):
    """
    A query that searches for compounds with the given molecular
    formula.

    The formula can also be created from an :class:`AtomArray` using
    the :meth:`from_atoms()` method.

    Parameters
    ----------
    formula : str
        The molecular formula, i.e. each capitalized element with its
        count in the compound concatenated into a single string.
    allow_other_elements : bool, optional
        If set to true, compounds with additional elements, not present
        in the molecular formula, will also match.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can be
        considered unlimited.

    Examples
    --------

    >>> print(search(FormulaQuery("C4H10", number=5)))
    [..., ..., ..., ..., ...]
    >>> atom_array = residue("ALA")
    >>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def __init__(self, formula, allow_other_elements=False, number=None):
        self._formula = formula
        self._allow_other_elements = allow_other_elements
        self._number = number

    @classmethod
    def from_atoms(cls, atoms, allow_other_elements=False, number=None):
        """
        Create the query from the given structure by using its
        molecular formula.

        Parameters
        ----------
        atoms : AtomArray or AtomArrayStack
            The structure to take the molecular formula from.
        allow_other_elements : bool, optional
            If set to true, compounds with additional elements, not
            present in the molecular formula, will also match.
        number : int, optional
            The maximum number of matches that this query may return.
            By default, the *PubChem* default value is used, which can
            be considered unlimited.

        Returns
        -------
        query : FormulaQuery
            The query.
            When called on a subclass, an instance of that subclass is
            created.
        """
        element_counter = collections.Counter(atoms.element)
        # C and H come first in molecular formula
        leading = [element for element in ("C", "H") if element in element_counter]
        # All other elements follow in alphabetical order
        trailing = sorted(
            element for element in element_counter if element not in ("C", "H")
        )
        formula = "".join(
            _format_element(element, element_counter[element])
            for element in leading + trailing
        )
        # Use 'cls' instead of a hard-coded class,
        # so that subclasses obtain an instance of their own type
        return cls(formula, allow_other_elements, number)

    def get_input_url_path(self):
        # The 'fastformula' service seems not to accept the formula
        # in the parameter section of the request
        return f"compound/fastformula/{self._formula}"

    def get_params(self):
        params = {"AllowOtherElements": self._allow_other_elements}
        # Only set maximum number, if provided by the user
        # The PubChem default value for this might change over time
        if self._number is not None:
            params["MaxRecords"] = self._number
        return params
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _format_element(element, count):
|
|
279
|
+
if count == 1:
|
|
280
|
+
return element.capitalize()
|
|
281
|
+
else:
|
|
282
|
+
return element.capitalize() + str(count)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
286
|
+
"""
|
|
287
|
+
Abstract superclass for all structure based searches.
|
|
288
|
+
This class handles structure inputs and option formatting.
|
|
289
|
+
|
|
290
|
+
Exactly one of the input structure parameters `smiles`, `smarts`,
|
|
291
|
+
`inchi`, `sdf` or `cid` must be given.
|
|
292
|
+
|
|
293
|
+
Parameters
|
|
294
|
+
----------
|
|
295
|
+
smiles : str, optional
|
|
296
|
+
The query *SMILES* string.
|
|
297
|
+
smarts : str, optional
|
|
298
|
+
The query *SMARTS* pattern.
|
|
299
|
+
inchi : str, optional
|
|
300
|
+
The query *InChI* string.
|
|
301
|
+
sdf : str, optional
|
|
302
|
+
A query structure as SDF formatted string.
|
|
303
|
+
Usually :meth:`from_atoms()` is used to create the SDF from an
|
|
304
|
+
:class:`AtomArray`.
|
|
305
|
+
cid : int, optional
|
|
306
|
+
The query structure given as CID.
|
|
307
|
+
number : int, optional
|
|
308
|
+
The maximum number of matches that this query may return.
|
|
309
|
+
By default, the *PubChem* default value is used, which can
|
|
310
|
+
be considered unlimited.
|
|
311
|
+
"""
|
|
312
|
+
|
|
313
|
+
_query_keys = ("smiles", "smarts", "inchi", "sdf", "cid")
|
|
314
|
+
|
|
315
|
+
def __init__(self, **kwargs):
|
|
316
|
+
query_key_found = False
|
|
317
|
+
for query_key in StructureQuery._query_keys:
|
|
318
|
+
if query_key in kwargs:
|
|
319
|
+
if not query_key_found:
|
|
320
|
+
self._query_key = query_key
|
|
321
|
+
self._query_val = kwargs[query_key]
|
|
322
|
+
# Delete parameter from kwargs for later check for
|
|
323
|
+
# unused (invalid) parameters
|
|
324
|
+
del kwargs[query_key]
|
|
325
|
+
query_key_found = True
|
|
326
|
+
else:
|
|
327
|
+
# A query key was already found,
|
|
328
|
+
# duplicates are not allowed
|
|
329
|
+
raise TypeError(
|
|
330
|
+
"Only one of 'smiles', 'smarts', 'inchi', 'sdf' or "
|
|
331
|
+
"'cid' may be given"
|
|
332
|
+
)
|
|
333
|
+
if not query_key_found:
|
|
334
|
+
raise TypeError(
|
|
335
|
+
"Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' or 'cid'"
|
|
336
|
+
)
|
|
337
|
+
if "number" in kwargs:
|
|
338
|
+
self._number = kwargs["number"]
|
|
339
|
+
del kwargs["number"]
|
|
340
|
+
else:
|
|
341
|
+
self._number = None
|
|
342
|
+
# If there are still remaining parameters that were not handled
|
|
343
|
+
# by this superclass or the inheriting class, they are invalid
|
|
344
|
+
for key in kwargs:
|
|
345
|
+
raise TypeError(f"'{key}' is an invalid keyword argument")
|
|
346
|
+
|
|
347
|
+
@classmethod
|
|
348
|
+
def from_atoms(cls, atoms, *args, **kwargs):
|
|
349
|
+
"""
|
|
350
|
+
Create a query using the given query structure.
|
|
351
|
+
|
|
352
|
+
Parameters
|
|
353
|
+
----------
|
|
354
|
+
atoms : AtomArray or AtomArrayStack
|
|
355
|
+
The query structure.
|
|
356
|
+
*args, **kwargs
|
|
357
|
+
See the constructor for additional options.
|
|
358
|
+
|
|
359
|
+
Returns
|
|
360
|
+
-------
|
|
361
|
+
query : StructureQuery
|
|
362
|
+
The query object.
|
|
363
|
+
"""
|
|
364
|
+
mol_file = MOLFile()
|
|
365
|
+
mol_file.set_structure(atoms)
|
|
366
|
+
# Every MOL string with "$$$$" is a valid SDF string
|
|
367
|
+
# Important: USE MS-style new lines
|
|
368
|
+
return cls(*args, sdf="\r\n".join(mol_file.lines) + "\r\n$$$$\r\n", **kwargs)
|
|
369
|
+
|
|
370
|
+
def get_input_url_path(self):
|
|
371
|
+
input_string = f"compound/{self.search_type()}/{self._query_key}"
|
|
372
|
+
if self._query_key == "cid":
|
|
373
|
+
# Put CID in URL and not in POST payload,
|
|
374
|
+
# as PubChem is confused otherwise
|
|
375
|
+
input_string += "/" + str(self._query_val)
|
|
376
|
+
return input_string
|
|
377
|
+
|
|
378
|
+
def get_params(self):
|
|
379
|
+
if self._query_key not in ("cid", "sdf"):
|
|
380
|
+
# CID is in URL
|
|
381
|
+
# SDF is given as file
|
|
382
|
+
params = {self._query_key: self._query_val}
|
|
383
|
+
else:
|
|
384
|
+
params = {}
|
|
385
|
+
# Only set maximum number, if provided by the user
|
|
386
|
+
# The PubChem default value for this might change over time
|
|
387
|
+
if self._number is not None:
|
|
388
|
+
params["MaxRecords"] = self._number
|
|
389
|
+
for key, val in self.search_options().items():
|
|
390
|
+
# Convert 'snake case' Python parameters
|
|
391
|
+
# to 'camel case' request parameters
|
|
392
|
+
key = "".join([word.capitalize() for word in key.split("_")])
|
|
393
|
+
params[key] = val
|
|
394
|
+
return params
|
|
395
|
+
|
|
396
|
+
def get_files(self):
    # A multi-line SDF string requires the payload to be given as file;
    # every other query type has no file payload
    return {"sdf": self._query_val} if self._query_key == "sdf" else {}
|
|
402
|
+
|
|
403
|
+
@abc.abstractmethod
def search_type(self):
    """
    Name the kind of search that is performed by this query.

    The name is used for the input part of the request, where it is
    the part directly after ``compound/``.

    PROTECTED: Override when inheriting.

    Returns
    -------
    search_type : str
        The search type for the input part of the request.
    """
    return None
|
|
417
|
+
|
|
418
|
+
def search_options(self):
    """
    Provide additional options that are added to the POST parameters.

    This default implementation provides no options.

    PROTECTED: Override when inheriting.

    Returns
    -------
    options : dict (str -> object)
        The keys are automatically converted from *snake case* to
        *camel case* required by the request parameters.
    """
    return dict()
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
    """
    Abstract superclass for super- and substructure searches.
    This class handles specific options for these searches.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query *SMILES* string.
    smarts : str, optional
        The query *SMARTS* pattern.
    inchi : str, optional
        The query *InChI* string.
    sdf : str, optional
        A query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure given as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.
    """

    # Search options handled by this class and the values used
    # for options the user does not provide
    _option_defaults = {
        "match_charges": False,
        "match_tautomers": False,
        "rings_not_embedded": False,
        "single_double_bonds_match": True,
        "chains_match_rings": True,
        "strip_hydrogen": False,
        "stereo": "ignore",
    }

    def __init__(self, **kwargs):
        self._options = copy.copy(SuperOrSubstructureQuery._option_defaults)
        # Iterate over the known option names instead of 'kwargs':
        # removing entries from 'kwargs' while iterating over it
        # raises a RuntimeError
        for option in SuperOrSubstructureQuery._option_defaults:
            if option in kwargs:
                # The superclass constructor does not know the search
                # options, hence they are removed from 'kwargs'
                self._options[option] = kwargs.pop(option)
        super().__init__(**kwargs)

    def search_options(self):
        """
        Get the search options, including those left at their default.

        Returns
        -------
        options : dict (str -> object)
            The *snake case* option names mapped to their values.
        """
        return self._options
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
class SuperstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures that have the given input structure as
    superstructure.
    Put differently, the matches of this query are substructures of the
    input structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The *SMILES* string of the query.
    smarts : str, optional
        The *SMARTS* pattern of the query.
    inchi : str, optional
        The *InChI* string of the query.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SuperstructureQuery(cid=5950, number=5)))
    [..., ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def search_type(self):
        # Name of the search in the request input part
        search_name = "fastsuperstructure"
        return search_name
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
class SubstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures that have the given input structure as
    substructure.
    Put differently, the matches of this query are superstructures of
    the input structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The *SMILES* string of the query.
    smarts : str, optional
        The *SMARTS* pattern of the query.
    inchi : str, optional
        The *InChI* string of the query.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SubstructureQuery(cid=5950, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SubstructureQuery.from_atoms(atom_array, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def search_type(self):
        # Name of the search in the request input part
        search_name = "fastsubstructure"
        return search_name
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
class SimilarityQuery(StructureQuery):
    """
    A query for all structures that are similar to the given input
    structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    threshold : float, optional
        The minimum *Tanimoto* similarity a structure needs to be
        considered a match.
        Must be between 0 (no similarity) and 1 (complete match).
    conformation_based : bool, optional
        If set to true, the 3D conformation is the basis for the
        similarity computation.
        By default, only the elements and bonds between the atoms are
        considered for similarity computation.
    smiles : str, optional
        The *SMILES* string of the query.
    smarts : str, optional
        The *SMARTS* pattern of the query.
    inchi : str, optional
        The *InChI* string of the query.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Notes
    -----
    The conformation based similarity measure uses *shape-Tanimoto* and
    *color-Tanimoto* scores :footcite:`Kim2018`.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SimilarityQuery(cid=5950, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SimilarityQuery.from_atoms(atom_array, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def __init__(self, threshold=0.9, conformation_based=False, **kwargs):
        self._conformation_based = conformation_based
        self._threshold = threshold
        # The remaining arguments describe the query structure
        super().__init__(**kwargs)

    def search_type(self):
        # The search type name ends with the dimensionality of the search
        if self._conformation_based:
            return "fastsimilarity_3d"
        return "fastsimilarity_2d"

    def search_options(self):
        # The request gets the fractional threshold as integer percentage
        percentage = int(round(self._threshold * 100))
        return {"threshold": percentage}
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
class IdentityQuery(StructureQuery):
    """
    A query for all structures that are identical to the given input
    structure.

    Exactly one of the input structure parameters `smiles`, `smarts`, `inchi`,
    `sdf` or `cid` must be given.

    Parameters
    ----------
    identity_type : {'same_connectivity', 'same_tautomer', 'same_stereo', 'same_isotope', 'same_stereo_isotope', 'nonconflicting_stereo', 'same_isotope_nonconflicting_stereo'}, optional
        The type of identity search.
    smiles : str, optional
        The *SMILES* string of the query.
    smarts : str, optional
        The *SMARTS* pattern of the query.
    inchi : str, optional
        The *InChI* string of the query.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(IdentityQuery(cid=5950)))
    [5950]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(IdentityQuery.from_atoms(atom_array)))
    [5950]
    """

    def __init__(self, identity_type="same_stereo_isotope", **kwargs):
        self._identity_type = identity_type
        # The remaining arguments describe the query structure
        super().__init__(**kwargs)

    def search_type(self):
        # Name of the search in the request input part
        search_name = "fastidentity"
        return search_name

    def get_params(self):
        # The REST API parameter 'identity_type' is *snake case*,
        # so it must not undergo the *camel case* conversion that is
        # applied to the keys from 'search_options()'
        # -> Add it directly to the parameters instead
        request_params = super().get_params()
        request_params["identity_type"] = self._identity_type
        return request_params
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def search(query, throttle_threshold=0.5, return_throttle_status=False):
    """
    Get all CIDs that meet the given query requirements,
    via the PubChem REST API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        If the load of either the request time or count exceeds this
        value the execution is halted.
        See :class:`ThrottleStatus` for more information.
        If ``None`` is given, the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` is also returned.

    Returns
    -------
    ids : list of int
        List of all compound IDs (CIDs) that meet the query requirement.
    throttle_status : ThrottleStatus
        The :class:`ThrottleStatus` obtained from the server response.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    RequestError
        If the server response indicates a failed request.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """
    # Query the file payload only once;
    # pass None instead of an empty mapping if there is nothing to upload
    files = query.get_files() or None
    # Use POST to be compatible with the larger payloads
    # of structure searches
    r = requests.post(
        _base_url + query.get_input_url_path() + "/cids/TXT",
        data=query.get_params(),
        files=files,
    )
    if not r.ok:
        raise RequestError(parse_error_details(r.text))
    throttle_status = ThrottleStatus.from_response(r)
    if throttle_threshold is not None:
        throttle_status.wait_if_busy(throttle_threshold)

    # The response text contains one CID per line
    cids = [int(cid) for cid in r.text.splitlines()]
    if return_throttle_status:
        return cids, throttle_status
    return cids
|