biotite 1.5.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-darwin.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-darwin.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
|
|
8
|
+
|
|
9
|
+
import abc
|
|
10
|
+
from xml.etree import ElementTree
|
|
11
|
+
import requests
|
|
12
|
+
from biotite.database.entrez.check import check_for_errors
|
|
13
|
+
from biotite.database.entrez.dbnames import sanitize_database_name
|
|
14
|
+
from biotite.database.entrez.key import get_api_key
|
|
15
|
+
from biotite.database.error import RequestError
|
|
16
|
+
|
|
17
|
+
_search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base for objects that wrap a search term
    for the NCBI Entrez search service.

    Subclasses must implement :meth:`__str__`.
    Queries are combined into a :class:`CompositeQuery` with the
    operators ``|`` (OR), ``&`` (AND) and ``^`` (NOT).
    A right operand that is not a :class:`Query` is wrapped into a
    :class:`SimpleQuery` before the combination.
    """

    def __init__(self):
        pass

    @abc.abstractmethod
    def __str__(self):
        pass

    def __or__(self, operand):
        # Accept plain terms on the right side by promoting them
        other = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("OR", self, other)

    def __and__(self, operand):
        other = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("AND", self, other)

    def __xor__(self, operand):
        # '^' is mapped to the 'NOT' operator
        other = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("NOT", self, other)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class CompositeQuery(Query):
    """
    A representation of a composite query
    for the NCBI Entrez search service.

    A composite query joins two other queries with one of the
    operators 'AND', 'OR' or 'NOT'.

    Instances are usually not created directly by the user.
    Instead they result from combining :class:`Query` instances with
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).

    Parameters
    ----------
    operator : str, {"AND", "OR", "NOT"}
        The combination operator.
    query1, query2 : Query
        The queries to be combined.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli", "Organism") & \\
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> print(type(query).__name__)
    CompositeQuery
    >>> print(query)
    ("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
    """

    def __init__(self, operator, query1, query2):
        super().__init__()
        self._op = operator
        self._q1 = query1
        self._q2 = query2

    def __str__(self):
        # Each operand is parenthesized, the operator is put in between
        return f"({self._q1!s}) {self._op} ({self._q2})"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class SimpleQuery(Query):
    """
    A simple query for the NCBI Entrez search service without
    combination via 'AND', 'OR' or 'NOT'. A query consists of a search
    term and an optional field.

    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Parameters
    ----------
    term : str
        The search term.
        It must not contain quotes, brackets, parentheses, tabs or
        newlines and must not contain the operators ``AND``, ``OR`` or
        ``NOT`` as standalone upper case words.
        If the term contains a space, it is enclosed in double quotes.
    field : str, optional
        The field to search the term in.
        The list of possible fields and the required search term
        formatting can be found
        `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
        By default the field is omitted and all fields are searched in
        for the term, implicitly.

    Raises
    ------
    ValueError
        If `field` is not a known field identifier or if `term`
        contains an illegal character or operator.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli")
    >>> print(query)
    "Escherichia coli"
    >>> query = SimpleQuery("Escherichia coli", "Organism")
    >>> print(query)
    "Escherichia coli"[Organism]
    """

    # Field identifiers are taken from
    # https://www.ncbi.nlm.nih.gov/books/NBK49540/
    _fields = [
        "Accession",
        "All Fields",
        "Author",
        "EC/RN Number",
        "Feature Key",
        "Filter",
        "Gene Name",
        "Genome Project",
        "Issue",
        "Journal",
        "Keyword",
        "Modification Date",
        "Molecular Weight",
        "Organism",
        "Page Number",
        "Primary Accession",
        "Properties",
        "Protein Name",
        "Publication Date",
        "SeqID String",
        "Sequence Length",
        "Substance Name",
        "Text Word",
        "Title",
        "Volume",
        # Abbreviations
        "ACCN",
        "ALL",
        "AU",
        "AUTH",
        "ECNO",
        "FKEY",
        "FILT",
        "SB",
        "GENE",
        "ISS",
        "JOUR",
        "KYWD",
        "MDAT",
        "MOLWT",
        "ORGN",
        "PAGE",
        "PACC",
        "PORGN",
        "PROP",
        "PROT",
        "PDAT",
        "SQID",
        "SLEN",
        "SUBS",
        "WORD",
        "TI",
        "TITL",
        "VOL",
    ]

    def __init__(self, term, field=None):
        # Imported locally, as the module does not import 're' itself
        import re

        super().__init__()
        if field is not None:
            if field not in SimpleQuery._fields:
                raise ValueError(f"Unknown field identifier '{field}'")
        for invalid_string in ['"', "AND", "OR", "NOT", "[", "]", "(", ")", "\t", "\n"]:
            if invalid_string.isalpha():
                # Operators are only rejected as standalone words:
                # a plain substring test would also refuse legitimate
                # terms such as 'NOTCH1' or 'ORC1'
                is_present = (
                    re.search(rf"\b{invalid_string}\b", term) is not None
                )
            else:
                is_present = invalid_string in term
            if is_present:
                raise ValueError(f"Query contains illegal term {invalid_string}")
        if " " in term:
            # Encapsulate in quotes if spaces are in search term
            term = f'"{term}"'
        self._term = term
        self._field = field

    def __str__(self):
        string = self._term
        if self._field is not None:
            string += f"[{self._field}]"
        return string
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def search(query, db_name, number=20):
    r"""
    Get the NCBI UIDs of all entries that meet the given query
    requirements, via the NCBI ESearch service.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    db_name : str
        E-utility or common database name.
    number : int
        The maximum number of UIDs that are obtained.

    Returns
    -------
    ids : list of str
        A list of strings containing all NCBI UIDs (accession number)
        that meet the query requirements.

    Raises
    ------
    RequestError
        If the server response cannot be parsed as XML.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    Notes
    -----
    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Examples
    --------
    >>> query = SimpleQuery("Escherichia coli", "Organism") & \
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> ids = search(query, "nuccore", number=5)
    >>> print(ids)
    ['...', '...', '...', '...', '...']
    """
    request_params = {
        "db": sanitize_database_name(db_name),
        "term": str(query),
        "retmax": str(number),
    }
    # The API key is only sent, if one has been set
    key = get_api_key()
    if key is not None:
        request_params["api_key"] = key

    response_text = requests.get(_search_url, params=request_params).text
    check_for_errors(response_text)

    try:
        tree_root = ElementTree.fromstring(response_text)
    except ElementTree.ParseError:
        # Keep the error message short for long responses
        if len(response_text) > 100:
            response_text = response_text[:100] + "..."
        raise RequestError(f"Invalid server response: {response_text}")

    return [id_element.text for id_element in tree_root.findall(".//IdList/Id")]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["RequestError"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RequestError(Exception):
    """
    Raised when a database answers a request with an error message
    or with otherwise malformed content.
    """
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for searching and downloading files from the *PubChem*
|
|
7
|
+
database.
|
|
8
|
+
Although *PubChem* is part of *NCBI Entrez*,
|
|
9
|
+
:mod:`biotite.database.entrez` is only capable of accessing
|
|
10
|
+
meta-information from *PubChem*.
|
|
11
|
+
This subpackage, on the other hand, supports searching *PubChem*
|
|
12
|
+
compounds based on chemical information and is able to download
|
|
13
|
+
structure records.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
__name__ = "biotite.database.pubchem"
|
|
17
|
+
__author__ = "Patrick Kunzmann"
|
|
18
|
+
|
|
19
|
+
from .download import *
|
|
20
|
+
from .query import *
|
|
21
|
+
from .throttle import *
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.pubchem"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["fetch", "fetch_property"]
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import numbers
|
|
11
|
+
import os
|
|
12
|
+
from os.path import getsize, isdir, isfile, join
|
|
13
|
+
import requests
|
|
14
|
+
from biotite.database.error import RequestError
|
|
15
|
+
from biotite.database.pubchem.error import parse_error_details
|
|
16
|
+
from biotite.database.pubchem.throttle import ThrottleStatus
|
|
17
|
+
|
|
18
|
+
_base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
|
|
19
|
+
_binary_formats = ["png", "asnb"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def fetch(
|
|
23
|
+
cids,
|
|
24
|
+
format="sdf",
|
|
25
|
+
target_path=None,
|
|
26
|
+
as_structural_formula=False,
|
|
27
|
+
overwrite=False,
|
|
28
|
+
verbose=False,
|
|
29
|
+
throttle_threshold=0.5,
|
|
30
|
+
return_throttle_status=False,
|
|
31
|
+
):
|
|
32
|
+
"""
|
|
33
|
+
Download structure files from *PubChem* in various formats.
|
|
34
|
+
|
|
35
|
+
This function requires an internet connection.
|
|
36
|
+
|
|
37
|
+
Parameters
|
|
38
|
+
----------
|
|
39
|
+
cids : int or iterable object or int
|
|
40
|
+
A single compound ID (CID) or a list of CIDs of the structure(s)
|
|
41
|
+
to be downloaded.
|
|
42
|
+
format : {'sdf', 'asnt' 'asnb', 'xml', 'json', 'jsonp', 'png'}
|
|
43
|
+
The format of the files to be downloaded.
|
|
44
|
+
target_path : str, optional
|
|
45
|
+
The target directory of the downloaded files.
|
|
46
|
+
By default, the file content is stored in a file-like object
|
|
47
|
+
(:class:`StringIO` or :class:`BytesIO`, respectively).
|
|
48
|
+
as_structural_formula : bool, optional
|
|
49
|
+
If set to true, the structural formula is download instead of
|
|
50
|
+
an 3D conformer.
|
|
51
|
+
This means that coordinates lie in th xy-plane and represent
|
|
52
|
+
the positions atoms would have an a structural formula
|
|
53
|
+
representation.
|
|
54
|
+
overwrite : bool, optional
|
|
55
|
+
If true, existing files will be overwritten.
|
|
56
|
+
Otherwise the respective file will only be downloaded, if the
|
|
57
|
+
file does not exist yet in the specified target directory or if
|
|
58
|
+
the file is empty.
|
|
59
|
+
verbose : bool, optional
|
|
60
|
+
If set to true, the function will output the download progress.
|
|
61
|
+
throttle_threshold : float or None, optional
|
|
62
|
+
A value between 0 and 1.
|
|
63
|
+
If the load of either the request time or count exceeds this
|
|
64
|
+
value the execution is halted.
|
|
65
|
+
See :class:`ThrottleStatus` for more information.
|
|
66
|
+
If ``None`` is given, the execution is never halted.
|
|
67
|
+
return_throttle_status : float, optional
|
|
68
|
+
If set to true, the :class:`ThrottleStatus` of the final request
|
|
69
|
+
is also returned.
|
|
70
|
+
|
|
71
|
+
Returns
|
|
72
|
+
-------
|
|
73
|
+
files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
|
|
74
|
+
The file path(s) to the downloaded files.
|
|
75
|
+
If a single CID was given in `cids`,
|
|
76
|
+
a single string is returned. If a list (or other iterable
|
|
77
|
+
object) was given, a list of strings is returned.
|
|
78
|
+
If no `target_path` was given, the file contents are stored in
|
|
79
|
+
either :class:`StringIO` or :class:`BytesIO` objects.
|
|
80
|
+
throttle_status : ThrottleStatus
|
|
81
|
+
The :class:`ThrottleStatus` obtained from the server response.
|
|
82
|
+
If multiple CIDs are requested, the :class:`ThrottleStatus` of
|
|
83
|
+
of the final response is returned.
|
|
84
|
+
This can be used for custom request throttling, for example.
|
|
85
|
+
Only returned, if `return_throttle_status` is set to true.
|
|
86
|
+
|
|
87
|
+
Examples
|
|
88
|
+
--------
|
|
89
|
+
|
|
90
|
+
>>> import os.path
|
|
91
|
+
>>> file = fetch(2244, "sdf", path_to_directory)
|
|
92
|
+
>>> print(os.path.basename(file))
|
|
93
|
+
2244.sdf
|
|
94
|
+
>>> files = fetch([2244, 5950], "sdf", path_to_directory)
|
|
95
|
+
>>> print([os.path.basename(file) for file in files])
|
|
96
|
+
['2244.sdf', '5950.sdf']
|
|
97
|
+
"""
|
|
98
|
+
# If only a single CID is present,
|
|
99
|
+
# put it into a single element list
|
|
100
|
+
if isinstance(cids, numbers.Integral):
|
|
101
|
+
cids = [cids]
|
|
102
|
+
single_element = True
|
|
103
|
+
else:
|
|
104
|
+
single_element = False
|
|
105
|
+
# Create the target folder, if not existing
|
|
106
|
+
if target_path is not None and not isdir(target_path):
|
|
107
|
+
os.makedirs(target_path)
|
|
108
|
+
|
|
109
|
+
files = []
|
|
110
|
+
for i, cid in enumerate(cids):
|
|
111
|
+
# Prevent IDs as strings, this could be a common error, as other
|
|
112
|
+
# database interfaces of Biotite use string IDs
|
|
113
|
+
if isinstance(cid, str):
|
|
114
|
+
raise TypeError("CIDs must be given as integers, not as string")
|
|
115
|
+
# Verbose output
|
|
116
|
+
if verbose:
|
|
117
|
+
print(f"Fetching file {i + 1:d} / {len(cids):d} ({cid})...", end="\r")
|
|
118
|
+
|
|
119
|
+
# Fetch file from database
|
|
120
|
+
if target_path is not None:
|
|
121
|
+
file = join(target_path, str(cid) + "." + format)
|
|
122
|
+
else:
|
|
123
|
+
# 'file = None' -> store content in a file-like object
|
|
124
|
+
file = None
|
|
125
|
+
|
|
126
|
+
if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
|
|
127
|
+
record_type = "2d" if as_structural_formula else "3d"
|
|
128
|
+
r = requests.get(
|
|
129
|
+
_base_url + f"compound/cid/{cid}/{format.upper()}",
|
|
130
|
+
params={"record_type": record_type},
|
|
131
|
+
)
|
|
132
|
+
if not r.ok:
|
|
133
|
+
raise RequestError(parse_error_details(r.text))
|
|
134
|
+
|
|
135
|
+
if format.lower() in _binary_formats:
|
|
136
|
+
content = r.content
|
|
137
|
+
else:
|
|
138
|
+
content = r.text
|
|
139
|
+
|
|
140
|
+
if file is None:
|
|
141
|
+
if format in _binary_formats:
|
|
142
|
+
file = io.BytesIO(content)
|
|
143
|
+
else:
|
|
144
|
+
file = io.StringIO(content)
|
|
145
|
+
else:
|
|
146
|
+
mode = "wb+" if format in _binary_formats else "w+"
|
|
147
|
+
with open(file, mode) as f:
|
|
148
|
+
f.write(content)
|
|
149
|
+
|
|
150
|
+
throttle_status = ThrottleStatus.from_response(r)
|
|
151
|
+
if throttle_threshold is not None:
|
|
152
|
+
throttle_status.wait_if_busy(throttle_threshold)
|
|
153
|
+
|
|
154
|
+
files.append(file)
|
|
155
|
+
if verbose:
|
|
156
|
+
print("\nDone")
|
|
157
|
+
# If input was a single ID, return only a single path
|
|
158
|
+
if single_element:
|
|
159
|
+
return_value = files[0]
|
|
160
|
+
else:
|
|
161
|
+
return_value = files
|
|
162
|
+
if return_throttle_status:
|
|
163
|
+
return return_value, throttle_status
|
|
164
|
+
else:
|
|
165
|
+
return return_value
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def fetch_property(cids, name, throttle_threshold=0.5, return_throttle_status=False):
    """
    Request a single property from *PubChem* for one or multiple CIDs.

    This function requires an internet connection.

    Parameters
    ----------
    cids : int or iterable object of int
        Either one compound ID (CID) or several CIDs, whose property
        should be obtained.
    name : str
        The name of the property to be obtained.
        The valid names are listed in the *PubChem* REST API
        `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Compound-Property-Tables>`_.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        The execution is halted, if the load of either the request time
        or count is above this value.
        See :class:`ThrottleStatus` for more information.
        With ``None`` the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` of the request
        is returned in addition.

    Returns
    -------
    property : str or list of str
        The requested property for each given CID.
        A single string is returned, if a single CID was given in
        `cids`.
        A list of strings is returned, if a list (or other iterable
        object) was given.
    throttle_status : ThrottleStatus
        The :class:`ThrottleStatus` obtained from the server response.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    ValueError
        If `name` contains characters other than letters and numbers.
    RequestError
        If the server responds with an error status.

    Examples
    --------

    >>> butane_cids = np.array(search(FormulaQuery("C4H10")))
    >>> # Filter natural isotopes...
    >>> n_iso = np.array(fetch_property(butane_cids, "IsotopeAtomCount"), dtype=int)
    >>> # ...and neutral compounds
    >>> charge = np.array(fetch_property(butane_cids, "Charge"), dtype=int)
    >>> butane_cids = butane_cids[(n_iso == 0) & (charge == 0)]
    >>> print(sorted(butane_cids.tolist()))
    [6360, 7843, 18402699, 19029854, 19048342, 157632982, 158271732, 158934736, 161295599, 161897780]
    >>> # Get the IUPAC names for each compound
    >>> iupac_names = fetch_property(butane_cids, "IUPACName")
    >>> # Compounds with multiple molecules use ';' as separator
    >>> print(iupac_names)
    ['butane', '2-methylpropane', 'methane;prop-1-ene', 'ethane;ethene', 'cyclopropane;methane', 'cyclobutane;molecular hydrogen', 'acetylene;methane', 'carbanide;propane', 'carbanylium;propane', 'methylcyclopropane;molecular hydrogen']
    """
    # A lone CID is wrapped, so that both input variants share one path
    is_single = isinstance(cids, numbers.Integral)
    if is_single:
        cids = [cids]

    # Only letters and numbers are accepted in a property name
    if not name.isalnum():
        raise ValueError(f"Property '{name}' contains invalid characters")

    # The TXT format is requested instead of CSV, as table elements
    # may contain ',' characters themselves
    response = requests.post(
        _base_url + f"compound/cid/property/{name}/TXT",
        data={"cid": ",".join(str(cid) for cid in cids)},
    )
    if not response.ok:
        raise RequestError(parse_error_details(response.text))
    status = ThrottleStatus.from_response(response)
    if throttle_threshold is not None:
        status.wait_if_busy(throttle_threshold)

    # The response holds one line per CID
    values = response.text.splitlines()
    # Unwrap the result again, if the input was a single ID
    result = values[0] if is_single else values
    if return_throttle_status:
        return result, status
    return result
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.pubchem"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["parse_error_details"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_error_details(response_text):
    """
    Extract the error description from the text of an error response.

    The content of the first ``Detail: ...`` line is used.
    Only if the response has no such line, the first ``Message: ...``
    line is used instead.

    Parameters
    ----------
    response_text : str
        The text of the response.

    Returns
    -------
    error_details : str
        The error details, i.e. the text following the line indicator,
        or ``"Unknown error"``, if neither indicator is found.
    """
    lines = response_text.splitlines()
    # The order of the prefixes defines their priority:
    # a 'Detail' line wins, regardless of where it appears in the text
    for prefix in ("Detail: ", "Message: "):
        hit = next((line for line in lines if line.startswith(prefix)), None)
        if hit is not None:
            return hit[len(prefix) :]
    # No 'Detail: ...' or 'Message: ' line found
    return "Unknown error"
|