biotite 0.41.1__cp311-cp311-macosx_10_16_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +19 -0
- biotite/application/__init__.py +43 -0
- biotite/application/application.py +265 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +505 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +83 -0
- biotite/application/blast/webapp.py +421 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +238 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +152 -0
- biotite/application/localapp.py +306 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +122 -0
- biotite/application/msaapp.py +374 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +254 -0
- biotite/application/muscle/app5.py +171 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +456 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +222 -0
- biotite/application/util.py +59 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +304 -0
- biotite/application/viennarna/rnafold.py +269 -0
- biotite/application/viennarna/rnaplot.py +187 -0
- biotite/application/viennarna/util.py +72 -0
- biotite/application/webapp.py +77 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +61 -0
- biotite/database/entrez/dbnames.py +89 -0
- biotite/database/entrez/download.py +223 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +223 -0
- biotite/database/error.py +15 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +260 -0
- biotite/database/pubchem/error.py +20 -0
- biotite/database/pubchem/query.py +827 -0
- biotite/database/pubchem/throttle.py +99 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +167 -0
- biotite/database/rcsb/query.py +959 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +32 -0
- biotite/database/uniprot/download.py +134 -0
- biotite/database/uniprot/query.py +209 -0
- biotite/file.py +251 -0
- biotite/sequence/__init__.py +73 -0
- biotite/sequence/align/__init__.py +49 -0
- biotite/sequence/align/alignment.py +658 -0
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +69 -0
- biotite/sequence/align/cigar.py +434 -0
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +574 -0
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3400 -0
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +405 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +620 -0
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +587 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +305 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +956 -0
- biotite/sequence/align/statistics.py +265 -0
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +566 -0
- biotite/sequence/annotation.py +829 -0
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +466 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1034 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +139 -0
- biotite/sequence/graphics/dendrogram.py +184 -0
- biotite/sequence/graphics/features.py +510 -0
- biotite/sequence/graphics/logo.py +110 -0
- biotite/sequence/graphics/plasmid.py +661 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +273 -0
- biotite/sequence/io/fasta/file.py +278 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +120 -0
- biotite/sequence/io/fastq/file.py +551 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +277 -0
- biotite/sequence/io/genbank/file.py +575 -0
- biotite/sequence/io/genbank/metadata.py +324 -0
- biotite/sequence/io/genbank/sequence.py +172 -0
- biotite/sequence/io/general.py +192 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +133 -0
- biotite/sequence/io/gff/file.py +434 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +456 -0
- biotite/sequence/search.py +116 -0
- biotite/sequence/seqtypes.py +556 -0
- biotite/sequence/sequence.py +374 -0
- biotite/structure/__init__.py +132 -0
- biotite/structure/atoms.py +1455 -0
- biotite/structure/basepairs.py +1415 -0
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +1933 -0
- biotite/structure/box.py +592 -0
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.pyx +849 -0
- biotite/structure/chains.py +298 -0
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +274 -0
- biotite/structure/density.py +114 -0
- biotite/structure/dotbracket.py +216 -0
- biotite/structure/error.py +31 -0
- biotite/structure/filter.py +585 -0
- biotite/structure/geometry.py +697 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +226 -0
- biotite/structure/graphics/rna.py +282 -0
- biotite/structure/hbond.py +409 -0
- biotite/structure/info/__init__.py +25 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +82 -0
- biotite/structure/info/bonds.py +145 -0
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1663 -0
- biotite/structure/info/ccd/carbohydrates.txt +1135 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +798 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +123 -0
- biotite/structure/info/misc.py +144 -0
- biotite/structure/info/radii.py +197 -0
- biotite/structure/info/standardize.py +196 -0
- biotite/structure/integrity.py +268 -0
- biotite/structure/io/__init__.py +30 -0
- biotite/structure/io/ctab.py +72 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +65 -0
- biotite/structure/io/general.py +257 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mmtf/__init__.py +21 -0
- biotite/structure/io/mmtf/assembly.py +214 -0
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +341 -0
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +501 -0
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +152 -0
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +183 -0
- biotite/structure/io/mmtf/file.py +233 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +115 -0
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/mol.py +193 -0
- biotite/structure/io/mol/sdf.py +916 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +63 -0
- biotite/structure/io/npz/__init__.py +20 -0
- biotite/structure/io/npz/file.py +152 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +293 -0
- biotite/structure/io/pdb/file.py +1240 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +107 -0
- biotite/structure/io/pdbqt/file.py +640 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +648 -0
- biotite/structure/io/pdbx/cif.py +1032 -0
- biotite/structure/io/pdbx/component.py +246 -0
- biotite/structure/io/pdbx/convert.py +1597 -0
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +950 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/io/tng/__init__.py +13 -0
- biotite/structure/io/tng/file.py +46 -0
- biotite/structure/io/trajfile.py +710 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +46 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +46 -0
- biotite/structure/mechanics.py +75 -0
- biotite/structure/molecules.py +353 -0
- biotite/structure/pseudoknots.py +642 -0
- biotite/structure/rdf.py +243 -0
- biotite/structure/repair.py +253 -0
- biotite/structure/residues.py +562 -0
- biotite/structure/resutil.py +178 -0
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/sse.py +327 -0
- biotite/structure/superimpose.py +727 -0
- biotite/structure/transform.py +504 -0
- biotite/structure/util.py +98 -0
- biotite/temp.py +86 -0
- biotite/version.py +16 -0
- biotite/visualize.py +251 -0
- biotite-0.41.1.dist-info/METADATA +187 -0
- biotite-0.41.1.dist-info/RECORD +340 -0
- biotite-0.41.1.dist-info/WHEEL +4 -0
- biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for downloading files from the NCBI Entrez database.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.database.entrez"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .dbnames import *
|
|
13
|
+
from .download import *
|
|
14
|
+
from .query import *
|
|
15
|
+
from .key import *
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
|
|
7
|
+
__all__ = ["check_for_errors"]
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from ..error import RequestError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Taken from https://github.com/kblin/ncbi-entrez-error-messages
_error_messages = [
    "Error reading from remote server",
    "Bad gateway",
    "Bad Gateway",
    "Cannot process ID list",
    "server is temporarily unable to service your request",
    "Service unavailable",
    "Server Error",
    "ID list is empty",
    "Supplied id parameter is empty",
    "Resource temporarily unavailable",
    "Failed to retrieve sequence",
    "Failed to understand id",
]


def check_for_errors(message):
    """
    Check for common error messages in NCBI Entrez database responses.

    Two checks are performed: a short response (fewer than 500
    characters) that parses as a JSON object with an ``"error"`` key,
    and a known error phrase within the last 200 characters of the
    response.

    Parameters
    ----------
    message : str
        The message received from NCBI Entrez.

    Raises
    ------
    RequestError
        If the message contains an error message.
    """
    # Server can respond short JSON error messages
    if len(message) < 500:
        try:
            message_json = json.loads(message)
        except json.decoder.JSONDecodeError:
            # It is not a JSON message
            pass
        else:
            # A short payload may also be valid non-object JSON
            # (e.g. a bare number, 'null' or a list): only a JSON object
            # can carry an 'error' entry, everything else is regular
            # content and must not break the membership test
            if isinstance(message_json, dict) and "error" in message_json:
                raise RequestError(message_json["error"])

    # Error always appear at the end of message
    message_end = message[-200:]
    # Seemingly arbitrary '+' characters are in NCBI error messages
    message_end = message_end.replace("+", "")
    # NOTE(review): only '+' is stripped from the response, while spaces
    # are stripped from the known phrases below; a response containing
    # literal spaces is therefore not matched - confirm this is intended
    for error_msg in _error_messages:
        # Often whitespace is also replaced by '+' in error message
        if error_msg.replace(" ", "") in message_end:
            raise RequestError(error_msg)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["get_database_name"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Maps the common NCBI Entrez database names to the database names
# used by the E-utilities
_db_names = {
    "BioProject"        : "bioproject",
    "BioSample"         : "biosample",
    "Biosystems"        : "biosystems",
    "Books"             : "books",
    "Conserved Domains" : "cdd",
    "dbGaP"             : "gap",
    "dbVar"             : "dbvar",
    "Epigenomics"       : "epigenomics",
    "EST"               : "nucest",
    "Gene"              : "gene",
    "Genome"            : "genome",
    "GEO Datasets"      : "gds",
    "GEO Profiles"      : "geoprofiles",
    "GSS"               : "nucgss",
    "HomoloGene"        : "homologene",
    "MeSH"              : "mesh",
    "NCBI C++ Toolkit"  : "toolkit",
    "NCBI Web Site"     : "ncbisearch",
    "NLM Catalog"       : "nlmcatalog",
    "Nucleotide"        : "nuccore",
    "OMIA"              : "omia",
    "PopSet"            : "popset",
    "Probe"             : "probe",
    "Protein"           : "protein",
    "Protein Clusters"  : "proteinclusters",
    "PubChem BioAssay"  : "pcassay",
    "PubChem Compound"  : "pccompound",
    "PubChem Substance" : "pcsubstance",
    "PubMed"            : "pubmed",
    "PubMed Central"    : "pmc",
    "SNP"               : "snp",
    "SRA"               : "sra",
    "Structure"         : "structure",
    "Taxonomy"          : "taxonomy",
    "UniGene"           : "unigene",
    "UniSTS"            : "unists",
}


def get_database_name(database):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name.

    Parameters
    ----------
    database : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    KeyError
        If `database` is not one of the known common database names.

    Examples
    --------

    >>> print(get_database_name("Nucleotide"))
    nuccore
    """
    return _db_names[database]


def sanitize_database_name(db_name):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name, return E-utility database name, or raise an exception if the
    database name is not existing.

    Only for internal usage in ``download.py`` and ``query.py``.

    Parameters
    ----------
    db_name : str
        Either a common database name or an E-utility database name.

    Returns
    -------
    name : str
        The E-utility database name.

    Raises
    ------
    ValueError
        If `db_name` is neither a known common name nor a known
        E-utility name.
    """
    if db_name in _db_names:
        # Convert into E-utility database name
        return _db_names[db_name]
    elif db_name in _db_names.values():
        # Is already E-utility database name
        return db_name
    else:
        # The f-prefix is required, otherwise the placeholder is shown
        # literally instead of the offending name
        raise ValueError(f"Database '{db_name}' is not existing")
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["fetch", "fetch_single_file"]
|
|
8
|
+
|
|
9
|
+
from os.path import isdir, isfile, join, getsize
|
|
10
|
+
import os
|
|
11
|
+
import glob
|
|
12
|
+
import io
|
|
13
|
+
import requests
|
|
14
|
+
from .check import check_for_errors
|
|
15
|
+
from .dbnames import sanitize_database_name
|
|
16
|
+
from .key import get_api_key
|
|
17
|
+
from ..error import RequestError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"


def fetch(uids, target_path, suffix, db_name, ret_type,
          ret_mode="text", overwrite=False, verbose=False):
    """
    Download files from the NCBI Entrez database in various formats.

    The data for each UID will be fetched into a separate file.

    A list of valid database, retrieval type and mode combinations can
    be found under
    `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_

    This function requires an internet connection.

    Parameters
    ----------
    uids : str or iterable object of str
        A single *unique identifier* (UID) or a list of UIDs of the
        file(s) to be downloaded.
    target_path : str or None
        The target directory of the downloaded files.
        The directory is created, if it does not exist yet.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`).
    suffix : str
        The file suffix of the downloaded files. This value is
        independent of the retrieval type.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type
    ret_mode : str, optional
        Retrieval mode
    overwrite : bool, optional
        If true, existing files will be overwritten. Otherwise the
        respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is
        empty. (Default: False)
    verbose: bool, optional
        If true, the function will output the download progress.
        (Default: False)

    Returns
    -------
    files : str or StringIO or list of (str or StringIO)
        The file path(s) to the downloaded files.
        If a single string (a single UID) was given in `uids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If `target_path` is ``None``, the file contents are stored in
        `StringIO` objects, as the response is always read as text.

    Raises
    ------
    RequestError
        If the response is recognized as an error message.
    ValueError
        If `db_name` is not a known database name and a download is
        actually attempted.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See also
    --------
    fetch_single_file

    Examples
    --------

    >>> import os.path
    >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
    ...               db_name="protein", ret_type="fasta")
    >>> print([os.path.basename(file) for file in files])
    ['1L2Y_A.fa', '3O5R_A.fa']
    """
    # If only a single UID is present,
    # put it into a single element list
    if isinstance(uids, str):
        uids = [uids]
        single_element = True
    else:
        # Materialize the iterable once: the progress output needs its
        # length, which one-shot iterables (e.g. generators) do not have
        uids = list(uids)
        single_element = False
    # Create the target folder, if not existing
    if target_path is not None and not isdir(target_path):
        os.makedirs(target_path)
    files = []
    for i, uid in enumerate(uids):
        # Verbose output
        if verbose:
            print(
                f"Fetching file {i+1:d} / {len(uids):d} ({uid})...",
                end="\r"
            )
        # Fetch file from database
        if target_path is not None:
            file = join(target_path, uid + "." + suffix)
        else:
            file = None
        # Download if there is no usable (existing, non-empty) local
        # copy or if overwriting is requested
        if file is None \
           or not isfile(file) \
           or getsize(file) == 0 \
           or overwrite:
                param_dict = {
                    "db" : sanitize_database_name(db_name),
                    "id" : uid,
                    "rettype" : ret_type,
                    "retmode" : ret_mode,
                    "tool" : "Biotite",
                    "mail" : "padix.key@gmail.com"
                }
                api_key = get_api_key()
                if api_key is not None:
                    param_dict["api_key"] = api_key
                r = requests.get(_fetch_url, params=param_dict)
                content = r.text
                check_for_errors(content)
                if content.startswith(" Error"):
                    # Skip the first 8 characters (presumably the
                    # error prefix - TODO confirm exact format)
                    raise RequestError(content[8:])
                if file is None:
                    file = io.StringIO(content)
                else:
                    with open(file, "w+") as f:
                        f.write(content)
        files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    if single_element:
        return files[0]
    else:
        return files
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
                      overwrite=False):
    """
    Almost the same as :func:`fetch()`, but the data for the given UIDs
    will be stored in a single file.

    Parameters
    ----------
    uids : iterable object of str
        A list of UIDs of the
        file(s) to be downloaded.
        A single string is treated as a single UID.
    file_name : str or None
        The file path, including file name, to the target file.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If false, the file is only downloaded, if no file with the same
        name already exists or if the existing file is empty.

    Returns
    -------
    file : str or StringIO
        The file name of the downloaded file.
        If `file_name` is ``None``, the file content is stored in
        a `StringIO` object, as the response is always read as text.

    Raises
    ------
    RequestError
        If the response is recognized as an error message.
    ValueError
        If `db_name` is not a known database name.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See also
    --------
    fetch
    """
    if file_name is not None \
       and os.path.isfile(file_name) \
       and getsize(file_name) > 0 \
       and not overwrite:
            # Do not redownload the already existing file
            return file_name
    # A bare string is a single UID; iterating over it would split it
    # into one-character 'UIDs'
    if isinstance(uids, str):
        uids = [uids]
    # The UIDs are sent as a single comma-separated list
    uid_list_str = ",".join(uids)
    param_dict = {
        "db" : sanitize_database_name(db_name),
        "id" : uid_list_str,
        "rettype" : ret_type,
        "retmode" : ret_mode,
        "tool" : "Biotite",
        "mail" : "padix.key@gmail.com"
    }
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_fetch_url, params=param_dict)
    content = r.text
    check_for_errors(content)
    if content.startswith(" Error"):
        # Skip the first 8 characters (presumably the error prefix -
        # TODO confirm exact format)
        raise RequestError(content[8:])
    if file_name is None:
        return io.StringIO(content)
    else:
        with open(file_name, "w+") as f:
            f.write(content)
        return file_name
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
# Module metadata; the reported module name is set explicitly to the
# subpackage name
__name__ = "biotite.database.entrez"
__author__ = "Patrick Kunzmann"
# Public names of this module
__all__ = ["set_api_key", "get_api_key"]


# The NCBI API key of the current session:
# read by get_api_key() and overwritten by set_api_key()
_API_KEY = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_api_key():
    """
    Return the currently stored
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The key that was stored via :func:`set_api_key`,
        or ``None`` if no key has been stored yet.
    """
    # Only reading the module-level variable: no ``global`` statement
    # is necessary for that
    return _API_KEY
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def set_api_key(key):
    """
    Store the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_
    for the current Python session.

    An API key raises the request limit on the NCBI servers.
    Once stored, it is picked up automatically by the functions in
    :mod:`biotite.database.entrez`.
    The key lives only in memory, i.e. it is gone as soon as the
    Python session ends.

    Parameters
    ----------
    key : str
        The API key.
    """
    # Rebind the module-level variable instead of creating a local one
    global _API_KEY
    _API_KEY = key
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
# Module metadata; the reported module name is set explicitly to the
# subpackage name
__name__ = "biotite.database.entrez"
__author__ = "Patrick Kunzmann"
# Public names of this module
__all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
import abc
|
|
11
|
+
from xml.etree import ElementTree
|
|
12
|
+
from .check import check_for_errors
|
|
13
|
+
from .dbnames import sanitize_database_name
|
|
14
|
+
from ..error import RequestError
|
|
15
|
+
from .key import get_api_key
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# URL of the NCBI ESearch E-utility that is requested by search()
_search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
19
|
+
|
|
20
|
+
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base class for search terms of the NCBI Entrez search
    service.

    Subclasses provide the actual search string via :meth:`__str__()`.
    Two queries can be combined into a :class:`CompositeQuery` with
    the operators ``|`` (OR), ``&`` (AND) and ``^`` (NOT).
    If the right operand is not a :class:`Query`, it is converted
    into a :class:`SimpleQuery` first.
    """

    def __init__(self):
        pass

    @abc.abstractmethod
    def __str__(self):
        pass

    def __or__(self, operand):
        other = (
            operand if isinstance(operand, Query) else SimpleQuery(operand)
        )
        return CompositeQuery("OR", self, other)

    def __and__(self, operand):
        other = (
            operand if isinstance(operand, Query) else SimpleQuery(operand)
        )
        return CompositeQuery("AND", self, other)

    def __xor__(self, operand):
        # The ``^`` operator is used for the 'NOT' combination
        other = (
            operand if isinstance(operand, Query) else SimpleQuery(operand)
        )
        return CompositeQuery("NOT", self, other)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class CompositeQuery(Query):
    """
    A combination of two queries for the NCBI Entrez search service.

    The two queries are joined with either an 'AND', 'OR' or 'NOT'
    operator.

    Instances of this class are usually not created directly.
    Instead :class:`Query` instances are combined with
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).

    Parameters
    ----------
    operator : str, {"AND", "OR", "NOT"}
        The combination operator.
    query1, query2 : Query
        The left and the right query to be combined.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli", "Organism") & \\
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> print(type(query).__name__)
    CompositeQuery
    >>> print(query)
    ("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
    """

    def __init__(self, operator, query1, query2):
        super().__init__()
        self._q1 = query1
        self._q2 = query2
        self._op = operator

    def __str__(self):
        # Each operand is parenthesized to keep nested combinations
        # unambiguous
        return f"({self._q1}) {self._op} ({self._q2})"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class SimpleQuery(Query):
    """
    A simple query for the NCBI Entrez search service without
    combination via 'AND', 'OR' or 'NOT'.
    A query consists of a search term and an optional field.

    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Parameters
    ----------
    term : str
        The search term.
        It must not contain double quotes, square brackets,
        parentheses, tabs, newlines or the strings ``AND``, ``OR``
        and ``NOT``.
        A term containing spaces is automatically enclosed in double
        quotes.
    field : str, optional
        The field to search the term in.
        The list of possible fields and the required search term
        formatting can be found
        `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
        By default the field is omitted and all fields are searched in
        for the term, implicitly.

    Raises
    ------
    ValueError
        If `field` is not a known field identifier or if `term`
        contains one of the illegal strings.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli")
    >>> print(query)
    "Escherichia coli"
    >>> query = SimpleQuery("Escherichia coli", "Organism")
    >>> print(query)
    "Escherichia coli"[Organism]
    """

    # Field identifiers are taken from
    # https://www.ncbi.nlm.nih.gov/books/NBK49540/
    _fields = [
        "Accession", "All Fields", "Author", "EC/RN Number", "Feature Key",
        "Filter", "Gene Name", "Genome Project", "Issue", "Journal", "Keyword",
        "Modification Date", "Molecular Weight", "Organism", "Page Number",
        "Primary Accession", "Properties", "Protein Name", "Publication Date",
        "SeqID String", "Sequence Length", "Substance Name", "Text Word",
        "Title", "Volume",
        # Abbreviations
        "ACCN", "ALL", "AU", "AUTH", "ECNO", "FKEY", "FILT", "SB", "GENE",
        "ISS", "JOUR", "KYWD", "MDAT", "MOLWT", "ORGN", "PAGE", "PACC",
        "PORGN", "PROP", "PROT", "PDAT", "SQID", "SLEN", "SUBS", "WORD", "TI",
        # Every entry needs its own comma: adjacent string literals are
        # concatenated, which would merge two identifiers into a single
        # bogus one ("TITLVOL") and reject both real abbreviations
        "TITL", "VOL",
    ]

    def __init__(self, term, field=None):
        super().__init__()
        if field is not None and field not in SimpleQuery._fields:
            raise ValueError(f"Unknown field identifier '{field}'")
        # Reject everything that could alter the structure of the final
        # search string (quoting, field brackets, grouping, operators)
        # NOTE(review): this is a substring check, so a term that merely
        # contains e.g. 'OR' (such as 'ORF') is rejected as well
        # - confirm whether this is intended
        for invalid_string in \
                ['"', "AND", "OR", "NOT", "[", "]", "(", ")", "\t", "\n"]:
            if invalid_string in term:
                raise ValueError(
                    f"Query contains illegal term {invalid_string}"
                )
        if " " in term:
            # Encapsulate in quotes if spaces are in search term
            term = f'"{term}"'
        self._term = term
        self._field = field

    def __str__(self):
        string = self._term
        if self._field is not None:
            # The field is appended in square brackets, e.g. 'term[Organism]'
            string += f"[{self._field}]"
        return string
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def search(query, db_name, number=20):
    r"""
    Get the NCBI UIDs of all entries that meet the given query
    requirements, via the NCBI ESearch service.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    db_name : str
        E-utility or common database name.
    number : int, optional
        The maximum number of UIDs that are obtained.

    Returns
    -------
    ids : list of str
        A list of strings containing all NCBI UIDs (accession number)
        that meet the query requirements.

    Raises
    ------
    RequestError
        If the server response cannot be parsed as XML.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    Notes
    -----
    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Examples
    --------
    >>> query = SimpleQuery("Escherichia coli", "Organism") & \
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> ids = search(query, "nuccore", number=5)
    >>> print(ids)
    ['...', '...', '...', '...', '...']
    """
    request_params = {
        "db": sanitize_database_name(db_name),
        "term": str(query),
        "retmax": str(number),
    }
    # The API key is optional and only sent if one was registered
    key = get_api_key()
    if key is not None:
        request_params["api_key"] = key

    response = requests.get(_search_url, params=request_params)
    xml_response = response.text
    check_for_errors(xml_response)

    try:
        root = ElementTree.fromstring(xml_response)
    except ElementTree.ParseError:
        # Keep the error message short: show at most the first
        # 100 characters of the unparsable response
        excerpt = (
            xml_response[:100] + "..."
            if len(xml_response) > 100
            else xml_response
        )
        raise RequestError(f"Invalid server response: {excerpt}")
    # The UIDs are the text content of the 'Id' elements in 'IdList'
    return [element.text for element in root.findall(".//IdList/Id")]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
# Module metadata; the reported module name is set explicitly to the
# subpackage name
__name__ = "biotite.database"
__author__ = "Patrick Kunzmann"
# Public names of this module
__all__ = ["RequestError"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RequestError(Exception):
    """
    Raised when a database answers a request with an error message
    or with otherwise malformed content.
    """
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for searching and downloading files from the *PubChem*
|
|
7
|
+
database.
|
|
8
|
+
Although *PubChem* is part of *NCBI Entrez*,
|
|
9
|
+
:mod:`biotite.database.entrez` is only capable of accessing
|
|
10
|
+
meta-information from *PubChem*.
|
|
11
|
+
This subpackage, on the other hand, supports searching *PubChem*
|
|
12
|
+
compounds based on chemical information and is able to download
|
|
13
|
+
structure records.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# Package metadata; the reported module name is set explicitly
__name__ = "biotite.database.pubchem"
__author__ = "Patrick Kunzmann"
|
|
18
|
+
|
|
19
|
+
from .download import *
|
|
20
|
+
from .query import *
|
|
21
|
+
from .throttle import *
|