biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for downloading predicted protein structures from the AlphaFold DB.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.database.afdb"
|
|
10
|
+
__author__ = "Alex Carlin"
|
|
11
|
+
|
|
12
|
+
from .download import *
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.afdb"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Alex Carlin"
|
|
7
|
+
__all__ = ["fetch"]
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from xml.etree import ElementTree
|
|
13
|
+
import requests
|
|
14
|
+
from biotite.database.error import RequestError
|
|
15
|
+
|
|
16
|
+
_METADATA_URL = "https://alphafold.com/api/prediction"
|
|
17
|
+
_BINARY_FORMATS = ["bcif"]
|
|
18
|
+
# Adopted from https://www.uniprot.org/help/accession_numbers
|
|
19
|
+
# adding the optional 'AF-' prefix and '-F1' suffix used by RCSB
|
|
20
|
+
_UNIPROT_PATTERN = (
|
|
21
|
+
r"^(?P<prefix>(AF-)|(AF_AF))?"
|
|
22
|
+
r"(?P<id>[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"
|
|
23
|
+
r"(?P<suffix>-?F1)?$"
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
    """
    Download predicted protein structures from the AlphaFold DB.

    This function requires an internet connection.

    Parameters
    ----------
    ids : str or iterable object of str
        A single ID or a list of IDs of the file(s) to be downloaded.
        They can be either UniProt IDs (e.g. ``P12345``), AlphaFold DB IDs
        (e.g. ``AF-P12345-F1``) or computational RCSB IDs (e.g. ``AF_AFP12345F1``).
        Any iterable is accepted, including generators.
    format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
        The format of the files to be downloaded.
        ``'pdbx'`` and ``'mmcif'`` are handled as aliases of ``'cif'``.
    target_path : str, optional
        The target directory of the downloaded files.
        The directory is created, if it does not exist yet.
        By default, the file content is stored in a file-like object
        (`StringIO` or `BytesIO`, respectively).
    overwrite : bool, optional
        If true, existing files will be overwritten.
        Otherwise the respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
        The file path(s) to the downloaded files.
        If a single string (a single ID) was given in `ids`, a single string is
        returned.
        If a list (or other iterable object) was given, a list of strings is returned.
        If no `target_path` was given, the file contents are stored in either
        ``StringIO`` or ``BytesIO`` objects.

    Raises
    ------
    ValueError
        If `format` is not one of the supported formats.

    Examples
    --------

    >>> from pathlib import Path
    >>> file = fetch("P12345", "cif", path_to_directory)
    >>> print(Path(file).name)
    P12345.cif
    >>> files = fetch(["P12345", "Q8K9I1"], "cif", path_to_directory)
    >>> print([Path(file).name for file in files])
    ['P12345.cif', 'Q8K9I1.cif']
    """
    if format not in ["pdb", "pdbx", "cif", "mmcif", "bcif", "fasta"]:
        raise ValueError(f"Format '{format}' is not supported")
    if format in ["pdbx", "mmcif"]:
        format = "cif"

    # If only a single ID is present,
    # put it into a single element list
    if isinstance(ids, str):
        ids = [ids]
        single_element = True
    else:
        # Materialize the iterable: the progress output needs 'len(ids)',
        # which is not available for generators
        ids = list(ids)
        single_element = False
    if target_path is not None:
        target_path = Path(target_path)
        target_path.mkdir(parents=True, exist_ok=True)

    is_binary = format in _BINARY_FORMATS
    files = []
    for i, entry_id in enumerate(ids):
        # Verbose output
        if verbose:
            print(
                f"Fetching file {i + 1:d} / {len(ids):d} ({entry_id})...",
                end="\r",
            )
        # Fetch file from database
        if target_path is not None:
            file = target_path / f"{entry_id}.{format}"
        else:
            # 'file = None' -> store content in a file-like object
            file = None
        # Skip the download only if a non-empty file already exists
        # and overwriting is not requested
        if file is None or not file.is_file() or file.stat().st_size == 0 or overwrite:
            file_response = requests.get(_get_file_url(entry_id, format))
            _assert_valid_file(file_response, entry_id)
            content = file_response.content if is_binary else file_response.text

            if file is None:
                file = io.BytesIO(content) if is_binary else io.StringIO(content)
            else:
                mode = "wb+" if is_binary else "w+"
                with open(file, mode) as f:
                    f.write(content)

        files.append(file)
    if verbose:
        print("\nDone")

    # Return paths as strings
    files = [file.as_posix() if isinstance(file, Path) else file for file in files]
    # If input was a single ID, return only a single element
    if single_element:
        return files[0]
    else:
        return files
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _get_file_url(id, format):
    """
    Look up the download URL of a file via the ``prediction`` API endpoint.

    Parameters
    ----------
    id : str
        The ID of the file to be downloaded.
    format : str
        The format of the file to be downloaded.

    Returns
    -------
    file_url : str
        The URL of the file to be downloaded.
    """
    accession = _extract_id(id)
    response = requests.get(f"{_METADATA_URL}/{accession}")
    try:
        entries = response.json()
    except requests.exceptions.JSONDecodeError:
        raise RequestError("Received malformed JSON response")
    if len(entries) == 0:
        raise RequestError(f"ID {id} is invalid")
    # A list of length 1 is always returned, if the response is valid
    first_entry = entries[0]
    url_key = f"{format}Url"
    return first_entry[url_key]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _extract_id(id):
    """
    Extract the UniProt ID used by the AFDB from the given qualifier.

    The qualifier is matched against ``_UNIPROT_PATTERN``, which allows

    - Directly the UniProt ID (e.g. ``P12345``) (trivial case)
    - Entry ID, as also returned by the RCSB search API (e.g. ``AF-P12345-F1``)

    Parameters
    ----------
    id : str
        The qualifier to extract the UniProt ID from.

    Returns
    -------
    uniprot_id : str
        The UniProt ID.
    """
    match = re.match(_UNIPROT_PATTERN, id)
    if match is not None:
        # Only the accession part is returned, prefix and suffix are dropped
        return match.group("id")
    raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _assert_valid_file(response, id):
    """
    Check whether the response is an actual structure file
    or an XML error document (e.g. a *404* error due to an invalid UniProt ID).

    Parameters
    ----------
    response : object
        The response of the file request.
        Only its ``text`` attribute is read.
    id : str
        The ID the file was requested for.
        It is only used in the error messages.

    Raises
    ------
    RequestError
        If the response is empty or is an XML document whose root element
        is ``Error``.
    """
    if len(response.text) == 0:
        raise RequestError(f"Received no response for '{id}'")
    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError:
        # This is not XML -> the response is probably a valid file
        return
    if root.tag == "Error":
        # 'findtext()' tolerates an error document without a <Message> element,
        # where 'find().text' would fail with an AttributeError
        message = root.findtext("Message", default="unknown error")
        raise RequestError(f"Error while fetching '{id}': {message}")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for downloading files from the NCBI Entrez database.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.database.entrez"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .dbnames import *
|
|
13
|
+
from .download import *
|
|
14
|
+
from .key import *
|
|
15
|
+
from .query import *
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
|
|
7
|
+
__all__ = ["check_for_errors"]
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from biotite.database.error import RequestError
|
|
11
|
+
|
|
12
|
+
# Taken from https://github.com/kblin/ncbi-entrez-error-messages
_error_messages = [
    "Error reading from remote server",
    "Bad gateway",
    "Bad Gateway",
    "Cannot process ID list",
    "server is temporarily unable to service your request",
    "Service unavailable",
    "Server Error",
    "ID list is empty",
    "Supplied id parameter is empty",
    "Resource temporarily unavailable",
    "Failed to retrieve sequence",
    "Failed to understand id",
]


def check_for_errors(message):
    """
    Check for common error messages in NCBI Entrez database responses.

    Parameters
    ----------
    message : str
        The message received from NCBI Entrez.

    Raises
    ------
    RequestError
        If the message contains an error message.
    """
    # Server can respond short JSON error messages
    if len(message) < 500:
        try:
            message_json = json.loads(message)
        except json.decoder.JSONDecodeError:
            # It is not a JSON message
            pass
        else:
            # A short message may be valid JSON without being an object
            # (e.g. a message consisting only of digits is parsed as 'int'):
            # only a JSON object can carry an "error" entry
            if isinstance(message_json, dict) and "error" in message_json:
                raise RequestError(message_json["error"])

    # Error always appear at the end of message
    message_end = message[-200:]
    # Seemingly arbitrary '+' characters are in NCBI error messages
    message_end = message_end.replace("+", "")
    for error_msg in _error_messages:
        # Often whitespace is also replaced by '+' in error message
        if error_msg.replace(" ", "") in message_end:
            raise RequestError(error_msg)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["get_database_name"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# fmt: off
|
|
11
|
+
_db_names = {
|
|
12
|
+
"BioProject" : "bioproject",
|
|
13
|
+
"BioSample" : "biosample",
|
|
14
|
+
"Biosystems" : "biosystems",
|
|
15
|
+
"Books" : "books",
|
|
16
|
+
"Conserved Domains" : "cdd",
|
|
17
|
+
"dbGaP" : "gap",
|
|
18
|
+
"dbVar" : "dbvar",
|
|
19
|
+
"Epigenomics" : "epigenomics",
|
|
20
|
+
"EST" : "nucest",
|
|
21
|
+
"Gene" : "gene",
|
|
22
|
+
"Genome" : "genome",
|
|
23
|
+
"GEO Datasets" : "gds",
|
|
24
|
+
"GEO Profiles" : "geoprofiles",
|
|
25
|
+
"GSS" : "nucgss",
|
|
26
|
+
"HomoloGene" : "homologene",
|
|
27
|
+
"MeSH" : "mesh",
|
|
28
|
+
"NCBI C++ Toolkit" : "toolkit",
|
|
29
|
+
"NCBI Web Site" : "ncbisearch",
|
|
30
|
+
"NLM Catalog" : "nlmcatalog",
|
|
31
|
+
"Nucleotide" : "nuccore",
|
|
32
|
+
"OMIA" : "omia",
|
|
33
|
+
"PopSet" : "popset",
|
|
34
|
+
"Probe" : "probe",
|
|
35
|
+
"Protein" : "protein",
|
|
36
|
+
"Protein Clusters" : "proteinclusters",
|
|
37
|
+
"PubChem BioAssay" : "pcassay",
|
|
38
|
+
"PubChem Compound" : "pccompound",
|
|
39
|
+
"PubChem Substance" : "pcsubstance",
|
|
40
|
+
"PubMed" : "pubmed",
|
|
41
|
+
"PubMed Central" : "pmc",
|
|
42
|
+
"SNP" : "snp",
|
|
43
|
+
"SRA" : "sra",
|
|
44
|
+
"Structure" : "structure",
|
|
45
|
+
"Taxonomy" : "taxonomy",
|
|
46
|
+
"UniGene" : "unigene",
|
|
47
|
+
"UniSTS" : "unists"
|
|
48
|
+
}
|
|
49
|
+
# fmt: on
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_database_name(database):
|
|
53
|
+
"""
|
|
54
|
+
Map a common NCBI Entrez database name to an E-utility database
|
|
55
|
+
name.
|
|
56
|
+
|
|
57
|
+
Parameters
|
|
58
|
+
----------
|
|
59
|
+
database : str
|
|
60
|
+
Entrez database name.
|
|
61
|
+
|
|
62
|
+
Returns
|
|
63
|
+
-------
|
|
64
|
+
name : str
|
|
65
|
+
E-utility database name.
|
|
66
|
+
|
|
67
|
+
Examples
|
|
68
|
+
--------
|
|
69
|
+
|
|
70
|
+
>>> print(get_database_name("Nucleotide"))
|
|
71
|
+
nuccore
|
|
72
|
+
"""
|
|
73
|
+
return _db_names[database]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def sanitize_database_name(db_name):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name, return E-utility database name, or raise an exception if the
    database name is not existing.

    Only for internal usage in ``download.py`` and ``query.py``.

    Parameters
    ----------
    db_name : str
        Entrez database name.
        Either the common name (e.g. ``"Nucleotide"``) or the E-utility
        name (e.g. ``"nuccore"``).

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    ValueError
        If `db_name` is neither a known common name nor a known
        E-utility database name.
    """
    if db_name in _db_names:
        # Convert into E-utility database name
        return _db_names[db_name]
    if db_name in _db_names.values():
        # Is already E-utility database name
        return db_name
    # The f-prefix is required, otherwise the placeholder is printed
    # literally instead of the offending name
    raise ValueError(f"Database '{db_name}' is not existing")
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["fetch", "fetch_single_file"]
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import os
|
|
11
|
+
from os.path import getsize, isdir, isfile, join
|
|
12
|
+
import requests
|
|
13
|
+
from biotite.database.entrez.check import check_for_errors
|
|
14
|
+
from biotite.database.entrez.dbnames import sanitize_database_name
|
|
15
|
+
from biotite.database.entrez.key import get_api_key
|
|
16
|
+
from biotite.database.error import RequestError
|
|
17
|
+
|
|
18
|
+
# URL of the NCBI E-utilities ``efetch`` endpoint that all download
# requests in this module are sent to
_fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def fetch(
    uids,
    target_path,
    suffix,
    db_name,
    ret_type,
    ret_mode="text",
    overwrite=False,
    verbose=False,
):
    """
    Download files from the NCBI Entrez database in various formats.

    The data for each UID will be fetched into a separate file.

    A list of valid database, retrieval type and mode combinations can
    be found under
    `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_

    This function requires an internet connection.

    Parameters
    ----------
    uids : str or iterable object of str
        A single *unique identifier* (UID) or a list of UIDs of the
        file(s) to be downloaded.
    target_path : str or None
        The target directory of the downloaded files.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`).
    suffix : str
        The file suffix of the downloaded files. This value is
        independent of the retrieval type.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If true, existing files will be overwritten. Otherwise the
        respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is
        empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or list of (str or StringIO)
        The file path(s) to the downloaded files.
        If a single string (a single UID) was given in `uids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If `target_path` is ``None``, the file contents are stored in
        `StringIO` objects.

    Raises
    ------
    RequestError
        If the response content starts with ``" Error"``.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See Also
    --------
    fetch_single_file : Fetch multiple entries as a single file.

    Examples
    --------

    >>> import os.path
    >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
    ...               db_name="protein", ret_type="fasta")
    >>> print([os.path.basename(file) for file in files])
    ['1L2Y_A.fa', '3O5R_A.fa']
    """
    # If only a single UID is present,
    # put it into a single element list
    if isinstance(uids, str):
        uids = [uids]
        single_element = True
    else:
        # Materialize arbitrary iterables (e.g. generators):
        # the progress output needs the total number of UIDs
        uids = list(uids)
        single_element = False
    # Create the target folder, if not existing
    if target_path is not None:
        os.makedirs(target_path, exist_ok=True)
    files = []
    for i, uid in enumerate(uids):
        # Verbose output
        if verbose:
            print(f"Fetching file {i + 1:d} / {len(uids):d} ({uid})...", end="\r")
        # Fetch file from database
        if target_path is not None:
            file = join(target_path, uid + "." + suffix)
        else:
            file = None
        # Skip the download only for an already existing, non-empty file
        # that must not be overwritten
        if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
            param_dict = {
                "db": sanitize_database_name(db_name),
                "id": uid,
                "rettype": ret_type,
                "retmode": ret_mode,
                "tool": "Biotite",
                "mail": "padix.key@gmail.com",
            }
            api_key = get_api_key()
            if api_key is not None:
                param_dict["api_key"] = api_key
            r = requests.get(_fetch_url, params=param_dict)
            content = r.text
            check_for_errors(content)
            if content.startswith(" Error"):
                raise RequestError(content[8:])
            if file is None:
                file = io.StringIO(content)
            else:
                with open(file, "w+") as f:
                    f.write(content)
        files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    if single_element:
        return files[0]
    return files
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def fetch_single_file(
    uids, file_name, db_name, ret_type, ret_mode="text", overwrite=False
):
    """
    Almost the same as :func:`fetch()`, but the data for the given UIDs
    will be stored in a single file.

    Parameters
    ----------
    uids : str or iterable object of str
        A list of UIDs of the
        file(s) to be downloaded.
        A single UID given as plain string is treated as a list
        containing only this UID.
    file_name : str or None
        The file path, including file name, to the target file.
        If ``None``, the content is returned as file-like object
        instead.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If false, the file is only downloaded, if no non-empty file
        with the same name already exists.

    Returns
    -------
    file : str or StringIO
        The file name of the downloaded file.
        If `file_name` is ``None``, the file content is stored in
        a `StringIO` object.

    Raises
    ------
    RequestError
        If the response content starts with ``" Error"``.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See Also
    --------
    fetch : Fetch one or multiple entries as separate files.
    """
    if (
        file_name is not None
        and isfile(file_name)
        and getsize(file_name) > 0
        and not overwrite
    ):
        # Do not redownload the already existing file
        return file_name
    if isinstance(uids, str):
        # A bare string would otherwise be split into its characters
        uids = [uids]
    # The E-utilities expect the UIDs as comma-separated list
    uid_list_str = ",".join(uids)
    param_dict = {
        "db": sanitize_database_name(db_name),
        "id": uid_list_str,
        "rettype": ret_type,
        "retmode": ret_mode,
        "tool": "Biotite",
        "mail": "padix.key@gmail.com",
    }
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_fetch_url, params=param_dict)
    content = r.text
    check_for_errors(content)
    if content.startswith(" Error"):
        raise RequestError(content[8:])
    if file_name is None:
        return io.StringIO(content)
    with open(file_name, "w+") as f:
        f.write(content)
    return file_name
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.database.entrez"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["set_api_key", "get_api_key"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Module-level storage for the NCBI API key:
# ``None`` until a key is assigned via `set_api_key()`
_API_KEY = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_api_key():
    """
    Return the currently stored
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The stored API key, or ``None`` if no key has been set so far.
    """
    # Reading a module-level variable requires no ``global`` declaration
    return _API_KEY
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def set_api_key(key):
    """
    Store the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_
    for later requests.

    An API key raises the request limit on the NCBI servers.
    Functions in :mod:`biotite.database.entrez` pick up the stored key
    automatically.
    The key lives only in memory, so it is gone as soon as the Python
    session ends.

    Parameters
    ----------
    key : str
        The API key.
    """
    # Rebind the module-level variable instead of creating a local one
    global _API_KEY
    _API_KEY = key
|