PyPI - biotite - Versions diffs - 0.38.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl - Mend

biotite 0.38.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (124) hide show

biotite/__init__.py +3 -3
biotite/application/application.py +33 -28
biotite/application/dssp/app.py +18 -18
biotite/application/sra/__init__.py +5 -0
biotite/application/sra/app.py +337 -55
biotite/database/entrez/__init__.py +2 -1
biotite/database/entrez/check.py +14 -3
biotite/database/entrez/download.py +20 -13
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +38 -34
biotite/database/pubchem/query.py +44 -44
biotite/database/rcsb/download.py +19 -14
biotite/database/rcsb/query.py +46 -46
biotite/sequence/align/__init__.py +5 -1
biotite/sequence/align/banded.c +1408 -1025
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +389 -0
biotite/sequence/align/kmeralphabet.c +3220 -2850
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.c +713 -663
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cpp +68398 -0
biotite/sequence/align/localgapped.c +1507 -1074
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.c +1143 -833
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.c +1569 -1092
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.c +1612 -1212
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.c +33259 -0
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/{kmertable.c → selector.c} +9129 -16497
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.c +685 -646
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/codec.c +1159 -841
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/graphics/alignment.py +212 -2
biotite/sequence/io/genbank/annotation.py +11 -11
biotite/sequence/phylo/nj.c +684 -636
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.c +970 -673
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.c +672 -626
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/structure/__init__.py +1 -1
biotite/structure/atoms.py +1 -1
biotite/structure/basepairs.py +7 -12
biotite/structure/bonds.c +3861 -3749
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/celllist.c +727 -707
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/charges.c +1561 -1560
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/filter.py +30 -37
biotite/structure/info/__init__.py +5 -8
biotite/structure/info/atoms.py +25 -67
biotite/structure/info/bonds.py +46 -100
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1646 -0
biotite/structure/info/ccd/carbohydrates.txt +1133 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +797 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +21 -20
biotite/structure/info/misc.py +11 -22
biotite/structure/info/standardize.py +17 -12
biotite/structure/io/__init__.py +2 -4
biotite/structure/io/ctab.py +1 -1
biotite/structure/io/general.py +37 -43
biotite/structure/io/mmtf/__init__.py +3 -0
biotite/structure/io/mmtf/convertarray.c +528 -365
biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.c +725 -676
biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.c +1070 -754
biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.c +727 -677
biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/file.py +34 -26
biotite/structure/io/npz/__init__.py +3 -0
biotite/structure/io/npz/file.py +21 -18
biotite/structure/io/pdb/__init__.py +3 -3
biotite/structure/io/pdb/file.py +72 -70
biotite/structure/io/pdb/hybrid36.c +540 -478
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +82 -68
biotite/structure/io/pdbx/__init__.py +13 -6
biotite/structure/io/pdbx/bcif.py +649 -0
biotite/structure/io/pdbx/cif.py +1028 -0
biotite/structure/io/pdbx/component.py +243 -0
biotite/structure/io/pdbx/convert.py +707 -359
biotite/structure/io/pdbx/encoding.c +112813 -0
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/error.py +14 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/molecules.py +151 -151
biotite/structure/residues.py +40 -40
biotite/structure/sasa.c +713 -644
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/superimpose.py +158 -115
biotite/visualize.py +9 -11
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/RECORD +112 -102
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
biotite/structure/info/amino_acids.json +0 -1556
biotite/structure/info/amino_acids.py +0 -42
biotite/structure/info/carbohydrates.json +0 -1122
biotite/structure/info/carbohydrates.py +0 -39
biotite/structure/info/intra_bonds.msgpack +0 -0
biotite/structure/info/link_types.msgpack +0 -1
biotite/structure/info/nucleotides.json +0 -772
biotite/structure/info/nucleotides.py +0 -39
biotite/structure/info/residue_masses.msgpack +0 -0
biotite/structure/info/residue_names.msgpack +0 -3
biotite/structure/info/residues.msgpack +0 -0
biotite/structure/io/pdbx/file.py +0 -652
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0

biotite/application/sra/app.py CHANGED Viewed

@@ -4,19 +4,26 @@
 __name__ = "biotite.application.sra"
 __author__ = "Patrick Kunzmann"
-__all__ = ["FastqDumpApp"]
+__all__ = ["FastaDumpApp", "FastqDumpApp"]
+import abc
+from os.path import join
+from subprocess import Popen, SubprocessError, PIPE, TimeoutExpired
 import glob
-from tempfile import NamedTemporaryFile, gettempdir
-from ..localapp import LocalApp, cleanup_tempfile
-from ..application import AppState, requires_state
+from tempfile import TemporaryDirectory
+from ..application import Application, AppState, AppStateError, \
+                          requires_state
+from ...sequence.seqtypes import NucleotideSequence
 from ...sequence.io.fastq.file import FastqFile
-from ...sequence.io.fastq.convert import get_sequences
+from ...sequence.io.fasta.file import FastaFile
+from ...sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
+from ...sequence.io.fasta.convert import get_sequences
-class FastqDumpApp(LocalApp):
+# Do not use LocalApp, as two programs are executed
+class _DumpApp(Application, metaclass=abc.ABCMeta):
     """
-    Fetch sequencing data as FASTQ from the *NCBI sequence read archive*
+    Fetch sequencing data from the *NCBI sequence read archive*
     (SRA) using *sra-tools*.
     Parameters
@@ -31,85 +38,212 @@ class FastqDumpApp(LocalApp):
         multiple reads per spot.
         By default, the files are created in a temporary directory and
         deleted after the files have been read.
-    bin_path : str, optional
-        Path to the ``fasterq-dump`` binary.
+    prefetch_path, fasterq_dump_path : str, optional
+        Path to the ``prefetch`` and ``fasterq-dump`` binary,
+        respectively.
     offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
         This value is subtracted from the FASTQ ASCII code to obtain the
         quality score.
         Can either be directly the value, or a string that indicates
         the score format.
     """
-    def __init__(self, uid, output_path_prefix=None, bin_path="fasterq-dump",
-                 offset="Sanger"):
-        super().__init__(bin_path)
+    def __init__(self, uid, output_path_prefix=None,
+                 prefetch_path="prefetch", fasterq_dump_path="fasterq-dump"):
+        super().__init__()
+        self._prefetch_path = prefetch_path
+        self._fasterq_dump_path = fasterq_dump_path
         self._uid = uid
-        self._offset = offset
+        self._sra_dir = TemporaryDirectory(suffix="_sra")
         if output_path_prefix is None:
-            # NamedTemporaryFile is only created to obtain prefix
-            # for FASTQ files
-            self._out_file = NamedTemporaryFile("r")
-            self._prefix = self._out_file.name
+            self._prefix = join(self._sra_dir.name, self._uid)
         else:
-            self._out_file = None
             self._prefix = output_path_prefix
+        self._prefetch_process = None
+        self._fasterq_dump_process = None
+    @requires_state(AppState.RUNNING | AppState.FINISHED)
+    def join(self, timeout=None):
+        # Override method as repetitive calls of 'is_finished()'
+        # are not necessary as 'communicate()' already waits for the
+        # finished application
+        try:
+            _, self._stderr = self._process.communicate(
+                timeout=timeout
+            )
+        except TimeoutExpired:
+            self.cancel()
+            raise TimeoutError(
+                f"The application expired its timeout ({timeout:.1f} s)"
+            )
+        self._state = AppState.FINISHED
+        try:
+            self.evaluate()
+        except AppStateError:
+            raise
+        except:
+            self._state = AppState.CANCELLED
+            raise
+        else:
+            self._state = AppState.JOINED
+        self.clean_up()
     def run(self):
-        self.set_arguments([
-            "-o", self._prefix + ".fastq",
-            "-t", gettempdir(),
-            "-f",
-            self._uid
-        ])
-        super().run()
+        # Prefetch into a temp directory with file name equaling UID
+        # This ensures that the ID in the header is not the temp prefix
+        sra_file_name = join(self._sra_dir.name, self._uid)
+        command = (
+            f"{self._prefetch_path} -q -O {self._sra_dir.name} "
+            f"{self.get_prefetch_options()} {self._uid}; "
+            f"{self._fasterq_dump_path} -q -o {self._prefix}.fastq "
+            f"{self.get_fastq_dump_options()} {sra_file_name}"
+        )
+        self._process = Popen(
+            command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
+        )
+    def is_finished(self):
+        code = self._process.poll()
+        if code == None:
+            return False
+        else:
+            _,  self._stderr = self._process.communicate()
+            return True
     def evaluate(self):
         super().evaluate()
+        # Check if application terminated correctly
+        exit_code = self._process.returncode
+        if exit_code != 0:
+            err_msg = self._stderr.replace("\n", " ")
+            raise SubprocessError(
+                f"'prefetch' or 'fasterq-dump' returned with exit code "
+                f"{exit_code}: {err_msg}"
+            )
         self._file_names = (
             # For entries with one read per spot
-            glob.glob(self._prefix +   ".fastq") +
+            glob.glob(self._prefix +   ".fastq") +
             # For entries with multiple reads per spot
             glob.glob(self._prefix + "_*.fastq")
         )
         # Only load FASTQ files into memory when needed
         self._fastq_files = None
+    def wait_interval(self):
+        # Not used in this implementation of 'join()'
+        raise NotImplementedError()
     def clean_up(self):
-        super().clean_up()
-        if self._out_file is not None:
-            # This file was only created to reserve a unique file name
-            # Now it is not needed anymore
-            self._out_file.close()
+        if self.get_app_state() == AppState.CANCELLED:
+            self._process.kill()
+        # Directory with temp files does not need to be deleted,
+        # as temp dir is automatically deleted upon object destruction
+    @requires_state(AppState.CREATED)
+    def get_prefetch_options(self):
+        """
+        Get additional options for the `prefetch` call.
+        PROTECTED: Override when inheriting.
+        Returns
+        -------
+        options: str
+            The additional options.
+        """
+        return ""
+    @requires_state(AppState.CREATED)
+    def get_fastq_dump_options(self):
+        """
+        Get additional options for the `fasterq-dump` call.
+        PROTECTED: Override when inheriting.
+        Returns
+        -------
+        options: str
+            The additional options.
+        """
+        return ""
     @requires_state(AppState.JOINED)
     def get_file_paths(self):
         """
-        Get the file paths to the downloaded FASTQ files.
+        Get the file paths to the downloaded files.
         Returns
         -------
         paths : list of str
             The file paths to the downloaded files.
         """
         return self._file_names
     @requires_state(AppState.JOINED)
+    @abc.abstractmethod
     def get_sequences(self):
         """
-        Get the sequences and score values from the downloaded file(s).
+        Get the sequences from the downloaded file(s).
         Returns
         -------
-        sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
+        sequences : list of dict (str -> NucleotideSequence)
             This list contains the reads for each spot:
             The first item contains the first read for each spot, the
             second item contains the second read for each spot (if existing),
             etc.
             Each item in the list is a dictionary mapping identifiers to its
-            corresponding sequence and score values.
+            corresponding sequence.
         """
-        fastq_files = self.get_fastq()
-        return [get_sequences(fastq_file) for fastq_file in fastq_files]
+        pass
+class FastqDumpApp(_DumpApp):
+    """
+    Fetch sequencing data from the *NCBI sequence read archive*
+    (SRA) using *sra-tools*.
+    Parameters
+    ----------
+    uid : str
+        A *unique identifier* (UID) of the file to be downloaded.
+    output_path_prefix : str, optional
+        The prefix of the path to store the downloaded FASTQ file.
+        ``.fastq`` is appended to this prefix if the run contains
+        a single read per spot.
+        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
+        multiple reads per spot.
+        By default, the files are created in a temporary directory and
+        deleted after the files have been read.
+    prefetch_path, fasterq_dump_path : str, optional
+        Path to the ``prefetch`` and ``fasterq-dump`` binary,
+        respectively.
+    offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
+        This value is subtracted from the FASTQ ASCII code to obtain the
+        quality score.
+        Can either be directly the value, or a string that indicates
+        the score format.
+    """
+    def __init__(self, uid, output_path_prefix=None, prefetch_path="prefetch",
+                 fasterq_dump_path="fasterq-dump", offset="Sanger"):
+        super().__init__(
+            uid, output_path_prefix, prefetch_path, fasterq_dump_path
+        )
+        self._offset = offset
+        self._fastq_files = None
     @requires_state(AppState.JOINED)
     def get_fastq(self):
@@ -130,12 +264,47 @@ class FastqDumpApp(LocalApp):
                 for file_name in self.get_file_paths()
             ]
         return self._fastq_files
-    @staticmethod
-    def fetch(uid, output_path_prefix=None, bin_path="fasterq-dump",
-              offset="Sanger"):
+    @requires_state(AppState.JOINED)
+    def get_sequences(self):
+        return [
+            {
+                header: NucleotideSequence(
+                    seq_str.replace("U","T").replace("X","N")
+                )
+                for header, (seq_str, _) in fastq_file.items()
+            }
+            for fastq_file in self.get_fastq()
+        ]
+    @requires_state(AppState.JOINED)
+    def get_sequences_and_scores(self):
+        """
+        Get the sequences and score values from the downloaded file(s).
+        Returns
+        -------
+        sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
+            This list contains the reads for each spot:
+            The first item contains the first read for each spot, the
+            second item contains the second read for each spot (if existing),
+            etc.
+            Each item in the list is a dictionary mapping identifiers to its
+            corresponding sequence and score values.
         """
-        Get the sequences and score values belonging to the UID from the
+        return [
+            get_sequences_and_scores(fastq_file)
+            for fastq_file in self.get_fastq()
+        ]
+    @classmethod
+    def fetch(cls, uid, output_path_prefix=None, prefetch_path="prefetch",
+              fasterq_dump_path="fasterq-dump", offset="Sanger"):
+        """
+        Get the sequences belonging to the UID from the
         *NCBI sequence read archive* (SRA).
         Parameters
@@ -150,25 +319,138 @@ class FastqDumpApp(LocalApp):
             multiple reads per spot.
             By default, the files are created in a temporary directory and
             deleted after the files have been read.
-        bin_path : str, optional
-            Path to the ``fasterq-dump`` binary.
+        prefetch_path, fasterq_dump_path : str, optional
+            Path to the ``prefetch`` and ``fasterq-dump`` binary,
+            respectively.
         offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
             This value is subtracted from the FASTQ ASCII code to obtain the
             quality score.
             Can either be directly the value, or a string that indicates
             the score format.
         Returns
         -------
-        sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
+        sequences : list of dict (str -> NucleotideSequence)
             This list contains the reads for each spot:
             The first item contains the first read for each spot, the
             second item contains the second read for each spot (if existing),
             etc.
             Each item in the list is a dictionary mapping identifiers to its
-            corresponding sequence and score values.
+            corresponding sequence.
         """
-        app = FastqDumpApp(uid, output_path_prefix, bin_path, offset)
+        app = cls(
+            uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset
+        )
         app.start()
         app.join()
         return app.get_sequences()
+class FastaDumpApp(_DumpApp):
+    """
+    Fetch sequencing data from the *NCBI sequence read archive*
+    (SRA) using *sra-tools*.
+    Parameters
+    ----------
+    uid : str
+        A *unique identifier* (UID) of the file to be downloaded.
+    output_path_prefix : str, optional
+        The prefix of the path to store the downloaded FASTQ file.
+        ``.fastq`` is appended to this prefix if the run contains
+        a single read per spot.
+        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
+        multiple reads per spot.
+        By default, the files are created in a temporary directory and
+        deleted after the files have been read.
+    prefetch_path, fasterq_dump_path : str, optional
+        Path to the ``prefetch`` and ``fasterq-dump`` binary,
+        respectively.
+    """
+    def __init__(self, uid, output_path_prefix=None, prefetch_path="prefetch",
+                 fasterq_dump_path="fasterq-dump"):
+        super().__init__(
+            uid, output_path_prefix, prefetch_path, fasterq_dump_path
+        )
+        self._fasta_files = None
+    @requires_state(AppState.CREATED)
+    def get_prefetch_options(self):
+        return
+        # TODO: Use '--eliminate-quals'
+        # when https://github.com/ncbi/sra-tools/issues/883 is resolved
+        # return "--eliminate-quals"
+    @requires_state(AppState.CREATED)
+    def get_fastq_dump_options(self):
+        return "--fasta"
+    @requires_state(AppState.JOINED)
+    def get_fasta(self):
+        """
+        Get the `FastaFile` objects from the downloaded file(s).
+        Returns
+        -------
+        fasta_files : list of FastaFile
+            This list contains the reads for each spot:
+            The first item contains the first read for each spot, the
+            second item contains the second read for each spot (if existing),
+            etc.
+        """
+        if self._fasta_files is None:
+            self._fasta_files = [
+                FastaFile.read(file_name)
+                for file_name in self.get_file_paths()
+            ]
+        return self._fasta_files
+    @requires_state(AppState.JOINED)
+    def get_sequences(self):
+        return [get_sequences(fasta_file) for fasta_file in self.get_fasta()]
+    @classmethod
+    def fetch(cls, uid, output_path_prefix=None, prefetch_path="prefetch",
+              fasterq_dump_path="fasterq-dump"):
+        """
+        Get the sequences belonging to the UID from the
+        *NCBI sequence read archive* (SRA).
+        Parameters
+        ----------
+        uid : str
+            A *unique identifier* (UID) of the file to be downloaded.
+        output_path_prefix : str, optional
+            The prefix of the path to store the downloaded FASTQ file.
+            ``.fastq`` is appended to this prefix if the run contains
+            a single read per spot.
+            ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
+            multiple reads per spot.
+            By default, the files are created in a temporary directory and
+            deleted after the files have been read.
+        prefetch_path, fasterq_dump_path : str, optional
+            Path to the ``prefetch`` and ``fasterq-dump`` binary,
+            respectively.
+        Returns
+        -------
+        sequences : list of dict (str -> NucleotideSequence)
+            This list contains the reads for each spot:
+            The first item contains the first read for each spot, the
+            second item contains the second read for each spot (if existing),
+            etc.
+            Each item in the list is a dictionary mapping identifiers to its
+            corresponding sequence.
+        """
+        app = cls(
+            uid, output_path_prefix, prefetch_path, fasterq_dump_path
+        )
+        app.start()
+        app.join()
+        return app.get_sequences()

biotite/database/entrez/__init__.py CHANGED Viewed

@@ -11,4 +11,5 @@ __author__ = "Patrick Kunzmann"
 from .dbnames import *
 from .download import *
-from .query import *
+from .query import *
+from .key import *

biotite/database/entrez/check.py CHANGED Viewed

@@ -6,6 +6,7 @@ __name__ = "biotite.database.entrez"
 __author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
 __all__ = ["check_for_errors"]
+import json
 from ..error import RequestError
@@ -29,17 +30,27 @@ _error_messages = [
 def check_for_errors(message):
     """
     Check for common error messages in NCBI Entrez database responses.
     Parameters
     ----------
     message : str
-        The message received from NCBI Entrez.
+        The message received from NCBI Entrez.
     Raises
     ------
     RequestError
         If the message contains an error message.
     """
+    # Server can respond with short JSON error messages
+    if len(message) < 500:
+        try:
+            message_json = json.loads(message)
+            if "error" in message_json:
+                raise RequestError(message_json["error"])
+        except json.decoder.JSONDecodeError:
+            # It is not a JSON message
+            pass
     # Error always appear at the end of message
     message_end = message[-200:]
     # Seemingly arbitrary '+' characters are in NCBI error messages

biotite/database/entrez/download.py CHANGED Viewed

@@ -13,6 +13,7 @@ import io
 import requests
 from .check import check_for_errors
 from .dbnames import sanitize_database_name
+from .key import get_api_key
 from ..error import RequestError
@@ -23,15 +24,15 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
           ret_mode="text", overwrite=False, verbose=False):
     """
     Download files from the NCBI Entrez database in various formats.
     The data for each UID will be fetched into a separate file.
     A list of valid database, retrieval type and mode combinations can
     be found under
     `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_
     This function requires an internet connection.
     Parameters
     ----------
     uids : str or iterable object of str
@@ -58,7 +59,7 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
     verbose: bool, optional
         If true, the function will output the download progress.
         (Default: False)
     Returns
     -------
     files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
@@ -68,7 +69,7 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
         object) was given, a list of strings is returned.
         If `target_path` is ``None``, the file contents are stored in
         either `StringIO` or `BytesIO` objects.
     Warnings
     --------
     Even if you give valid input to this function, in rare cases the
@@ -76,14 +77,14 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
     In these cases the request should be retried.
     When the issue occurs repeatedly, the error is probably in your
     input.
     See also
     --------
     fetch_single_file
     Examples
     --------
     >>> import os.path
     >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
     ...               db_name="protein", ret_type="fasta")
@@ -122,6 +123,9 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
                     "tool" : "Biotite",
                     "mail" : "padix.key@gmail.com"
                 }
+                api_key = get_api_key()
+                if api_key is not None:
+                    param_dict["api_key"] = api_key
                 r = requests.get(_fetch_url, params=param_dict)
                 content = r.text
                 check_for_errors(content)
@@ -147,7 +151,7 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
     """
     Almost the same as :func:`fetch()`, but the data for the given UIDs
     will be stored in a single file.
     Parameters
     ----------
     uids : iterable object of str
@@ -164,14 +168,14 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
     overwrite : bool, optional
         If false, the file is only downloaded, if no file with the same
         name already exists.
     Returns
     -------
     file : str or StringIO or BytesIO
         The file name of the downloaded file.
         If `file_name` is ``None``, the file content is stored in
         either a `StringIO` or a `BytesIO` object.
     Warnings
     --------
     Even if you give valid input to this function, in rare cases the
@@ -179,7 +183,7 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
     In these cases the request should be retried.
     When the issue occurs repeatedly, the error is probably in your
     input.
     See also
     --------
     fetch
@@ -203,6 +207,9 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
         "tool" : "Biotite",
         "mail" : "padix.key@gmail.com"
     }
+    api_key = get_api_key()
+    if api_key is not None:
+        param_dict["api_key"] = api_key
     r = requests.get(_fetch_url, params=param_dict)
     content = r.text
     check_for_errors(content)

biotite/database/entrez/key.py ADDED Viewed

@@ -0,0 +1,44 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.database.entrez"
+__author__ = "Patrick Kunzmann"
+__all__ = ["set_api_key", "get_api_key"]
+_API_KEY = None
+def get_api_key():
+    """
+    Get the
+    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.
+    Returns
+    -------
+    api_key : str or None
+        The API key, if it was already set before, ``None`` otherwise.
+    """
+    global _API_KEY
+    return _API_KEY
+def set_api_key(key):
+    """
+    Set the
+    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.
+    Using an API key increases the request limit on the NCBI servers
+    and is automatically used by functions in
+    :mod:`biotite.database.entrez`.
+    This key is kept only in memory and hence removed at the end of the
+    Python session.
+    Parameters
+    ----------
+    key : str
+        The API key.
+    """
+    global _API_KEY
+    _API_KEY = key