biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.0__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-310-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-310-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-310-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-310-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-310-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -6,22 +6,28 @@ __name__ = "biotite.database.entrez"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["fetch", "fetch_single_file"]
|
|
8
8
|
|
|
9
|
-
from os.path import isdir, isfile, join, getsize
|
|
10
|
-
import os
|
|
11
|
-
import glob
|
|
12
9
|
import io
|
|
10
|
+
import os
|
|
11
|
+
from os.path import getsize, isdir, isfile, join
|
|
13
12
|
import requests
|
|
14
|
-
from .check import check_for_errors
|
|
15
|
-
from .dbnames import sanitize_database_name
|
|
16
|
-
from .key import get_api_key
|
|
17
|
-
from
|
|
18
|
-
|
|
13
|
+
from biotite.database.entrez.check import check_for_errors
|
|
14
|
+
from biotite.database.entrez.dbnames import sanitize_database_name
|
|
15
|
+
from biotite.database.entrez.key import get_api_key
|
|
16
|
+
from biotite.database.error import RequestError
|
|
19
17
|
|
|
20
18
|
_fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
|
21
19
|
|
|
22
20
|
|
|
23
|
-
def fetch(
|
|
24
|
-
|
|
21
|
+
def fetch(
|
|
22
|
+
uids,
|
|
23
|
+
target_path,
|
|
24
|
+
suffix,
|
|
25
|
+
db_name,
|
|
26
|
+
ret_type,
|
|
27
|
+
ret_mode="text",
|
|
28
|
+
overwrite=False,
|
|
29
|
+
verbose=False,
|
|
30
|
+
):
|
|
25
31
|
"""
|
|
26
32
|
Download files from the NCBI Entrez database in various formats.
|
|
27
33
|
|
|
@@ -111,31 +117,28 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
|
|
|
111
117
|
file = join(target_path, id + "." + suffix)
|
|
112
118
|
else:
|
|
113
119
|
file = None
|
|
114
|
-
if file is None
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
content
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
else:
|
|
137
|
-
with open(file, "w+") as f:
|
|
138
|
-
f.write(content)
|
|
120
|
+
if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
|
|
121
|
+
param_dict = {
|
|
122
|
+
"db": sanitize_database_name(db_name),
|
|
123
|
+
"id": id,
|
|
124
|
+
"rettype": ret_type,
|
|
125
|
+
"retmode": ret_mode,
|
|
126
|
+
"tool": "Biotite",
|
|
127
|
+
"mail": "padix.key@gmail.com",
|
|
128
|
+
}
|
|
129
|
+
api_key = get_api_key()
|
|
130
|
+
if api_key is not None:
|
|
131
|
+
param_dict["api_key"] = api_key
|
|
132
|
+
r = requests.get(_fetch_url, params=param_dict)
|
|
133
|
+
content = r.text
|
|
134
|
+
check_for_errors(content)
|
|
135
|
+
if content.startswith(" Error"):
|
|
136
|
+
raise RequestError(content[8:])
|
|
137
|
+
if file is None:
|
|
138
|
+
file = io.StringIO(content)
|
|
139
|
+
else:
|
|
140
|
+
with open(file, "w+") as f:
|
|
141
|
+
f.write(content)
|
|
139
142
|
files.append(file)
|
|
140
143
|
if verbose:
|
|
141
144
|
print("\nDone")
|
|
@@ -146,8 +149,9 @@ def fetch(uids, target_path, suffix, db_name, ret_type,
|
|
|
146
149
|
return files
|
|
147
150
|
|
|
148
151
|
|
|
149
|
-
def fetch_single_file(
|
|
150
|
-
|
|
152
|
+
def fetch_single_file(
|
|
153
|
+
uids, file_name, db_name, ret_type, ret_mode="text", overwrite=False
|
|
154
|
+
):
|
|
151
155
|
"""
|
|
152
156
|
Almost the same as :func:`fetch()`, but the data for the given UIDs
|
|
153
157
|
will be stored in a single file.
|
|
@@ -188,24 +192,26 @@ def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
|
|
|
188
192
|
--------
|
|
189
193
|
fetch
|
|
190
194
|
"""
|
|
191
|
-
if
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
195
|
+
if (
|
|
196
|
+
file_name is not None
|
|
197
|
+
and os.path.isfile(file_name)
|
|
198
|
+
and getsize(file_name) > 0
|
|
199
|
+
and not overwrite
|
|
200
|
+
):
|
|
201
|
+
# Do not redownload the already existing file
|
|
202
|
+
return file_name
|
|
197
203
|
uid_list_str = ""
|
|
198
204
|
for id in uids:
|
|
199
205
|
uid_list_str += id + ","
|
|
200
206
|
# Remove terminal comma
|
|
201
207
|
uid_list_str = uid_list_str[:-1]
|
|
202
208
|
param_dict = {
|
|
203
|
-
"db"
|
|
204
|
-
"id"
|
|
205
|
-
"rettype"
|
|
206
|
-
"retmode"
|
|
207
|
-
"tool"
|
|
208
|
-
"mail"
|
|
209
|
+
"db": sanitize_database_name(db_name),
|
|
210
|
+
"id": uid_list_str,
|
|
211
|
+
"rettype": ret_type,
|
|
212
|
+
"retmode": ret_mode,
|
|
213
|
+
"tool": "Biotite",
|
|
214
|
+
"mail": "padix.key@gmail.com",
|
|
209
215
|
}
|
|
210
216
|
api_key = get_api_key()
|
|
211
217
|
if api_key is not None:
|
biotite/database/entrez/key.py
CHANGED
biotite/database/entrez/query.py
CHANGED
|
@@ -6,22 +6,23 @@ __name__ = "biotite.database.entrez"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
|
|
8
8
|
|
|
9
|
-
import requests
|
|
10
9
|
import abc
|
|
11
10
|
from xml.etree import ElementTree
|
|
12
|
-
|
|
13
|
-
from .
|
|
14
|
-
from
|
|
15
|
-
from .key import get_api_key
|
|
16
|
-
|
|
11
|
+
import requests
|
|
12
|
+
from biotite.database.entrez.check import check_for_errors
|
|
13
|
+
from biotite.database.entrez.dbnames import sanitize_database_name
|
|
14
|
+
from biotite.database.entrez.key import get_api_key
|
|
15
|
+
from biotite.database.error import RequestError
|
|
17
16
|
|
|
18
17
|
_search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
19
18
|
|
|
19
|
+
|
|
20
20
|
class Query(metaclass=abc.ABCMeta):
|
|
21
21
|
"""
|
|
22
22
|
Base class for a wrapper around a search term
|
|
23
23
|
for the NCBI Entrez search service.
|
|
24
24
|
"""
|
|
25
|
+
|
|
25
26
|
def __init__(self):
|
|
26
27
|
pass
|
|
27
28
|
|
|
@@ -85,7 +86,6 @@ class CompositeQuery(Query):
|
|
|
85
86
|
return "({:}) {:} ({:})".format(str(self._q1), self._op, self._q2)
|
|
86
87
|
|
|
87
88
|
|
|
88
|
-
|
|
89
89
|
class SimpleQuery(Query):
|
|
90
90
|
"""
|
|
91
91
|
A simple query for the NCBI Entrez search service without
|
|
@@ -121,17 +121,59 @@ class SimpleQuery(Query):
|
|
|
121
121
|
# Field identifiers are taken from
|
|
122
122
|
# https://www.ncbi.nlm.nih.gov/books/NBK49540/
|
|
123
123
|
_fields = [
|
|
124
|
-
"Accession",
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
"
|
|
128
|
-
"
|
|
129
|
-
"
|
|
124
|
+
"Accession",
|
|
125
|
+
"All Fields",
|
|
126
|
+
"Author",
|
|
127
|
+
"EC/RN Number",
|
|
128
|
+
"Feature Key",
|
|
129
|
+
"Filter",
|
|
130
|
+
"Gene Name",
|
|
131
|
+
"Genome Project",
|
|
132
|
+
"Issue",
|
|
133
|
+
"Journal",
|
|
134
|
+
"Keyword",
|
|
135
|
+
"Modification Date",
|
|
136
|
+
"Molecular Weight",
|
|
137
|
+
"Organism",
|
|
138
|
+
"Page Number",
|
|
139
|
+
"Primary Accession",
|
|
140
|
+
"Properties",
|
|
141
|
+
"Protein Name",
|
|
142
|
+
"Publication Date",
|
|
143
|
+
"SeqID String",
|
|
144
|
+
"Sequence Length",
|
|
145
|
+
"Substance Name",
|
|
146
|
+
"Text Word",
|
|
147
|
+
"Title",
|
|
148
|
+
"Volume",
|
|
130
149
|
# Abbreviations
|
|
131
|
-
"ACCN",
|
|
132
|
-
"
|
|
133
|
-
"
|
|
134
|
-
"
|
|
150
|
+
"ACCN",
|
|
151
|
+
"ALL",
|
|
152
|
+
"AU",
|
|
153
|
+
"AUTH",
|
|
154
|
+
"ECNO",
|
|
155
|
+
"FKEY",
|
|
156
|
+
"FILT",
|
|
157
|
+
"SB",
|
|
158
|
+
"GENE",
|
|
159
|
+
"ISS",
|
|
160
|
+
"JOUR",
|
|
161
|
+
"KYWD",
|
|
162
|
+
"MDAT",
|
|
163
|
+
"MOLWT",
|
|
164
|
+
"ORGN",
|
|
165
|
+
"PAGE",
|
|
166
|
+
"PACC",
|
|
167
|
+
"PORGN",
|
|
168
|
+
"PROP",
|
|
169
|
+
"PROT",
|
|
170
|
+
"PDAT",
|
|
171
|
+
"SQID",
|
|
172
|
+
"SLEN",
|
|
173
|
+
"SUBS",
|
|
174
|
+
"WORD",
|
|
175
|
+
"TI",
|
|
176
|
+
"TITL" "VOL",
|
|
135
177
|
]
|
|
136
178
|
|
|
137
179
|
def __init__(self, term, field=None):
|
|
@@ -139,12 +181,9 @@ class SimpleQuery(Query):
|
|
|
139
181
|
if field is not None:
|
|
140
182
|
if field not in SimpleQuery._fields:
|
|
141
183
|
raise ValueError(f"Unknown field identifier '{field}'")
|
|
142
|
-
for invalid_string in \
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
raise ValueError(
|
|
146
|
-
f"Query contains illegal term {invalid_string}"
|
|
147
|
-
)
|
|
184
|
+
for invalid_string in ['"', "AND", "OR", "NOT", "[", "]", "(", ")", "\t", "\n"]:
|
|
185
|
+
if invalid_string in term:
|
|
186
|
+
raise ValueError(f"Query contains illegal term {invalid_string}")
|
|
148
187
|
if " " in term:
|
|
149
188
|
# Encapsulate in quotes if spaces are in search term
|
|
150
189
|
term = f'"{term}"'
|
biotite/database/error.py
CHANGED
|
@@ -6,24 +6,29 @@ __name__ = "biotite.database.pubchem"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["fetch", "fetch_property"]
|
|
8
8
|
|
|
9
|
+
import io
|
|
9
10
|
import numbers
|
|
10
|
-
import requests
|
|
11
|
-
from os.path import isdir, isfile, join, getsize
|
|
12
11
|
import os
|
|
13
|
-
import
|
|
14
|
-
import
|
|
15
|
-
from .
|
|
16
|
-
from .error import parse_error_details
|
|
17
|
-
from
|
|
18
|
-
|
|
12
|
+
from os.path import getsize, isdir, isfile, join
|
|
13
|
+
import requests
|
|
14
|
+
from biotite.database.error import RequestError
|
|
15
|
+
from biotite.database.pubchem.error import parse_error_details
|
|
16
|
+
from biotite.database.pubchem.throttle import ThrottleStatus
|
|
19
17
|
|
|
20
18
|
_base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
|
|
21
19
|
_binary_formats = ["png", "asnb"]
|
|
22
20
|
|
|
23
21
|
|
|
24
|
-
def fetch(
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
def fetch(
|
|
23
|
+
cids,
|
|
24
|
+
format="sdf",
|
|
25
|
+
target_path=None,
|
|
26
|
+
as_structural_formula=False,
|
|
27
|
+
overwrite=False,
|
|
28
|
+
verbose=False,
|
|
29
|
+
throttle_threshold=0.5,
|
|
30
|
+
return_throttle_status=False,
|
|
31
|
+
):
|
|
27
32
|
"""
|
|
28
33
|
Download structure files from *PubChem* in various formats.
|
|
29
34
|
|
|
@@ -109,8 +114,7 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
109
114
|
raise TypeError("CIDs must be given as integers, not as string")
|
|
110
115
|
# Verbose output
|
|
111
116
|
if verbose:
|
|
112
|
-
print(f"Fetching file {i+1:d} / {len(cids):d} ({cid})...",
|
|
113
|
-
end="\r")
|
|
117
|
+
print(f"Fetching file {i+1:d} / {len(cids):d} ({cid})...", end="\r")
|
|
114
118
|
|
|
115
119
|
# Fetch file from database
|
|
116
120
|
if target_path is not None:
|
|
@@ -119,36 +123,33 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
119
123
|
# 'file = None' -> store content in a file-like object
|
|
120
124
|
file = None
|
|
121
125
|
|
|
122
|
-
if file is None
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
)
|
|
131
|
-
if not r.ok:
|
|
132
|
-
raise RequestError(parse_error_details(r.text))
|
|
126
|
+
if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
|
|
127
|
+
record_type = "2d" if as_structural_formula else "3d"
|
|
128
|
+
r = requests.get(
|
|
129
|
+
_base_url + f"compound/cid/{cid}/{format.upper()}",
|
|
130
|
+
params={"record_type": record_type},
|
|
131
|
+
)
|
|
132
|
+
if not r.ok:
|
|
133
|
+
raise RequestError(parse_error_details(r.text))
|
|
133
134
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
135
|
+
if format.lower() in _binary_formats:
|
|
136
|
+
content = r.content
|
|
137
|
+
else:
|
|
138
|
+
content = r.text
|
|
138
139
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
else:
|
|
143
|
-
file = io.StringIO(content)
|
|
140
|
+
if file is None:
|
|
141
|
+
if format in _binary_formats:
|
|
142
|
+
file = io.BytesIO(content)
|
|
144
143
|
else:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
144
|
+
file = io.StringIO(content)
|
|
145
|
+
else:
|
|
146
|
+
mode = "wb+" if format in _binary_formats else "w+"
|
|
147
|
+
with open(file, mode) as f:
|
|
148
|
+
f.write(content)
|
|
148
149
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
150
|
+
throttle_status = ThrottleStatus.from_response(r)
|
|
151
|
+
if throttle_threshold is not None:
|
|
152
|
+
throttle_status.wait_if_busy(throttle_threshold)
|
|
152
153
|
|
|
153
154
|
files.append(file)
|
|
154
155
|
if verbose:
|
|
@@ -164,8 +165,7 @@ def fetch(cids, format="sdf", target_path=None, as_structural_formula=False,
|
|
|
164
165
|
return return_value
|
|
165
166
|
|
|
166
167
|
|
|
167
|
-
def fetch_property(cids, name,
|
|
168
|
-
throttle_threshold=0.5, return_throttle_status=False):
|
|
168
|
+
def fetch_property(cids, name, throttle_threshold=0.5, return_throttle_status=False):
|
|
169
169
|
"""
|
|
170
170
|
Download the given property for the given CID(s).
|
|
171
171
|
|
|
@@ -230,15 +230,13 @@ def fetch_property(cids, name,
|
|
|
230
230
|
|
|
231
231
|
# Property names may only contain letters and numbers
|
|
232
232
|
if not name.isalnum():
|
|
233
|
-
raise ValueError(
|
|
234
|
-
f"Property '{name}' contains invalid characters"
|
|
235
|
-
)
|
|
233
|
+
raise ValueError(f"Property '{name}' contains invalid characters")
|
|
236
234
|
|
|
237
235
|
# Use TXT format instead of CSV to avoid issues with ',' characters
|
|
238
236
|
# within table elements
|
|
239
237
|
r = requests.post(
|
|
240
238
|
_base_url + f"compound/cid/property/{name}/TXT",
|
|
241
|
-
data={"cid":
|
|
239
|
+
data={"cid": ",".join([str(cid) for cid in cids])},
|
|
242
240
|
)
|
|
243
241
|
if not r.ok:
|
|
244
242
|
raise RequestError(parse_error_details(r.text))
|
|
@@ -15,6 +15,6 @@ def parse_error_details(response_text):
|
|
|
15
15
|
for message_line_indicator in ["Detail: ", "Message: "]:
|
|
16
16
|
for line in response_text.splitlines():
|
|
17
17
|
if line.startswith(message_line_indicator):
|
|
18
|
-
return line[len(message_line_indicator):]
|
|
18
|
+
return line[len(message_line_indicator) :]
|
|
19
19
|
# No 'Detail: ...' or 'Message: ' line found
|
|
20
|
-
return "Unknown error"
|
|
20
|
+
return "Unknown error"
|
|
@@ -4,20 +4,28 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.database.pubchem"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = [
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Query",
|
|
9
|
+
"NameQuery",
|
|
10
|
+
"SmilesQuery",
|
|
11
|
+
"InchiQuery",
|
|
12
|
+
"InchiKeyQuery",
|
|
13
|
+
"FormulaQuery",
|
|
14
|
+
"SuperstructureQuery",
|
|
15
|
+
"SubstructureQuery",
|
|
16
|
+
"SimilarityQuery",
|
|
17
|
+
"IdentityQuery",
|
|
18
|
+
"search",
|
|
19
|
+
]
|
|
11
20
|
|
|
12
|
-
import copy
|
|
13
21
|
import abc
|
|
14
22
|
import collections
|
|
23
|
+
import copy
|
|
15
24
|
import requests
|
|
16
|
-
from .error import
|
|
17
|
-
from .
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
|
|
25
|
+
from biotite.database.error import RequestError
|
|
26
|
+
from biotite.database.pubchem.error import parse_error_details
|
|
27
|
+
from biotite.database.pubchem.throttle import ThrottleStatus
|
|
28
|
+
from biotite.structure.io.mol.mol import MOLFile
|
|
21
29
|
|
|
22
30
|
_base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
|
|
23
31
|
|
|
@@ -258,9 +266,10 @@ class FormulaQuery(Query):
|
|
|
258
266
|
# Only set maximum number, if provided by the user
|
|
259
267
|
# The PubChem default value for this might change over time
|
|
260
268
|
if self._number is not None:
|
|
261
|
-
|
|
269
|
+
params["MaxRecords"] = self._number
|
|
262
270
|
return params
|
|
263
271
|
|
|
272
|
+
|
|
264
273
|
def _format_element(element, count):
|
|
265
274
|
if count == 1:
|
|
266
275
|
return element.capitalize()
|
|
@@ -318,8 +327,8 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
318
327
|
)
|
|
319
328
|
if not query_key_found:
|
|
320
329
|
raise TypeError(
|
|
321
|
-
"Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' "
|
|
322
|
-
|
|
330
|
+
"Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' " "or 'cid'"
|
|
331
|
+
)
|
|
323
332
|
if "number" in kwargs:
|
|
324
333
|
self._number = kwargs["number"]
|
|
325
334
|
del kwargs["number"]
|
|
@@ -346,14 +355,10 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
346
355
|
mol_file.set_structure(atoms)
|
|
347
356
|
# Every MOL string with "$$$$" is a valid SDF string
|
|
348
357
|
# Important: USE MS-style new lines
|
|
349
|
-
return cls(
|
|
350
|
-
*args,
|
|
351
|
-
sdf = "\r\n".join(mol_file.lines) + "\r\n$$$$\r\n",
|
|
352
|
-
**kwargs
|
|
353
|
-
)
|
|
358
|
+
return cls(*args, sdf="\r\n".join(mol_file.lines) + "\r\n$$$$\r\n", **kwargs)
|
|
354
359
|
|
|
355
360
|
def get_input_url_path(self):
|
|
356
|
-
input_string =
|
|
361
|
+
input_string = f"compound/{self.search_type()}/{self._query_key}"
|
|
357
362
|
if self._query_key == "cid":
|
|
358
363
|
# Put CID in URL and not in POST payload,
|
|
359
364
|
# as PubChem is confused otherwise
|
|
@@ -370,7 +375,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
|
|
|
370
375
|
# Only set maximum number, if provided by the user
|
|
371
376
|
# The PubChem default value for this might change over time
|
|
372
377
|
if self._number is not None:
|
|
373
|
-
|
|
378
|
+
params["MaxRecords"] = self._number
|
|
374
379
|
for key, val in self.search_options().items():
|
|
375
380
|
# Convert 'snake case' Python parameters
|
|
376
381
|
# to 'camel case' request parameters
|
|
@@ -472,13 +477,13 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
|
|
|
472
477
|
"""
|
|
473
478
|
|
|
474
479
|
_option_defaults = {
|
|
475
|
-
"match_charges"
|
|
476
|
-
"match_tautomers"
|
|
477
|
-
"rings_not_embedded"
|
|
478
|
-
"single_double_bonds_match"
|
|
479
|
-
"chains_match_rings"
|
|
480
|
-
"strip_hydrogen"
|
|
481
|
-
"stereo"
|
|
480
|
+
"match_charges": False,
|
|
481
|
+
"match_tautomers": False,
|
|
482
|
+
"rings_not_embedded": False,
|
|
483
|
+
"single_double_bonds_match": True,
|
|
484
|
+
"chains_match_rings": True,
|
|
485
|
+
"strip_hydrogen": False,
|
|
486
|
+
"stereo": "ignore",
|
|
482
487
|
}
|
|
483
488
|
|
|
484
489
|
def __init__(self, **kwargs):
|
|
@@ -706,7 +711,7 @@ class SimilarityQuery(StructureQuery):
|
|
|
706
711
|
return f"fastsimilarity_{dim}"
|
|
707
712
|
|
|
708
713
|
def search_options(self):
|
|
709
|
-
return {"threshold"
|
|
714
|
+
return {"threshold": int(round(self._threshold * 100))}
|
|
710
715
|
|
|
711
716
|
|
|
712
717
|
class IdentityQuery(StructureQuery):
|
|
@@ -766,8 +771,6 @@ class IdentityQuery(StructureQuery):
|
|
|
766
771
|
return params
|
|
767
772
|
|
|
768
773
|
|
|
769
|
-
|
|
770
|
-
|
|
771
774
|
def search(query, throttle_threshold=0.5, return_throttle_status=False):
|
|
772
775
|
"""
|
|
773
776
|
Get all CIDs that meet the given query requirements,
|
|
@@ -812,7 +815,7 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
|
|
|
812
815
|
r = requests.post(
|
|
813
816
|
_base_url + query.get_input_url_path() + "/cids/TXT",
|
|
814
817
|
data=query.get_params(),
|
|
815
|
-
files=files
|
|
818
|
+
files=files,
|
|
816
819
|
)
|
|
817
820
|
if not r.ok:
|
|
818
821
|
raise RequestError(parse_error_details(r.text))
|
|
@@ -7,8 +7,8 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["ThrottleStatus"]
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
from dataclasses import dataclass
|
|
11
10
|
import time
|
|
11
|
+
from dataclasses import dataclass
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@dataclass(frozen=True)
|
|
@@ -67,8 +67,7 @@ class ThrottleStatus:
|
|
|
67
67
|
"""
|
|
68
68
|
throttle_control = response.headers["X-Throttling-Control"]
|
|
69
69
|
throttle_status = [
|
|
70
|
-
substring.split(")")[0] for substring
|
|
71
|
-
in throttle_control.split("(")[1:]
|
|
70
|
+
substring.split(")")[0] for substring in throttle_control.split("(")[1:]
|
|
72
71
|
]
|
|
73
72
|
# Remove '%' sign and convert to int
|
|
74
73
|
count_status, time_status, service_status = [
|
|
@@ -96,4 +95,4 @@ class ThrottleStatus:
|
|
|
96
95
|
threshold is exceeded.
|
|
97
96
|
"""
|
|
98
97
|
if self.count > threshold or self.time > threshold:
|
|
99
|
-
time.sleep(wait_time)
|
|
98
|
+
time.sleep(wait_time)
|