biotite 0.41.2__cp312-cp312-macosx_11_0_arm64.whl → 1.0.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -6,20 +6,17 @@ __name__ = "biotite.database.rcsb"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["fetch"]
|
|
8
8
|
|
|
9
|
-
import requests
|
|
10
|
-
from os.path import isdir, isfile, join, getsize
|
|
11
|
-
import os
|
|
12
|
-
import glob
|
|
13
9
|
import io
|
|
14
|
-
|
|
15
|
-
|
|
10
|
+
import os
|
|
11
|
+
from os.path import getsize, isfile, join
|
|
12
|
+
import requests
|
|
13
|
+
from biotite.database.error import RequestError
|
|
16
14
|
|
|
17
15
|
_standard_url = "https://files.rcsb.org/download/"
|
|
18
|
-
_mmtf_url = "https://mmtf.rcsb.org/v1.0/full/"
|
|
19
16
|
_bcif_url = "https://models.rcsb.org/"
|
|
20
17
|
_fasta_url = "https://www.rcsb.org/fasta/entry/"
|
|
21
18
|
|
|
22
|
-
_binary_formats = ["
|
|
19
|
+
_binary_formats = ["bcif"]
|
|
23
20
|
|
|
24
21
|
|
|
25
22
|
def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
@@ -34,7 +31,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
34
31
|
pdb_ids : str or iterable object of str
|
|
35
32
|
A single PDB ID or a list of PDB IDs of the structure(s)
|
|
36
33
|
to be downloaded.
|
|
37
|
-
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', '
|
|
34
|
+
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
|
|
38
35
|
The format of the files to be downloaded.
|
|
39
36
|
``'pdbx'``, ``'cif'`` and ``'mmcif'`` are synonyms for
|
|
40
37
|
the same format.
|
|
@@ -94,8 +91,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
94
91
|
for i, id in enumerate(pdb_ids):
|
|
95
92
|
# Verbose output
|
|
96
93
|
if verbose:
|
|
97
|
-
print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...",
|
|
98
|
-
end="\r")
|
|
94
|
+
print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...", end="\r")
|
|
99
95
|
|
|
100
96
|
# Fetch file from database
|
|
101
97
|
if target_path is not None:
|
|
@@ -104,42 +100,35 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
104
100
|
# 'file = None' -> store content in a file-like object
|
|
105
101
|
file = None
|
|
106
102
|
|
|
107
|
-
if file is None
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
_assert_valid_file(content, id)
|
|
131
|
-
else:
|
|
132
|
-
raise ValueError(f"Format '{format}' is not supported")
|
|
133
|
-
|
|
134
|
-
if file is None:
|
|
135
|
-
if format in _binary_formats:
|
|
136
|
-
file = io.BytesIO(content)
|
|
137
|
-
else:
|
|
138
|
-
file = io.StringIO(content)
|
|
103
|
+
if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
|
|
104
|
+
if format == "pdb":
|
|
105
|
+
r = requests.get(_standard_url + id + ".pdb")
|
|
106
|
+
content = r.text
|
|
107
|
+
_assert_valid_file(content, id)
|
|
108
|
+
elif format in ["cif", "mmcif", "pdbx"]:
|
|
109
|
+
r = requests.get(_standard_url + id + ".cif")
|
|
110
|
+
content = r.text
|
|
111
|
+
_assert_valid_file(content, id)
|
|
112
|
+
elif format in ["bcif"]:
|
|
113
|
+
r = requests.get(_bcif_url + id + ".bcif")
|
|
114
|
+
content = r.content
|
|
115
|
+
_assert_valid_file(r.text, id)
|
|
116
|
+
elif format == "fasta":
|
|
117
|
+
r = requests.get(_fasta_url + id)
|
|
118
|
+
content = r.text
|
|
119
|
+
_assert_valid_file(content, id)
|
|
120
|
+
else:
|
|
121
|
+
raise ValueError(f"Format '{format}' is not supported")
|
|
122
|
+
|
|
123
|
+
if file is None:
|
|
124
|
+
if format in _binary_formats:
|
|
125
|
+
file = io.BytesIO(content)
|
|
139
126
|
else:
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
127
|
+
file = io.StringIO(content)
|
|
128
|
+
else:
|
|
129
|
+
mode = "wb+" if format in _binary_formats else "w+"
|
|
130
|
+
with open(file, mode) as f:
|
|
131
|
+
f.write(content)
|
|
143
132
|
|
|
144
133
|
files.append(file)
|
|
145
134
|
if verbose:
|
|
@@ -158,10 +147,13 @@ def _assert_valid_file(response_text, pdb_id):
|
|
|
158
147
|
"""
|
|
159
148
|
# Structure file and FASTA file retrieval
|
|
160
149
|
# have different error messages
|
|
161
|
-
if len(response_text) == 0 or any(
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
150
|
+
if len(response_text) == 0 or any(
|
|
151
|
+
err_msg in response_text
|
|
152
|
+
for err_msg in [
|
|
153
|
+
"404 Not Found",
|
|
154
|
+
"<title>RCSB Protein Data Bank Error Page</title>",
|
|
155
|
+
"No fasta files were found.",
|
|
156
|
+
"No valid PDB IDs were submitted.",
|
|
157
|
+
]
|
|
158
|
+
):
|
|
167
159
|
raise RequestError("PDB ID {:} is invalid".format(pdb_id))
|
biotite/database/rcsb/query.py
CHANGED
|
@@ -4,28 +4,38 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.database.rcsb"
|
|
6
6
|
__author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
|
|
7
|
-
__all__ = [
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Query",
|
|
9
|
+
"SingleQuery",
|
|
10
|
+
"CompositeQuery",
|
|
11
|
+
"BasicQuery",
|
|
12
|
+
"FieldQuery",
|
|
13
|
+
"SequenceQuery",
|
|
14
|
+
"StructureQuery",
|
|
15
|
+
"MotifQuery",
|
|
16
|
+
"Sorting",
|
|
17
|
+
"Grouping",
|
|
18
|
+
"DepositGrouping",
|
|
19
|
+
"IdentityGrouping",
|
|
20
|
+
"UniprotGrouping",
|
|
21
|
+
"search",
|
|
22
|
+
"count",
|
|
23
|
+
]
|
|
13
24
|
|
|
14
25
|
import abc
|
|
15
|
-
import json
|
|
16
26
|
import copy
|
|
27
|
+
import json
|
|
17
28
|
from datetime import datetime
|
|
18
29
|
import numpy as np
|
|
19
30
|
import requests
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
|
|
31
|
+
from biotite.database.error import RequestError
|
|
32
|
+
from biotite.sequence.seqtypes import NucleotideSequence
|
|
23
33
|
|
|
24
34
|
_search_url = "https://search.rcsb.org/rcsbsearch/v2/query"
|
|
25
35
|
_scope_to_target = {
|
|
26
36
|
"protein": "pdb_protein_sequence",
|
|
27
|
-
"rna":
|
|
28
|
-
"dna":
|
|
37
|
+
"rna": "pdb_rna_sequence",
|
|
38
|
+
"dna": "pdb_dna_sequence",
|
|
29
39
|
}
|
|
30
40
|
|
|
31
41
|
|
|
@@ -35,6 +45,7 @@ class Query(metaclass=abc.ABCMeta):
|
|
|
35
45
|
|
|
36
46
|
This is the abstract base class for all queries.
|
|
37
47
|
"""
|
|
48
|
+
|
|
38
49
|
@abc.abstractmethod
|
|
39
50
|
def get_content(self):
|
|
40
51
|
"""
|
|
@@ -58,7 +69,6 @@ class Query(metaclass=abc.ABCMeta):
|
|
|
58
69
|
return CompositeQuery([self, query], "or")
|
|
59
70
|
|
|
60
71
|
|
|
61
|
-
|
|
62
72
|
class SingleQuery(Query, metaclass=abc.ABCMeta):
|
|
63
73
|
"""
|
|
64
74
|
A terminal query node for the RCSB search API.
|
|
@@ -69,6 +79,7 @@ class SingleQuery(Query, metaclass=abc.ABCMeta):
|
|
|
69
79
|
This is the abstract base class for all queries that are
|
|
70
80
|
terminal nodes.
|
|
71
81
|
"""
|
|
82
|
+
|
|
72
83
|
@abc.abstractmethod
|
|
73
84
|
def get_content(self):
|
|
74
85
|
return {"parameters": {}}
|
|
@@ -91,12 +102,11 @@ class CompositeQuery(Query):
|
|
|
91
102
|
operator : {'or', 'and'}
|
|
92
103
|
The type of combination.
|
|
93
104
|
"""
|
|
105
|
+
|
|
94
106
|
def __init__(self, queries, operator):
|
|
95
107
|
self._queries = queries
|
|
96
108
|
if operator not in ("or", "and"):
|
|
97
|
-
raise ValueError(
|
|
98
|
-
f"Operator must be 'or' or 'and', not '{operator}'"
|
|
99
|
-
)
|
|
109
|
+
raise ValueError(f"Operator must be 'or' or 'and', not '{operator}'")
|
|
100
110
|
self._operator = operator
|
|
101
111
|
|
|
102
112
|
def get_content(self):
|
|
@@ -113,12 +123,11 @@ class CompositeQuery(Query):
|
|
|
113
123
|
content = {
|
|
114
124
|
"type": "group",
|
|
115
125
|
"logical_operator": self._operator,
|
|
116
|
-
"nodes": [query.get_content() for query in self._queries]
|
|
126
|
+
"nodes": [query.get_content() for query in self._queries],
|
|
117
127
|
}
|
|
118
128
|
return content
|
|
119
129
|
|
|
120
130
|
|
|
121
|
-
|
|
122
131
|
class BasicQuery(SingleQuery):
|
|
123
132
|
"""
|
|
124
133
|
A text query for searching for a given term across all available
|
|
@@ -139,8 +148,9 @@ class BasicQuery(SingleQuery):
|
|
|
139
148
|
|
|
140
149
|
>>> query = BasicQuery("tc5b")
|
|
141
150
|
>>> print(sorted(search(query)))
|
|
142
|
-
['1L2Y', '8ANG', '8ANH', '8ANI', '8ANM']
|
|
151
|
+
['1L2Y', '8ANG', '8ANH', '8ANI', '8ANM', '8QWW']
|
|
143
152
|
"""
|
|
153
|
+
|
|
144
154
|
def __init__(self, term):
|
|
145
155
|
super().__init__()
|
|
146
156
|
self._term = term
|
|
@@ -212,7 +222,10 @@ class FieldQuery(SingleQuery):
|
|
|
212
222
|
>>> print(sorted(search(query)))
|
|
213
223
|
['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
|
|
214
224
|
"""
|
|
215
|
-
|
|
225
|
+
|
|
226
|
+
def __init__(
|
|
227
|
+
self, field, molecular_definition=False, case_sensitive=False, **kwargs
|
|
228
|
+
):
|
|
216
229
|
super().__init__()
|
|
217
230
|
self._negation = False
|
|
218
231
|
self._field = field
|
|
@@ -231,20 +244,25 @@ class FieldQuery(SingleQuery):
|
|
|
231
244
|
|
|
232
245
|
if self._operator not in [
|
|
233
246
|
"exact_match",
|
|
234
|
-
"contains_words",
|
|
235
|
-
"
|
|
236
|
-
"
|
|
247
|
+
"contains_words",
|
|
248
|
+
"contains_phrase",
|
|
249
|
+
"greater",
|
|
250
|
+
"less",
|
|
251
|
+
"greater_or_equal",
|
|
252
|
+
"less_or_equal",
|
|
253
|
+
"equals",
|
|
254
|
+
"range",
|
|
255
|
+
"range_closed",
|
|
237
256
|
"is_in",
|
|
238
|
-
"exists"
|
|
257
|
+
"exists",
|
|
239
258
|
]:
|
|
240
259
|
raise TypeError(
|
|
241
|
-
f"Constructor got an unexpected keyword argument "
|
|
242
|
-
f"'{self._operator}'"
|
|
260
|
+
f"Constructor got an unexpected keyword argument " f"'{self._operator}'"
|
|
243
261
|
)
|
|
244
262
|
|
|
245
263
|
# Convert dates into ISO 8601
|
|
246
264
|
if isinstance(self._value, datetime):
|
|
247
|
-
|
|
265
|
+
self._value = _to_isoformat(self._value)
|
|
248
266
|
elif isinstance(self._value, (tuple, list, np.ndarray)):
|
|
249
267
|
self._value = [
|
|
250
268
|
_to_isoformat(val) if isinstance(val, datetime) else val
|
|
@@ -257,14 +275,14 @@ class FieldQuery(SingleQuery):
|
|
|
257
275
|
"from": self._value[0],
|
|
258
276
|
"include_lower": False,
|
|
259
277
|
"to": self._value[1],
|
|
260
|
-
"include_upper": False
|
|
278
|
+
"include_upper": False,
|
|
261
279
|
}
|
|
262
280
|
elif self._operator == "range_closed":
|
|
263
281
|
self._value = {
|
|
264
282
|
"from": self._value[0],
|
|
265
283
|
"include_lower": True,
|
|
266
284
|
"to": self._value[1],
|
|
267
|
-
"include_upper": True
|
|
285
|
+
"include_upper": True,
|
|
268
286
|
}
|
|
269
287
|
|
|
270
288
|
# Rename operators to names used in API
|
|
@@ -332,8 +350,8 @@ class SequenceQuery(SingleQuery):
|
|
|
332
350
|
>>> print(sorted(search(query)))
|
|
333
351
|
['1L2Y', '1RIJ', '2JOF', '2LDJ', '2LL5', '2MJ9', '3UC7', '3UC8']
|
|
334
352
|
"""
|
|
335
|
-
|
|
336
|
-
|
|
353
|
+
|
|
354
|
+
def __init__(self, sequence, scope, min_identity=0.0, max_expect_value=10000000.0):
|
|
337
355
|
super().__init__()
|
|
338
356
|
self._target = _scope_to_target.get(scope.lower())
|
|
339
357
|
if self._target is None:
|
|
@@ -381,6 +399,7 @@ class MotifQuery(SingleQuery):
|
|
|
381
399
|
... "protein"
|
|
382
400
|
... )
|
|
383
401
|
"""
|
|
402
|
+
|
|
384
403
|
def __init__(self, pattern, pattern_type, scope):
|
|
385
404
|
super().__init__()
|
|
386
405
|
self._pattern = pattern
|
|
@@ -424,27 +443,20 @@ class StructureQuery(SingleQuery):
|
|
|
424
443
|
>>> print(sorted(search(query)))
|
|
425
444
|
['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS']
|
|
426
445
|
"""
|
|
446
|
+
|
|
427
447
|
def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
|
|
428
448
|
super().__init__()
|
|
429
449
|
|
|
430
|
-
if (chain is None and assembly is None)
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
)
|
|
450
|
+
if (chain is None and assembly is None) or (
|
|
451
|
+
chain is not None and assembly is not None
|
|
452
|
+
):
|
|
453
|
+
raise TypeError("Either the chain ID or assembly ID must be set")
|
|
435
454
|
elif chain is None:
|
|
436
|
-
self._value = {
|
|
437
|
-
"entry_id": pdb_id,
|
|
438
|
-
"asssembly_id": assembly
|
|
439
|
-
}
|
|
455
|
+
self._value = {"entry_id": pdb_id, "asssembly_id": assembly}
|
|
440
456
|
else:
|
|
441
|
-
self._value = {
|
|
442
|
-
"entry_id": pdb_id,
|
|
443
|
-
"asym_id": chain
|
|
444
|
-
}
|
|
457
|
+
self._value = {"entry_id": pdb_id, "asym_id": chain}
|
|
445
458
|
|
|
446
|
-
self._operator = "strict_shape_match" if strict
|
|
447
|
-
else "relaxed_shape_match"
|
|
459
|
+
self._operator = "strict_shape_match" if strict else "relaxed_shape_match"
|
|
448
460
|
|
|
449
461
|
def get_content(self):
|
|
450
462
|
content = super().get_content()
|
|
@@ -455,10 +467,7 @@ class StructureQuery(SingleQuery):
|
|
|
455
467
|
return content
|
|
456
468
|
|
|
457
469
|
|
|
458
|
-
|
|
459
|
-
|
|
460
470
|
class Sorting:
|
|
461
|
-
|
|
462
471
|
def __init__(self, field, descending=True):
|
|
463
472
|
self._field = field
|
|
464
473
|
self._descending = descending
|
|
@@ -487,12 +496,7 @@ class Sorting:
|
|
|
487
496
|
``'ranking_criteria_type'`` attributes.
|
|
488
497
|
"""
|
|
489
498
|
direction = "desc" if self._descending else "asc"
|
|
490
|
-
return {
|
|
491
|
-
"sort_by" : self._field,
|
|
492
|
-
"direction" : direction
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
|
|
499
|
+
return {"sort_by": self._field, "direction": direction}
|
|
496
500
|
|
|
497
501
|
|
|
498
502
|
class Grouping(metaclass=abc.ABCMeta):
|
|
@@ -539,7 +543,7 @@ class Grouping(metaclass=abc.ABCMeta):
|
|
|
539
543
|
The content dictionary for the ``'group_by'`` attributes.
|
|
540
544
|
"""
|
|
541
545
|
if self._sorting is not None:
|
|
542
|
-
return {"ranking_criteria_type"
|
|
546
|
+
return {"ranking_criteria_type": self._sorting.get_content()}
|
|
543
547
|
else:
|
|
544
548
|
return {}
|
|
545
549
|
|
|
@@ -627,6 +631,7 @@ class IdentityGrouping(Grouping):
|
|
|
627
631
|
To choose the order a :class:`Sorting` object needs to be
|
|
628
632
|
provided.
|
|
629
633
|
"""
|
|
634
|
+
|
|
630
635
|
def __init__(self, similarity_cutoff, sort_by=None):
|
|
631
636
|
super().__init__(sort_by)
|
|
632
637
|
if similarity_cutoff not in (100, 95, 90, 70, 50, 30):
|
|
@@ -677,11 +682,7 @@ class UniprotGrouping(Grouping):
|
|
|
677
682
|
return return_type == "polymer_entity"
|
|
678
683
|
|
|
679
684
|
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
def count(query, return_type="entry", group_by=None,
|
|
684
|
-
content_types=("experimental",)):
|
|
685
|
+
def count(query, return_type="entry", group_by=None, content_types=("experimental",)):
|
|
685
686
|
"""
|
|
686
687
|
Count PDB entries that meet the given query requirements,
|
|
687
688
|
via the RCSB search API.
|
|
@@ -737,9 +738,7 @@ def count(query, return_type="entry", group_by=None,
|
|
|
737
738
|
>>> print(sorted(ids))
|
|
738
739
|
['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
|
|
739
740
|
"""
|
|
740
|
-
query_dict = _initialize_query_dict(
|
|
741
|
-
query, return_type, group_by, content_types
|
|
742
|
-
)
|
|
741
|
+
query_dict = _initialize_query_dict(query, return_type, group_by, content_types)
|
|
743
742
|
|
|
744
743
|
query_dict["request_options"]["return_counts"] = True
|
|
745
744
|
|
|
@@ -761,8 +760,15 @@ def count(query, return_type="entry", group_by=None,
|
|
|
761
760
|
raise RequestError(f"Error {r.status_code}")
|
|
762
761
|
|
|
763
762
|
|
|
764
|
-
def search(
|
|
765
|
-
|
|
763
|
+
def search(
|
|
764
|
+
query,
|
|
765
|
+
return_type="entry",
|
|
766
|
+
range=None,
|
|
767
|
+
sort_by=None,
|
|
768
|
+
group_by=None,
|
|
769
|
+
return_groups=False,
|
|
770
|
+
content_types=("experimental",),
|
|
771
|
+
):
|
|
766
772
|
"""
|
|
767
773
|
Get all PDB IDs that meet the given query requirements,
|
|
768
774
|
via the RCSB search API.
|
|
@@ -862,19 +868,15 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
|
|
|
862
868
|
... query, return_type="polymer_entity", return_groups=True,
|
|
863
869
|
... group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
|
|
864
870
|
... ))
|
|
865
|
-
|
|
871
|
+
{'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['1EJG_1', '3NIR_1']}
|
|
866
872
|
"""
|
|
867
|
-
query_dict = _initialize_query_dict(
|
|
868
|
-
query, return_type, group_by, content_types
|
|
869
|
-
)
|
|
873
|
+
query_dict = _initialize_query_dict(query, return_type, group_by, content_types)
|
|
870
874
|
|
|
871
875
|
if group_by is not None:
|
|
872
876
|
if return_groups:
|
|
873
|
-
query_dict["request_options"]["group_by_return_type"]
|
|
874
|
-
= "groups"
|
|
877
|
+
query_dict["request_options"]["group_by_return_type"] = "groups"
|
|
875
878
|
else:
|
|
876
|
-
query_dict["request_options"]["group_by_return_type"]
|
|
877
|
-
= "representatives"
|
|
879
|
+
query_dict["request_options"]["group_by_return_type"] = "representatives"
|
|
878
880
|
|
|
879
881
|
if sort_by is not None:
|
|
880
882
|
if isinstance(sort_by, Sorting):
|
|
@@ -890,7 +892,7 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
|
|
|
890
892
|
else:
|
|
891
893
|
query_dict["request_options"]["paginate"] = {
|
|
892
894
|
"start": int(range[0]),
|
|
893
|
-
"rows": int(range[1]) - int(range[0])
|
|
895
|
+
"rows": int(range[1]) - int(range[0]),
|
|
894
896
|
}
|
|
895
897
|
|
|
896
898
|
r = requests.get(_search_url, params={"json": json.dumps(query_dict)})
|
|
@@ -900,7 +902,7 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
|
|
|
900
902
|
return [result["identifier"] for result in r.json()["result_set"]]
|
|
901
903
|
else:
|
|
902
904
|
return {
|
|
903
|
-
group["identifier"]
|
|
905
|
+
group["identifier"]: [
|
|
904
906
|
result["identifier"] for result in group["result_set"]
|
|
905
907
|
]
|
|
906
908
|
for group in r.json()["group_set"]
|
|
@@ -922,8 +924,11 @@ def _initialize_query_dict(query, return_type, group_by, content_types):
|
|
|
922
924
|
`count()` and `search()` have in common.
|
|
923
925
|
"""
|
|
924
926
|
if return_type not in [
|
|
925
|
-
"entry",
|
|
926
|
-
"
|
|
927
|
+
"entry",
|
|
928
|
+
"polymer_instance",
|
|
929
|
+
"assembly",
|
|
930
|
+
"polymer_entity",
|
|
931
|
+
"non_polymer_entity",
|
|
927
932
|
]:
|
|
928
933
|
raise ValueError(f"'{return_type}' is an invalid return type")
|
|
929
934
|
|
|
@@ -947,7 +952,7 @@ def _initialize_query_dict(query, return_type, group_by, content_types):
|
|
|
947
952
|
query_dict = {
|
|
948
953
|
"query": query.get_content(),
|
|
949
954
|
"return_type": return_type,
|
|
950
|
-
"request_options": request_options
|
|
955
|
+
"request_options": request_options,
|
|
951
956
|
}
|
|
952
957
|
return query_dict
|
|
953
958
|
|
|
@@ -956,4 +961,4 @@ def _to_isoformat(object):
|
|
|
956
961
|
"""
|
|
957
962
|
Convert a datetime into the specifc ISO 8601 format required by the RCSB.
|
|
958
963
|
"""
|
|
959
|
-
return object.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
964
|
+
return object.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
@@ -6,7 +6,7 @@ __name__ = "biotite.database.uniprot"
|
|
|
6
6
|
__author__ = "Maximilian Greil"
|
|
7
7
|
__all__ = ["assert_valid_response"]
|
|
8
8
|
|
|
9
|
-
from
|
|
9
|
+
from biotite.database.error import RequestError
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
# Taken from https://www.uniprot.org/help/api_retrieve_entries
|
|
@@ -27,6 +27,9 @@ def assert_valid_response(response_status_code):
|
|
|
27
27
|
raise RequestError("Gone. The resource you requested was removed.")
|
|
28
28
|
elif response_status_code == 500:
|
|
29
29
|
raise RequestError(
|
|
30
|
-
"Internal server error. Most likely a temporary problem, but if the problem persists please contact UniProt team."
|
|
30
|
+
"Internal server error. Most likely a temporary problem, but if the problem persists please contact UniProt team."
|
|
31
|
+
)
|
|
31
32
|
elif response_status_code == 503:
|
|
32
|
-
raise RequestError(
|
|
33
|
+
raise RequestError(
|
|
34
|
+
"Service not available. The server is being updated, try again later."
|
|
35
|
+
)
|
|
@@ -6,11 +6,11 @@ __name__ = "biotite.database.uniprot"
|
|
|
6
6
|
__author__ = "Maximilian Greil"
|
|
7
7
|
__all__ = ["fetch"]
|
|
8
8
|
|
|
9
|
-
from os.path import isdir, isfile, join, getsize
|
|
10
|
-
import os
|
|
11
9
|
import io
|
|
10
|
+
import os
|
|
11
|
+
from os.path import getsize, isdir, isfile, join
|
|
12
12
|
import requests
|
|
13
|
-
from .check import assert_valid_response
|
|
13
|
+
from biotite.database.uniprot.check import assert_valid_response
|
|
14
14
|
|
|
15
15
|
_fetch_url = "https://rest.uniprot.org/"
|
|
16
16
|
|
|
@@ -36,8 +36,7 @@ def _get_database_name(id):
|
|
|
36
36
|
return "uniprotkb"
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
def fetch(ids, format, target_path=None,
|
|
40
|
-
overwrite=False, verbose=False):
|
|
39
|
+
def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
|
|
41
40
|
"""
|
|
42
41
|
Download files from the UniProt in various formats.
|
|
43
42
|
|
|
@@ -101,18 +100,14 @@ def fetch(ids, format, target_path=None,
|
|
|
101
100
|
db_name = _get_database_name(id)
|
|
102
101
|
# Verbose output
|
|
103
102
|
if verbose:
|
|
104
|
-
print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...",
|
|
105
|
-
end="\r")
|
|
103
|
+
print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...", end="\r")
|
|
106
104
|
# Fetch file from database
|
|
107
105
|
if target_path is not None:
|
|
108
106
|
file = join(target_path, id + "." + format)
|
|
109
107
|
else:
|
|
110
108
|
# 'file = None' -> store content in a file-like object
|
|
111
109
|
file = None
|
|
112
|
-
if file is None
|
|
113
|
-
or not isfile(file) \
|
|
114
|
-
or getsize(file) == 0 \
|
|
115
|
-
or overwrite:
|
|
110
|
+
if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
|
|
116
111
|
if format in ["fasta", "gff", "txt", "xml", "rdf", "tab"]:
|
|
117
112
|
r = requests.get(_fetch_url + db_name + "/" + id + "." + format)
|
|
118
113
|
content = r.text
|