biotite 1.0.1__cp311-cp311-win_amd64.whl → 1.2.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/dssp/app.py +13 -3
  6. biotite/application/localapp.py +36 -2
  7. biotite/application/msaapp.py +10 -10
  8. biotite/application/muscle/app3.py +5 -18
  9. biotite/application/muscle/app5.py +5 -5
  10. biotite/application/sra/app.py +0 -5
  11. biotite/application/util.py +22 -2
  12. biotite/application/viennarna/rnaalifold.py +8 -8
  13. biotite/application/viennarna/rnaplot.py +9 -3
  14. biotite/application/viennarna/util.py +1 -1
  15. biotite/application/webapp.py +1 -1
  16. biotite/database/afdb/__init__.py +12 -0
  17. biotite/database/afdb/download.py +191 -0
  18. biotite/database/entrez/dbnames.py +10 -0
  19. biotite/database/entrez/download.py +9 -10
  20. biotite/database/entrez/key.py +1 -1
  21. biotite/database/entrez/query.py +5 -4
  22. biotite/database/pubchem/download.py +6 -6
  23. biotite/database/pubchem/error.py +10 -0
  24. biotite/database/pubchem/query.py +12 -23
  25. biotite/database/rcsb/download.py +3 -2
  26. biotite/database/rcsb/query.py +8 -9
  27. biotite/database/uniprot/check.py +22 -17
  28. biotite/database/uniprot/download.py +3 -6
  29. biotite/database/uniprot/query.py +4 -5
  30. biotite/file.py +14 -2
  31. biotite/interface/__init__.py +19 -0
  32. biotite/interface/openmm/__init__.py +16 -0
  33. biotite/interface/openmm/state.py +93 -0
  34. biotite/interface/openmm/system.py +227 -0
  35. biotite/interface/pymol/__init__.py +198 -0
  36. biotite/interface/pymol/cgo.py +346 -0
  37. biotite/interface/pymol/convert.py +185 -0
  38. biotite/interface/pymol/display.py +267 -0
  39. biotite/interface/pymol/object.py +1226 -0
  40. biotite/interface/pymol/shapes.py +178 -0
  41. biotite/interface/pymol/startup.py +169 -0
  42. biotite/interface/rdkit/__init__.py +15 -0
  43. biotite/interface/rdkit/mol.py +490 -0
  44. biotite/interface/version.py +71 -0
  45. biotite/interface/warning.py +19 -0
  46. biotite/sequence/align/__init__.py +0 -4
  47. biotite/sequence/align/alignment.py +49 -14
  48. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  49. biotite/sequence/align/banded.pyx +26 -26
  50. biotite/sequence/align/cigar.py +2 -2
  51. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  52. biotite/sequence/align/kmeralphabet.pyx +19 -2
  53. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  54. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  55. biotite/sequence/align/kmertable.pyx +58 -48
  56. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  57. biotite/sequence/align/localgapped.pyx +47 -47
  58. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  59. biotite/sequence/align/localungapped.pyx +10 -10
  60. biotite/sequence/align/matrix.py +284 -57
  61. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  62. biotite/sequence/align/matrix_data/PB.license +21 -0
  63. biotite/sequence/align/matrix_data/PB.mat +18 -0
  64. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  65. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  66. biotite/sequence/align/pairwise.pyx +35 -35
  67. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  68. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  69. biotite/sequence/align/selector.pyx +2 -2
  70. biotite/sequence/align/statistics.py +1 -1
  71. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  72. biotite/sequence/alphabet.py +5 -2
  73. biotite/sequence/annotation.py +19 -13
  74. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  75. biotite/sequence/codon.py +1 -2
  76. biotite/sequence/graphics/alignment.py +25 -39
  77. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  78. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  79. biotite/sequence/graphics/colorschemes.py +44 -11
  80. biotite/sequence/graphics/dendrogram.py +4 -2
  81. biotite/sequence/graphics/features.py +2 -2
  82. biotite/sequence/graphics/logo.py +10 -12
  83. biotite/sequence/io/fasta/convert.py +1 -2
  84. biotite/sequence/io/fasta/file.py +1 -1
  85. biotite/sequence/io/fastq/file.py +3 -3
  86. biotite/sequence/io/genbank/file.py +3 -3
  87. biotite/sequence/io/genbank/sequence.py +2 -0
  88. biotite/sequence/io/gff/convert.py +1 -1
  89. biotite/sequence/io/gff/file.py +1 -2
  90. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  91. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  92. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  93. biotite/sequence/profile.py +105 -29
  94. biotite/sequence/search.py +0 -1
  95. biotite/sequence/seqtypes.py +136 -8
  96. biotite/sequence/sequence.py +1 -2
  97. biotite/setup_ccd.py +197 -0
  98. biotite/structure/__init__.py +6 -3
  99. biotite/structure/alphabet/__init__.py +25 -0
  100. biotite/structure/alphabet/encoder.py +332 -0
  101. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  102. biotite/structure/alphabet/i3d.py +109 -0
  103. biotite/structure/alphabet/layers.py +86 -0
  104. biotite/structure/alphabet/pb.license +21 -0
  105. biotite/structure/alphabet/pb.py +170 -0
  106. biotite/structure/alphabet/unkerasify.py +128 -0
  107. biotite/structure/atoms.py +163 -66
  108. biotite/structure/basepairs.py +26 -26
  109. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  110. biotite/structure/bonds.pyx +79 -25
  111. biotite/structure/box.py +19 -21
  112. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  113. biotite/structure/celllist.pyx +83 -67
  114. biotite/structure/chains.py +5 -37
  115. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  116. biotite/structure/compare.py +420 -13
  117. biotite/structure/density.py +1 -1
  118. biotite/structure/dotbracket.py +27 -28
  119. biotite/structure/filter.py +8 -8
  120. biotite/structure/geometry.py +74 -127
  121. biotite/structure/hbond.py +17 -19
  122. biotite/structure/info/__init__.py +1 -0
  123. biotite/structure/info/atoms.py +24 -15
  124. biotite/structure/info/bonds.py +12 -6
  125. biotite/structure/info/ccd.py +125 -34
  126. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  127. biotite/structure/info/groups.py +62 -19
  128. biotite/structure/info/masses.py +9 -6
  129. biotite/structure/info/misc.py +15 -22
  130. biotite/structure/info/radii.py +92 -22
  131. biotite/structure/info/standardize.py +4 -4
  132. biotite/structure/integrity.py +4 -6
  133. biotite/structure/io/general.py +2 -2
  134. biotite/structure/io/gro/file.py +8 -9
  135. biotite/structure/io/mol/convert.py +1 -1
  136. biotite/structure/io/mol/ctab.py +33 -28
  137. biotite/structure/io/mol/mol.py +1 -1
  138. biotite/structure/io/mol/sdf.py +80 -53
  139. biotite/structure/io/pdb/convert.py +4 -3
  140. biotite/structure/io/pdb/file.py +85 -25
  141. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  142. biotite/structure/io/pdbqt/file.py +36 -36
  143. biotite/structure/io/pdbx/__init__.py +1 -0
  144. biotite/structure/io/pdbx/bcif.py +54 -15
  145. biotite/structure/io/pdbx/cif.py +92 -66
  146. biotite/structure/io/pdbx/component.py +15 -4
  147. biotite/structure/io/pdbx/compress.py +321 -0
  148. biotite/structure/io/pdbx/convert.py +410 -75
  149. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  150. biotite/structure/io/pdbx/encoding.pyx +98 -17
  151. biotite/structure/io/trajfile.py +9 -6
  152. biotite/structure/io/util.py +38 -0
  153. biotite/structure/mechanics.py +0 -1
  154. biotite/structure/molecules.py +141 -156
  155. biotite/structure/pseudoknots.py +7 -13
  156. biotite/structure/repair.py +2 -4
  157. biotite/structure/residues.py +13 -24
  158. biotite/structure/rings.py +335 -0
  159. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  160. biotite/structure/sasa.pyx +2 -1
  161. biotite/structure/segments.py +69 -11
  162. biotite/structure/sequence.py +0 -1
  163. biotite/structure/sse.py +0 -2
  164. biotite/structure/superimpose.py +74 -62
  165. biotite/structure/tm.py +581 -0
  166. biotite/structure/transform.py +12 -25
  167. biotite/structure/util.py +76 -4
  168. biotite/version.py +9 -4
  169. biotite/visualize.py +111 -1
  170. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
  171. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
  172. biotite/structure/info/ccd/README.rst +0 -8
  173. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  174. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  175. biotite/structure/info/ccd/nucleotides.txt +0 -798
  176. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
  177. {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -0,0 +1,191 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.afdb"
6
+ __author__ = "Patrick Kunzmann, Alex Carlin"
7
+ __all__ = ["fetch"]
8
+
9
+ import io
10
+ import re
11
+ from pathlib import Path
12
+ from xml.etree import ElementTree
13
+ import requests
14
+ from biotite.database.error import RequestError
15
+
16
+ _METADATA_URL = "https://alphafold.com/api/prediction"
17
+ _BINARY_FORMATS = ["bcif"]
18
+ # Adopted from https://www.uniprot.org/help/accession_numbers
19
+ _UNIPROT_PATTERN = (
20
+ "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
21
+ )
22
+
23
+
24
+ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
25
+ """
26
+ Download predicted protein structures from the AlphaFold DB.
27
+
28
+ This function requires an internet connection.
29
+
30
+ Parameters
31
+ ----------
32
+ ids : str or iterable object of str
33
+ A single ID or a list of IDs of the file(s) to be downloaded.
34
+ They can be either UniProt IDs (e.g. ``P12345``) or AlphaFold DB IDs
35
+ (e.g. ``AF-P12345-F1``).
36
+ format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
37
+ The format of the files to be downloaded.
38
+ target_path : str, optional
39
+ The target directory of the downloaded files.
40
+ By default, the file content is stored in a file-like object
41
+ (`StringIO` or `BytesIO`, respectively).
42
+ overwrite : bool, optional
43
+ If true, existing files will be overwritten.
44
+ Otherwise the respective file will only be downloaded if the file does not
45
+ exist yet in the specified target directory or if the file is empty.
46
+ verbose : bool, optional
47
+ If true, the function will output the download progress.
48
+
49
+ Returns
50
+ -------
51
+ files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
52
+ The file path(s) to the downloaded files.
53
+ If a single string (a single ID) was given in `ids`, a single string is
54
+ returned.
55
+ If a list (or other iterable object) was given, a list of strings is returned.
56
+ If no `target_path` was given, the file contents are stored in either
57
+ ``StringIO`` or ``BytesIO`` objects.
58
+
59
+ Examples
60
+ --------
61
+
62
+ >>> from pathlib import Path
63
+ >>> file = fetch("P12345", "cif", path_to_directory)
64
+ >>> print(Path(file).name)
65
+ P12345.cif
66
+ >>> files = fetch(["P12345", "Q8K9I1"], "cif", path_to_directory)
67
+ >>> print([Path(file).name for file in files])
68
+ ['P12345.cif', 'Q8K9I1.cif']
69
+ """
70
+ if format not in ["pdb", "pdbx", "cif", "mmcif", "bcif", "fasta"]:
71
+ raise ValueError(f"Format '{format}' is not supported")
72
+ if format in ["pdbx", "mmcif"]:
73
+ format = "cif"
74
+
75
+ # If only a single ID is present,
76
+ # put it into a single element list
77
+ if isinstance(ids, str):
78
+ ids = [ids]
79
+ single_element = True
80
+ else:
81
+ single_element = False
82
+ if target_path is not None:
83
+ target_path = Path(target_path)
84
+ target_path.mkdir(parents=True, exist_ok=True)
85
+
86
+ files = []
87
+ for i, id in enumerate(ids):
88
+ # Verbose output
89
+ if verbose:
90
+ print(f"Fetching file {i + 1:d} / {len(ids):d} ({id})...", end="\r")
91
+ # Fetch file from database
92
+ if target_path is not None:
93
+ file = target_path / f"{id}.{format}"
94
+ else:
95
+ # 'file = None' -> store content in a file-like object
96
+ file = None
97
+ if file is None or not file.is_file() or file.stat().st_size == 0 or overwrite:
98
+ file_response = requests.get(_get_file_url(id, format))
99
+ _assert_valid_file(file_response, id)
100
+ if format in _BINARY_FORMATS:
101
+ content = file_response.content
102
+ else:
103
+ content = file_response.text
104
+
105
+ if file is None:
106
+ if format in _BINARY_FORMATS:
107
+ file = io.BytesIO(content)
108
+ else:
109
+ file = io.StringIO(content)
110
+ else:
111
+ mode = "wb+" if format in _BINARY_FORMATS else "w+"
112
+ with open(file, mode) as f:
113
+ f.write(content)
114
+
115
+ files.append(file)
116
+ if verbose:
117
+ print("\nDone")
118
+
119
+ # Return paths as strings
120
+ files = [file.as_posix() if isinstance(file, Path) else file for file in files]
121
+ # If input was a single ID, return only a single element
122
+ if single_element:
123
+ return files[0]
124
+ else:
125
+ return files
126
+
127
+
128
+ def _get_file_url(id, format):
129
+ """
130
+ Get the actual file URL for the given ID from the ``prediction`` API endpoint.
131
+
132
+ Parameters
133
+ ----------
134
+ id : str
135
+ The ID of the file to be downloaded.
136
+ format : str
137
+ The format of the file to be downloaded.
138
+
139
+ Returns
140
+ -------
141
+ file_url : str
142
+ The URL of the file to be downloaded.
143
+ """
144
+ uniprot_id = _extract_id(id)
145
+ metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
146
+ if len(metadata) == 0:
147
+ raise RequestError(f"ID {id} is invalid")
148
+ # A list of length 1 is always returned, if the response is valid
149
+ return metadata[0][f"{format}Url"]
150
+
151
+
152
+ def _extract_id(id):
153
+ """
154
+ Extract an AFDB-compatible UniProt ID from the given qualifier.
155
+ This may comprise
156
+
157
+ - Directly the UniProt ID (e.g. ``P12345``) (trivial case)
158
+ - Entry ID, as also returned by the RCSB search API (e.g. ``AF-P12345-F1``)
159
+
160
+ Parameters
161
+ ----------
162
+ id : str
163
+ The qualifier to extract the UniProt ID from.
164
+
165
+ Returns
166
+ -------
167
+ uniprot_id : str
168
+ The UniProt ID.
169
+ """
170
+ match = re.search(_UNIPROT_PATTERN, id)
171
+ if match is None:
172
+ raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
173
+ return match.group()
174
+
175
+
176
+ def _assert_valid_file(response, id):
177
+ """
178
+ Checks whether the response is an actual structure file
179
+ or whether the response is a *404* error due to an invalid UniProt ID.
180
+ """
181
+ if len(response.text) == 0:
182
+ raise RequestError(f"Received no repsone for '{id}'")
183
+ try:
184
+ root = ElementTree.fromstring(response.text)
185
+ if root.tag == "Error":
186
+ raise RequestError(
187
+ f"Error while fetching '{id}': {root.find('Message').text}"
188
+ )
189
+ except ElementTree.ParseError:
190
+ # This is not XML -> the response is probably a valid file
191
+ pass
@@ -80,6 +80,16 @@ def sanitize_database_name(db_name):
80
80
  database name is not existing.
81
81
 
82
82
  Only for internal usage in ``download.py`` and ``query.py``.
83
+
84
+ Parameters
85
+ ----------
86
+ db_name : str
87
+ Entrez database name.
88
+
89
+ Returns
90
+ -------
91
+ name : str
92
+ E-utility database name.
83
93
  """
84
94
  if db_name in _db_names.keys():
85
95
  # Convert into E-utility database name
@@ -54,17 +54,16 @@ def fetch(
54
54
  db_name : str
55
55
  E-utility or common database name.
56
56
  ret_type : str
57
- Retrieval type
57
+ Retrieval type.
58
58
  ret_mode : str, optional
59
- Retrieval mode
59
+ Retrieval mode.
60
60
  overwrite : bool, optional
61
61
  If true, existing files will be overwritten. Otherwise the
62
62
  respective file will only be downloaded if the file does not
63
63
  exist yet in the specified target directory or if the file is
64
- empty. (Default: False)
65
- verbose: bool, optional
64
+ empty.
65
+ verbose : bool, optional
66
66
  If true, the function will output the download progress.
67
- (Default: False)
68
67
 
69
68
  Returns
70
69
  -------
@@ -84,9 +83,9 @@ def fetch(
84
83
  When the issue occurs repeatedly, the error is probably in your
85
84
  input.
86
85
 
87
- See also
86
+ See Also
88
87
  --------
89
- fetch_single_file
88
+ fetch_single_file : Fetch multiple entries as a single file.
90
89
 
91
90
  Examples
92
91
  --------
@@ -111,7 +110,7 @@ def fetch(
111
110
  for i, id in enumerate(uids):
112
111
  # Verbose output
113
112
  if verbose:
114
- print(f"Fetching file {i+1:d} / {len(uids):d} ({id})...", end="\r")
113
+ print(f"Fetching file {i + 1:d} / {len(uids):d} ({id})...", end="\r")
115
114
  # Fetch file from database
116
115
  if target_path is not None:
117
116
  file = join(target_path, id + "." + suffix)
@@ -188,9 +187,9 @@ def fetch_single_file(
188
187
  When the issue occurs repeatedly, the error is probably in your
189
188
  input.
190
189
 
191
- See also
190
+ See Also
192
191
  --------
193
- fetch
192
+ fetch : Fetch one or multiple entries as separate files.
194
193
  """
195
194
  if (
196
195
  file_name is not None
@@ -37,7 +37,7 @@ def set_api_key(key):
37
37
 
38
38
  Parameters
39
39
  ----------
40
- api_key : str
40
+ key : str
41
41
  The API key.
42
42
  """
43
43
  global _API_KEY
@@ -60,9 +60,9 @@ class CompositeQuery(Query):
60
60
 
61
61
  Parameters
62
62
  ----------
63
- operator: str, {"AND", "OR", "NOT"}
63
+ operator : str, {"AND", "OR", "NOT"}
64
64
  The combination operator.
65
- queries : iterable object of SimpleQuery
65
+ query1, query2 : SimpleQuery
66
66
  The queries to be combined.
67
67
 
68
68
  Examples
@@ -97,7 +97,7 @@ class SimpleQuery(Query):
97
97
 
98
98
  Parameters
99
99
  ----------
100
- term: str
100
+ term : str
101
101
  The search term.
102
102
  field : str, optional
103
103
  The field to search the term in.
@@ -173,7 +173,8 @@ class SimpleQuery(Query):
173
173
  "SUBS",
174
174
  "WORD",
175
175
  "TI",
176
- "TITL" "VOL",
176
+ "TITL",
177
+ "VOL",
177
178
  ]
178
179
 
179
180
  def __init__(self, term, field=None):
@@ -41,22 +41,22 @@ def fetch(
41
41
  to be downloaded.
42
42
  format : {'sdf', 'asnt', 'asnb', 'xml', 'json', 'jsonp', 'png'}
43
43
  The format of the files to be downloaded.
44
+ target_path : str, optional
45
+ The target directory of the downloaded files.
46
+ By default, the file content is stored in a file-like object
47
+ (:class:`StringIO` or :class:`BytesIO`, respectively).
44
48
  as_structural_formula : bool, optional
45
49
  If set to true, the structural formula is downloaded instead of
46
50
  a 3D conformer.
47
51
  This means that coordinates lie in the xy-plane and represent
48
52
  the positions atoms would have in a structural formula
49
53
  representation.
50
- target_path : str, optional
51
- The target directory of the downloaded files.
52
- By default, the file content is stored in a file-like object
53
- (:class:`StringIO` or :class:`BytesIO`, respectively).
54
54
  overwrite : bool, optional
55
55
  If true, existing files will be overwritten.
56
56
  Otherwise the respective file will only be downloaded, if the
57
57
  file does not exist yet in the specified target directory or if
58
58
  the file is empty.
59
- verbose: bool, optional
59
+ verbose : bool, optional
60
60
  If set to true, the function will output the download progress.
61
61
  throttle_threshold : float or None, optional
62
62
  A value between 0 and 1.
@@ -114,7 +114,7 @@ def fetch(
114
114
  raise TypeError("CIDs must be given as integers, not as string")
115
115
  # Verbose output
116
116
  if verbose:
117
- print(f"Fetching file {i+1:d} / {len(cids):d} ({cid})...", end="\r")
117
+ print(f"Fetching file {i + 1:d} / {len(cids):d} ({cid})...", end="\r")
118
118
 
119
119
  # Fetch file from database
120
120
  if target_path is not None:
@@ -11,6 +11,16 @@ def parse_error_details(response_text):
11
11
  """
12
12
  Parse the ``Detail: ...`` or alternatively ``Message: ...`` part of
13
13
  an error response.
14
+
15
+ Parameters
16
+ ----------
17
+ response_text : str
18
+ The text of the response.
19
+
20
+ Returns
21
+ -------
22
+ error_details : str
23
+ The error details.
14
24
  """
15
25
  for message_line_indicator in ["Detail: ", "Message: "]:
16
26
  for line in response_text.splitlines():
@@ -240,6 +240,11 @@ class FormulaQuery(Query):
240
240
  The maximum number of matches that this query may return.
241
241
  By default, the *PubChem* default value is used, which can
242
242
  be considered unlimited.
243
+
244
+ Returns
245
+ -------
246
+ query : FormulaQuery
247
+ The query.
243
248
  """
244
249
  element_counter = collections.Counter(atoms.element)
245
250
  formula = ""
@@ -327,7 +332,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
327
332
  )
328
333
  if not query_key_found:
329
334
  raise TypeError(
330
- "Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' " "or 'cid'"
335
+ "Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' or 'cid'"
331
336
  )
332
337
  if "number" in kwargs:
333
338
  self._number = kwargs["number"]
@@ -348,8 +353,13 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
348
353
  ----------
349
354
  atoms : AtomArray or AtomArrayStack
350
355
  The query structure.
351
- **kwargs : dict, optional
356
+ *args, **kwargs
352
357
  See the constructor for additional options.
358
+
359
+ Returns
360
+ -------
361
+ query : StructureQuery
362
+ The query object.
353
363
  """
354
364
  mol_file = MOLFile()
355
365
  mol_file.set_structure(atoms)
@@ -448,26 +458,19 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
448
458
  be considered unlimited.
449
459
  match_charges : bool, optional
450
460
  If set to true, atoms must match the specified charge.
451
- (Default: False)
452
461
  match_tautomers : bool, optional
453
462
  If set to true, allow match to tautomers of the given structure.
454
- (Default: False)
455
463
  rings_not_embedded : bool, optional
456
464
  If set to true, rings may not be embedded in a larger system.
457
- (Default: False)
458
465
  single_double_bonds_match : bool, optional
459
466
  If set to true, single or double bonds match aromatic bonds.
460
- (Default: True)
461
467
  chains_match_rings : bool, optional
462
468
  If set to true, chain bonds in the query may match rings in
463
469
  hits.
464
- (Default: True)
465
470
  strip_hydrogen : bool, optional
466
471
  If set to true, remove any explicit hydrogens before searching.
467
- (Default: False)
468
472
  stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
469
473
  How to handle stereo.
470
- (Default: 'ignore')
471
474
 
472
475
  Notes
473
476
  -----
@@ -528,26 +531,19 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
528
531
  be considered unlimited.
529
532
  match_charges : bool, optional
530
533
  If set to true, atoms must match the specified charge.
531
- (Default: False)
532
534
  match_tautomers : bool, optional
533
535
  If set to true, allow match to tautomers of the given structure.
534
- (Default: False)
535
536
  rings_not_embedded : bool, optional
536
537
  If set to true, rings may not be embedded in a larger system.
537
- (Default: False)
538
538
  single_double_bonds_match : bool, optional
539
539
  If set to true, single or double bonds match aromatic bonds.
540
- (Default: True)
541
540
  chains_match_rings : bool, optional
542
541
  If set to true, chain bonds in the query may match rings in
543
542
  hits.
544
- (Default: True)
545
543
  strip_hydrogen : bool, optional
546
544
  If set to true, remove any explicit hydrogens before searching.
547
- (Default: False)
548
545
  stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
549
546
  How to handle stereo.
550
- (Default: 'ignore')
551
547
 
552
548
  Notes
553
549
  -----
@@ -601,26 +597,19 @@ class SubstructureQuery(SuperOrSubstructureQuery):
601
597
  be considered unlimited.
602
598
  match_charges : bool, optional
603
599
  If set to true, atoms must match the specified charge.
604
- (Default: False)
605
600
  match_tautomers : bool, optional
606
601
  If set to true, allow match to tautomers of the given structure.
607
- (Default: False)
608
602
  rings_not_embedded : bool, optional
609
603
  If set to true, rings may not be embedded in a larger system.
610
- (Default: False)
611
604
  single_double_bonds_match : bool, optional
612
605
  If set to true, single or double bonds match aromatic bonds.
613
- (Default: True)
614
606
  chains_match_rings : bool, optional
615
607
  If set to true, chain bonds in the query may match rings in
616
608
  hits.
617
- (Default: True)
618
609
  strip_hydrogen : bool, optional
619
610
  If set to true, remove any explicit hydrogens before searching.
620
- (Default: False)
621
611
  stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
622
612
  How to handle stereo.
623
- (Default: 'ignore')
624
613
 
625
614
  Notes
626
615
  -----
@@ -44,7 +44,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
44
44
  Otherwise the respective file will only be downloaded, if the
45
45
  file does not exist yet in the specified target directory or if
46
46
  the file is empty.
47
- verbose: bool, optional
47
+ verbose : bool, optional
48
48
  If set to true, the function will output the download progress.
49
49
 
50
50
  Returns
@@ -91,7 +91,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
91
91
  for i, id in enumerate(pdb_ids):
92
92
  # Verbose output
93
93
  if verbose:
94
- print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...", end="\r")
94
+ print(f"Fetching file {i + 1:d} / {len(pdb_ids):d} ({id})...", end="\r")
95
95
 
96
96
  # Fetch file from database
97
97
  if target_path is not None:
@@ -152,6 +152,7 @@ def _assert_valid_file(response_text, pdb_id):
152
152
  for err_msg in [
153
153
  "404 Not Found",
154
154
  "<title>RCSB Protein Data Bank Error Page</title>",
155
+ "<title>PDB Archive over AWS</title>",
155
156
  "No fasta files were found.",
156
157
  "No valid PDB IDs were submitted.",
157
158
  ]
@@ -146,9 +146,9 @@ class BasicQuery(SingleQuery):
146
146
  Examples
147
147
  --------
148
148
 
149
- >>> query = BasicQuery("tc5b")
149
+ >>> query = BasicQuery("Miniprotein Construct")
150
150
  >>> print(sorted(search(query)))
151
- ['1L2Y', '8ANG', '8ANH', '8ANI', '8ANM', '8QWW']
151
+ ['1L2Y']
152
152
  """
153
153
 
154
154
  def __init__(self, term):
@@ -257,7 +257,7 @@ class FieldQuery(SingleQuery):
257
257
  "exists",
258
258
  ]:
259
259
  raise TypeError(
260
- f"Constructor got an unexpected keyword argument " f"'{self._operator}'"
260
+ f"Constructor got an unexpected keyword argument '{self._operator}'"
261
261
  )
262
262
 
263
263
  # Convert dates into ISO 8601
@@ -346,9 +346,9 @@ class SequenceQuery(SingleQuery):
346
346
  --------
347
347
 
348
348
  >>> sequence = "NLYIQWLKDGGPSSGRPPPS"
349
- >>> query = SequenceQuery(sequence, scope="protein", min_identity=0.8)
349
+ >>> query = SequenceQuery(sequence, scope="protein", min_identity=0.95)
350
350
  >>> print(sorted(search(query)))
351
- ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2LL5', '2MJ9', '3UC7', '3UC8']
351
+ ['1L2Y', '2LDJ', '9G22', '9G2N', '9G2O', '9G31', '9G32', '9GDL', '9GDN', '9GDT', '9GDU', '9GE1']
352
352
  """
353
353
 
354
354
  def __init__(self, sequence, scope, min_identity=0.0, max_expect_value=10000000.0):
@@ -441,7 +441,7 @@ class StructureQuery(SingleQuery):
441
441
 
442
442
  >>> query = StructureQuery("1L2Y", chain="A")
443
443
  >>> print(sorted(search(query)))
444
- ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS']
444
+ ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS', '9DPF']
445
445
  """
446
446
 
447
447
  def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
@@ -868,7 +868,7 @@ def search(
868
868
  ... query, return_type="polymer_entity", return_groups=True,
869
869
  ... group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
870
870
  ... ))
871
- {'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['1EJG_1', '3NIR_1']}
871
+ {'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['3NIR_1', '1EJG_1']}
872
872
  """
873
873
  query_dict = _initialize_query_dict(query, return_type, group_by, content_types)
874
874
 
@@ -944,8 +944,7 @@ def _initialize_query_dict(query, return_type, group_by, content_types):
944
944
  if group_by is not None:
945
945
  if not group_by.is_compatible_return_type(return_type):
946
946
  raise ValueError(
947
- f"Return type '{return_type}' is not compatible "
948
- f"with the given Grouping"
947
+ f"Return type '{return_type}' is not compatible with the given Grouping"
949
948
  )
950
949
  request_options["group_by"] = group_by.get_content()
951
950
 
@@ -10,26 +10,31 @@ from biotite.database.error import RequestError
10
10
 
11
11
 
12
12
  # Taken from https://www.uniprot.org/help/api_retrieve_entries
13
- def assert_valid_response(response_status_code):
13
+ def assert_valid_response(response):
14
14
  """
15
15
  Checks whether the response is valid.
16
16
 
17
17
  Parameters
18
18
  ----------
19
- response_status_code: int
20
- Status code of request.get.
19
+ response : Response
20
+ The response returned by :func:`requests.get()`.
21
21
  """
22
- if response_status_code == 400:
23
- raise RequestError("Bad request. There is a problem with your input.")
24
- elif response_status_code == 404:
25
- raise RequestError("Not found. The resource you requested doesn't exist.")
26
- elif response_status_code == 410:
27
- raise RequestError("Gone. The resource you requested was removed.")
28
- elif response_status_code == 500:
29
- raise RequestError(
30
- "Internal server error. Most likely a temporary problem, but if the problem persists please contact UniProt team."
31
- )
32
- elif response_status_code == 503:
33
- raise RequestError(
34
- "Service not available. The server is being updated, try again later."
35
- )
22
+ if len(response.content) == 0:
23
+ raise RequestError("No content returned")
24
+ match response.status_code:
25
+ case 400:
26
+ raise RequestError("Bad request. There is a problem with your input.")
27
+ case 404:
28
+ raise RequestError("Not found. The resource you requested doesn't exist.")
29
+ case 410:
30
+ raise RequestError("Gone. The resource you requested was removed.")
31
+ case 500:
32
+ raise RequestError(
33
+ "Internal server error. "
34
+ "Most likely a temporary problem, "
35
+ "but if the problem persists please contact UniProt team."
36
+ )
37
+ case 503:
38
+ raise RequestError(
39
+ "Service not available. The server is being updated, try again later."
40
+ )
@@ -41,7 +41,6 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
41
41
  Download files from the UniProt in various formats.
42
42
 
43
43
  Available databases are UniProtKB, UniRef and UniParc.
44
-
45
44
  This function requires an internet connection.
46
45
 
47
46
  Parameters
@@ -58,11 +57,9 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
58
57
  overwrite : bool, optional
59
58
  If true, existing files will be overwritten. Otherwise the
60
59
  respective file will only be downloaded if the file does not
61
- exist yet in the specified target directory or if the file is
62
- empty. (Default: False)
63
- verbose: bool, optional
60
+ exist yet in the specified target directory.
61
+ verbose : bool, optional
64
62
  If true, the function will output the download progress.
65
- (Default: False)
66
63
 
67
64
  Returns
68
65
  -------
@@ -111,7 +108,7 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
111
108
  if format in ["fasta", "gff", "txt", "xml", "rdf", "tab"]:
112
109
  r = requests.get(_fetch_url + db_name + "/" + id + "." + format)
113
110
  content = r.text
114
- assert_valid_response(r.status_code)
111
+ assert_valid_response(r)
115
112
  else:
116
113
  raise ValueError(f"Format '{format}' is not supported")
117
114
  if file is None:
@@ -50,9 +50,9 @@ class CompositeQuery(Query):
50
50
 
51
51
  Parameters
52
52
  ----------
53
- operator: str, {"AND", "OR", "NOT"}
53
+ operator : str, {"AND", "OR", "NOT"}
54
54
  The combination operator.
55
- queries : iterable object of SimpleQuery
55
+ query1, query2 : SimpleQuery
56
56
  The queries to be combined.
57
57
  """
58
58
 
@@ -114,7 +114,7 @@ class SimpleQuery(Query):
114
114
  The list of possible fields and the required search term
115
115
  formatting can be found
116
116
  `here <https://www.uniprot.org/help/query-fields>`_.
117
- term: str
117
+ term : str
118
118
  The search term.
119
119
  """
120
120
 
@@ -264,7 +264,6 @@ def search(query, number=500):
264
264
  The search query.
265
265
  number : int
266
266
  The maximum number of IDs that are obtained.
267
- (Default: 500)
268
267
 
269
268
  Returns
270
269
  -------
@@ -289,5 +288,5 @@ def search(query, number=500):
289
288
  params = {"query": str(query), "format": "list", "size": str(number)}
290
289
  r = requests.get(_base_url, params=params)
291
290
  content = r.text
292
- assert_valid_response(r.status_code)
291
+ assert_valid_response(r)
293
292
  return content.split("\n")[:-1]