biotite 0.38.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (124)
  1. biotite/__init__.py +3 -3
  2. biotite/application/application.py +33 -28
  3. biotite/application/dssp/app.py +18 -18
  4. biotite/application/sra/__init__.py +5 -0
  5. biotite/application/sra/app.py +337 -55
  6. biotite/database/entrez/__init__.py +2 -1
  7. biotite/database/entrez/check.py +14 -3
  8. biotite/database/entrez/download.py +20 -13
  9. biotite/database/entrez/key.py +44 -0
  10. biotite/database/entrez/query.py +38 -34
  11. biotite/database/pubchem/query.py +44 -44
  12. biotite/database/rcsb/download.py +19 -14
  13. biotite/database/rcsb/query.py +46 -46
  14. biotite/sequence/align/__init__.py +5 -1
  15. biotite/sequence/align/banded.c +1408 -1025
  16. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  17. biotite/sequence/align/buckets.py +69 -0
  18. biotite/sequence/align/cigar.py +389 -0
  19. biotite/sequence/align/kmeralphabet.c +3220 -2850
  20. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  21. biotite/sequence/align/kmersimilarity.c +713 -663
  22. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  23. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  24. biotite/sequence/align/kmertable.cpp +68398 -0
  25. biotite/sequence/align/localgapped.c +1507 -1074
  26. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  27. biotite/sequence/align/localungapped.c +1143 -833
  28. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  29. biotite/sequence/align/multiple.c +1569 -1092
  30. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  31. biotite/sequence/align/pairwise.c +1612 -1212
  32. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  33. biotite/sequence/align/permutation.c +33259 -0
  34. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  35. biotite/sequence/align/primes.txt +821 -0
  36. biotite/sequence/align/{kmertable.c → selector.c} +9129 -16497
  37. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  38. biotite/sequence/align/tracetable.c +685 -646
  39. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  40. biotite/sequence/codec.c +1159 -841
  41. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  42. biotite/sequence/graphics/alignment.py +212 -2
  43. biotite/sequence/io/genbank/annotation.py +11 -11
  44. biotite/sequence/phylo/nj.c +684 -636
  45. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  46. biotite/sequence/phylo/tree.c +970 -673
  47. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  48. biotite/sequence/phylo/upgma.c +672 -626
  49. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  50. biotite/structure/__init__.py +1 -1
  51. biotite/structure/atoms.py +1 -1
  52. biotite/structure/basepairs.py +7 -12
  53. biotite/structure/bonds.c +3861 -3749
  54. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  55. biotite/structure/celllist.c +727 -707
  56. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  57. biotite/structure/charges.c +1561 -1560
  58. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  59. biotite/structure/filter.py +30 -37
  60. biotite/structure/info/__init__.py +5 -8
  61. biotite/structure/info/atoms.py +25 -67
  62. biotite/structure/info/bonds.py +46 -100
  63. biotite/structure/info/ccd/README.rst +8 -0
  64. biotite/structure/info/ccd/amino_acids.txt +1646 -0
  65. biotite/structure/info/ccd/carbohydrates.txt +1133 -0
  66. biotite/structure/info/ccd/components.bcif +0 -0
  67. biotite/structure/info/ccd/nucleotides.txt +797 -0
  68. biotite/structure/info/ccd.py +95 -0
  69. biotite/structure/info/groups.py +90 -0
  70. biotite/structure/info/masses.py +21 -20
  71. biotite/structure/info/misc.py +11 -22
  72. biotite/structure/info/standardize.py +17 -12
  73. biotite/structure/io/__init__.py +2 -4
  74. biotite/structure/io/ctab.py +1 -1
  75. biotite/structure/io/general.py +37 -43
  76. biotite/structure/io/mmtf/__init__.py +3 -0
  77. biotite/structure/io/mmtf/convertarray.c +528 -365
  78. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  79. biotite/structure/io/mmtf/convertfile.c +725 -676
  80. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  81. biotite/structure/io/mmtf/decode.c +1070 -754
  82. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  83. biotite/structure/io/mmtf/encode.c +727 -677
  84. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  85. biotite/structure/io/mmtf/file.py +34 -26
  86. biotite/structure/io/npz/__init__.py +3 -0
  87. biotite/structure/io/npz/file.py +21 -18
  88. biotite/structure/io/pdb/__init__.py +3 -3
  89. biotite/structure/io/pdb/file.py +72 -70
  90. biotite/structure/io/pdb/hybrid36.c +540 -478
  91. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  92. biotite/structure/io/pdbqt/file.py +82 -68
  93. biotite/structure/io/pdbx/__init__.py +13 -6
  94. biotite/structure/io/pdbx/bcif.py +649 -0
  95. biotite/structure/io/pdbx/cif.py +1028 -0
  96. biotite/structure/io/pdbx/component.py +243 -0
  97. biotite/structure/io/pdbx/convert.py +707 -359
  98. biotite/structure/io/pdbx/encoding.c +112813 -0
  99. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  100. biotite/structure/io/pdbx/error.py +14 -0
  101. biotite/structure/io/pdbx/legacy.py +267 -0
  102. biotite/structure/molecules.py +151 -151
  103. biotite/structure/residues.py +40 -40
  104. biotite/structure/sasa.c +713 -644
  105. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  106. biotite/structure/superimpose.py +158 -115
  107. biotite/visualize.py +9 -11
  108. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
  109. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/RECORD +112 -102
  110. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
  111. biotite/structure/info/amino_acids.json +0 -1556
  112. biotite/structure/info/amino_acids.py +0 -42
  113. biotite/structure/info/carbohydrates.json +0 -1122
  114. biotite/structure/info/carbohydrates.py +0 -39
  115. biotite/structure/info/intra_bonds.msgpack +0 -0
  116. biotite/structure/info/link_types.msgpack +0 -1
  117. biotite/structure/info/nucleotides.json +0 -772
  118. biotite/structure/info/nucleotides.py +0 -39
  119. biotite/structure/info/residue_masses.msgpack +0 -0
  120. biotite/structure/info/residue_names.msgpack +0 -3
  121. biotite/structure/info/residues.msgpack +0 -0
  122. biotite/structure/io/pdbx/file.py +0 -652
  123. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
  124. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
@@ -11,13 +11,11 @@ import abc
11
11
  from xml.etree import ElementTree
12
12
  from .check import check_for_errors
13
13
  from .dbnames import sanitize_database_name
14
+ from ..error import RequestError
15
+ from .key import get_api_key
14
16
 
15
17
 
16
- _base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
17
-
18
- _search_url = ("esearch.fcgi?db={:}"
19
- "&term={:}"
20
- "&retmax={:}")
18
+ _search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
21
19
 
22
20
  class Query(metaclass=abc.ABCMeta):
23
21
  """
@@ -26,21 +24,21 @@ class Query(metaclass=abc.ABCMeta):
26
24
  """
27
25
  def __init__(self):
28
26
  pass
29
-
27
+
30
28
  @abc.abstractmethod
31
29
  def __str__(self):
32
30
  pass
33
-
31
+
34
32
  def __or__(self, operand):
35
33
  if not isinstance(operand, Query):
36
34
  operand = SimpleQuery(operand)
37
35
  return CompositeQuery("OR", self, operand)
38
-
36
+
39
37
  def __and__(self, operand):
40
38
  if not isinstance(operand, Query):
41
39
  operand = SimpleQuery(operand)
42
40
  return CompositeQuery("AND", self, operand)
43
-
41
+
44
42
  def __xor__(self, operand):
45
43
  if not isinstance(operand, Query):
46
44
  operand = SimpleQuery(operand)
@@ -51,21 +49,21 @@ class CompositeQuery(Query):
51
49
  """
52
50
  A representation of an composite query
53
51
  for the NCBI Entrez search service.
54
-
52
+
55
53
  A composite query is a combination of two other queries,
56
54
  combined either with an 'AND', 'OR' or 'NOT' operator.
57
55
 
58
56
  Usually the user does not create instances of this class directly,
59
57
  but :class:`Query` instances are combined with
60
58
  ``|`` (OR), ``&`` (AND) or ``^`` (NOT).
61
-
59
+
62
60
  Parameters
63
61
  ----------
64
62
  operator: str, {"AND", "OR", "NOT"}
65
63
  The combination operator.
66
64
  queries : iterable object of SimpleQuery
67
65
  The queries to be combined.
68
-
66
+
69
67
  Examples
70
68
  --------
71
69
 
@@ -76,16 +74,16 @@ class CompositeQuery(Query):
76
74
  >>> print(query)
77
75
  ("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
78
76
  """
79
-
77
+
80
78
  def __init__(self, operator, query1, query2):
81
79
  super().__init__()
82
80
  self._op = operator
83
81
  self._q1 = query1
84
82
  self._q2 = query2
85
-
83
+
86
84
  def __str__(self):
87
85
  return "({:}) {:} ({:})".format(str(self._q1), self._op, self._q2)
88
-
86
+
89
87
 
90
88
 
91
89
  class SimpleQuery(Query):
@@ -96,7 +94,7 @@ class SimpleQuery(Query):
96
94
 
97
95
  A list of available search fields with description can be found
98
96
  `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
99
-
97
+
100
98
  Parameters
101
99
  ----------
102
100
  term: str
@@ -108,10 +106,10 @@ class SimpleQuery(Query):
108
106
  `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
109
107
  By default the field is omitted and all fields are searched in
110
108
  for the term, implicitly.
111
-
109
+
112
110
  Examples
113
111
  --------
114
-
112
+
115
113
  >>> query = SimpleQuery("Escherichia coli")
116
114
  >>> print(query)
117
115
  "Escherichia coli"
@@ -152,7 +150,7 @@ class SimpleQuery(Query):
152
150
  term = f'"{term}"'
153
151
  self._term = term
154
152
  self._field = field
155
-
153
+
156
154
  def __str__(self):
157
155
  string = self._term
158
156
  if self._field is not None:
@@ -164,9 +162,9 @@ def search(query, db_name, number=20):
164
162
  r"""
165
163
  Get all PDB IDs that meet the given query requirements,
166
164
  via the NCBI ESearch service.
167
-
165
+
168
166
  This function requires an internet connection.
169
-
167
+
170
168
  Parameters
171
169
  ----------
172
170
  query : Query
@@ -175,13 +173,13 @@ def search(query, db_name, number=20):
175
173
  E-utility or common database name.
176
174
  number : Query
177
175
  The maximum number of UIDs that are obtained.
178
-
176
+
179
177
  Returns
180
178
  -------
181
179
  ids : list of str
182
180
  A list of strings containing all NCBI UIDs (accession number)
183
181
  that meet the query requirements.
184
-
182
+
185
183
  Warnings
186
184
  --------
187
185
  Even if you give valid input to this function, in rare cases the
@@ -194,7 +192,7 @@ def search(query, db_name, number=20):
194
192
  -----
195
193
  A list of available search fields with description can be found
196
194
  `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
197
-
195
+
198
196
  Examples
199
197
  --------
200
198
  >>> query = SimpleQuery("Escherichia coli", "Organism") & \
@@ -202,18 +200,24 @@ def search(query, db_name, number=20):
202
200
  >>> ids = search(query, "nuccore", number=5)
203
201
  >>> print(ids)
204
202
  ['...', '...', '...', '...', '...']
205
- """
206
- r = requests.get(
207
- (_base_url + _search_url).format(
208
- sanitize_database_name(db_name),
209
- str(query),
210
- str(number)
211
- )
212
- )
203
+ """
204
+ param_dict = {
205
+ "db": sanitize_database_name(db_name),
206
+ "term": str(query),
207
+ "retmax": str(number),
208
+ }
209
+ api_key = get_api_key()
210
+ if api_key is not None:
211
+ param_dict["api_key"] = api_key
212
+ r = requests.get(_search_url, params=param_dict)
213
213
  xml_response = r.text
214
214
  check_for_errors(xml_response)
215
- root = ElementTree.fromstring(xml_response)
215
+ try:
216
+ root = ElementTree.fromstring(xml_response)
217
+ except ElementTree.ParseError:
218
+ if len(xml_response) > 100:
219
+ xml_response = xml_response[:100] + "..."
220
+ raise RequestError(f"Invalid server response: {xml_response}")
216
221
  xpath = ".//IdList/Id"
217
222
  uids = [element.text for element in root.findall(xpath)]
218
223
  return uids
219
-
@@ -84,12 +84,12 @@ class NameQuery(Query):
84
84
  --------
85
85
 
86
86
  >>> print(search(NameQuery("Alanine")))
87
- [5950, 602, 71080]
87
+ [5950, ..., ..., ...]
88
88
  """
89
89
 
90
90
  def __init__(self, name):
91
91
  self._name = name
92
-
92
+
93
93
  def get_input_url_path(self):
94
94
  return "compound/name"
95
95
 
@@ -107,7 +107,7 @@ class SmilesQuery(Query):
107
107
  ----------
108
108
  smiles : str
109
109
  The *SMILES* string.
110
-
110
+
111
111
  Examples
112
112
  --------
113
113
 
@@ -117,7 +117,7 @@ class SmilesQuery(Query):
117
117
 
118
118
  def __init__(self, smiles):
119
119
  self._smiles = smiles
120
-
120
+
121
121
  def get_input_url_path(self):
122
122
  return "compound/smiles"
123
123
 
@@ -134,7 +134,7 @@ class InchiQuery(Query):
134
134
  ----------
135
135
  inchi : str
136
136
  The *InChI* string.
137
-
137
+
138
138
  Examples
139
139
  --------
140
140
 
@@ -144,7 +144,7 @@ class InchiQuery(Query):
144
144
 
145
145
  def __init__(self, inchi):
146
146
  self._inchi = inchi
147
-
147
+
148
148
  def get_input_url_path(self):
149
149
  return "compound/inchi"
150
150
 
@@ -161,7 +161,7 @@ class InchiKeyQuery(Query):
161
161
  ----------
162
162
  inchi_key : str
163
163
  The *InChI* key.
164
-
164
+
165
165
  Examples
166
166
  --------
167
167
 
@@ -171,7 +171,7 @@ class InchiKeyQuery(Query):
171
171
 
172
172
  def __init__(self, inchi_key):
173
173
  self._inchi_key = inchi_key
174
-
174
+
175
175
  def get_input_url_path(self):
176
176
  return "compound/inchikey"
177
177
 
@@ -199,22 +199,22 @@ class FormulaQuery(Query):
199
199
  The maximum number of matches that this query may return.
200
200
  By default, the *PubChem* default value is used, which can be
201
201
  considered unlimited.
202
-
202
+
203
203
  Examples
204
204
  --------
205
205
 
206
206
  >>> print(search(FormulaQuery("C4H10", number=5)))
207
- [7843, 6360, 16213391, 71309065, 16213390]
207
+ [7843, ..., ..., ..., ...]
208
208
  >>> atom_array = residue("ALA")
209
209
  >>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
210
- [5950, 5641, 1088, 239, 602]
210
+ [5950, ..., ..., ..., ...]
211
211
  """
212
212
 
213
213
  def __init__(self, formula, allow_other_elements=False, number=None):
214
214
  self._formula = formula
215
215
  self._allow_other_elements = allow_other_elements
216
216
  self._number = number
217
-
217
+
218
218
  @staticmethod
219
219
  def from_atoms(atoms, allow_other_elements=False, number=None):
220
220
  """
@@ -247,7 +247,7 @@ class FormulaQuery(Query):
247
247
  for element in sorted_elements:
248
248
  formula += _format_element(element, element_counter[element])
249
249
  return FormulaQuery(formula, allow_other_elements, number)
250
-
250
+
251
251
  def get_input_url_path(self):
252
252
  # The 'fastformula' service seems not to accept the formula
253
253
  # in the parameter section of the request
@@ -287,7 +287,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
287
287
  sdf : str, optional
288
288
  A query structure as SDF formatted string.
289
289
  Usually :meth:`from_atoms()` is used to create the SDF from an
290
- :class:`AtomArray`.
290
+ :class:`AtomArray`.
291
291
  cid : int, optional
292
292
  The query structure given as CID.
293
293
  number : int, optional
@@ -351,7 +351,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
351
351
  sdf = "\r\n".join(mol_file.lines) + "\r\n$$$$\r\n",
352
352
  **kwargs
353
353
  )
354
-
354
+
355
355
  def get_input_url_path(self):
356
356
  input_string = f"compound/{self.search_type()}/{self._query_key}"
357
357
  if self._query_key == "cid":
@@ -384,7 +384,7 @@ class StructureQuery(Query, metaclass=abc.ABCMeta):
384
384
  return {"sdf": self._query_val}
385
385
  else:
386
386
  return {}
387
-
387
+
388
388
  @abc.abstractmethod
389
389
  def search_type(self):
390
390
  """
@@ -434,7 +434,7 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
434
434
  sdf : str, optional
435
435
  A query structure as SDF formatted string.
436
436
  Usually :meth:`from_atoms()` is used to create the SDF from an
437
- :class:`AtomArray`.
437
+ :class:`AtomArray`.
438
438
  cid : int, optional
439
439
  The query structure given as CID.
440
440
  number : int, optional
@@ -463,7 +463,7 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
463
463
  stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
464
464
  How to handle stereo.
465
465
  (Default: 'ignore')
466
-
466
+
467
467
  Notes
468
468
  -----
469
469
  Optional parameter descriptions are taken from the *PubChem* REST
@@ -488,7 +488,7 @@ class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
488
488
  self._options[option] = value
489
489
  del kwargs[option]
490
490
  super().__init__(**kwargs)
491
-
491
+
492
492
  def search_options(self):
493
493
  return self._options
494
494
 
@@ -514,7 +514,7 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
514
514
  sdf : str, optional
515
515
  A query structure as SDF formatted string.
516
516
  Usually :meth:`from_atoms()` is used to create the SDF from an
517
- :class:`AtomArray`.
517
+ :class:`AtomArray`.
518
518
  cid : int, optional
519
519
  The query structure given as CID.
520
520
  number : int, optional
@@ -543,7 +543,7 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
543
543
  stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
544
544
  How to handle stereo.
545
545
  (Default: 'ignore')
546
-
546
+
547
547
  Notes
548
548
  -----
549
549
  Optional parameter descriptions are taken from the *PubChem* REST
@@ -555,11 +555,11 @@ class SuperstructureQuery(SuperOrSubstructureQuery):
555
555
 
556
556
  >>> # CID of alanine
557
557
  >>> print(search(SuperstructureQuery(cid=5950, number=5)))
558
- [1032, 887, 712, 702, 284]
558
+ [1032, ..., ..., ..., ...]
559
559
  >>> # AtomArray of alanine
560
560
  >>> atom_array = residue("ALA")
561
561
  >>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
562
- [1032, 887, 712, 702, 284]
562
+ [1032, ..., ..., ..., ...]
563
563
  """
564
564
 
565
565
  def search_type(self):
@@ -587,7 +587,7 @@ class SubstructureQuery(SuperOrSubstructureQuery):
587
587
  sdf : str, optional
588
588
  A query structure as SDF formatted string.
589
589
  Usually :meth:`from_atoms()` is used to create the SDF from an
590
- :class:`AtomArray`.
590
+ :class:`AtomArray`.
591
591
  cid : int, optional
592
592
  The query structure given as CID.
593
593
  number : int, optional
@@ -616,7 +616,7 @@ class SubstructureQuery(SuperOrSubstructureQuery):
616
616
  stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
617
617
  How to handle stereo.
618
618
  (Default: 'ignore')
619
-
619
+
620
620
  Notes
621
621
  -----
622
622
  Optional parameter descriptions are taken from the *PubChem* REST
@@ -628,11 +628,11 @@ class SubstructureQuery(SuperOrSubstructureQuery):
628
628
 
629
629
  >>> # CID of alanine
630
630
  >>> print(search(SubstructureQuery(cid=5950, number=5)))
631
- [5950, 602, 71080, 3081884, 65370]
631
+ [5950, ..., ..., ..., ...]
632
632
  >>> # AtomArray of alanine
633
633
  >>> atom_array = residue("ALA")
634
634
  >>> print(search(SubstructureQuery.from_atoms(atom_array, number=5)))
635
- [5950, 602, 71080, 3081884, 65370]
635
+ [5950, ..., ..., ..., ...]
636
636
  """
637
637
 
638
638
  def search_type(self):
@@ -666,14 +666,14 @@ class SimilarityQuery(StructureQuery):
666
666
  sdf : str, optional
667
667
  A query structure as SDF formatted string.
668
668
  Usually :meth:`from_atoms()` is used to create the SDF from an
669
- :class:`AtomArray`.
669
+ :class:`AtomArray`.
670
670
  cid : int, optional
671
671
  The query structure given as CID.
672
672
  number : int, optional
673
673
  The maximum number of matches that this query may return.
674
674
  By default, the *PubChem* default value is used, which can
675
675
  be considered unlimited.
676
-
676
+
677
677
  Notes
678
678
  -----
679
679
  The conformation based similarity measure uses *shape-Tanimoto* and
@@ -681,7 +681,7 @@ class SimilarityQuery(StructureQuery):
681
681
 
682
682
  References
683
683
  ----------
684
-
684
+
685
685
  .. footbibliography::
686
686
 
687
687
  Examples
@@ -689,22 +689,22 @@ class SimilarityQuery(StructureQuery):
689
689
 
690
690
  >>> # CID of alanine
691
691
  >>> print(search(SimilarityQuery(cid=5950, threshold=1.0, number=5)))
692
- [5950, 602, 71080, 11815285, 10749140]
692
+ [5950, ..., ..., ..., ...]
693
693
  >>> # AtomArray of alanine
694
694
  >>> atom_array = residue("ALA")
695
695
  >>> print(search(SimilarityQuery.from_atoms(atom_array, threshold=1.0, number=5)))
696
- [5950, 602, 71080, 11815285, 10749140]
696
+ [5950, ..., ..., ..., ...]
697
697
  """
698
698
 
699
699
  def __init__(self, threshold=0.9, conformation_based=False, **kwargs):
700
700
  self._threshold = threshold
701
701
  self._conformation_based = conformation_based
702
702
  super().__init__(**kwargs)
703
-
703
+
704
704
  def search_type(self):
705
705
  dim = "3d" if self._conformation_based else "2d"
706
706
  return f"fastsimilarity_{dim}"
707
-
707
+
708
708
  def search_options(self):
709
709
  return {"threshold" : int(round(self._threshold * 100))}
710
710
 
@@ -730,14 +730,14 @@ class IdentityQuery(StructureQuery):
730
730
  sdf : str, optional
731
731
  A query structure as SDF formatted string.
732
732
  Usually :meth:`from_atoms()` is used to create the SDF from an
733
- :class:`AtomArray`.
733
+ :class:`AtomArray`.
734
734
  cid : int, optional
735
735
  The query structure given as CID.
736
736
  number : int, optional
737
737
  The maximum number of matches that this query may return.
738
738
  By default, the *PubChem* default value is used, which can
739
739
  be considered unlimited.
740
-
740
+
741
741
  Examples
742
742
  --------
743
743
 
@@ -753,10 +753,10 @@ class IdentityQuery(StructureQuery):
753
753
  def __init__(self, identity_type="same_stereo_isotope", **kwargs):
754
754
  self._identity_type = identity_type
755
755
  super().__init__(**kwargs)
756
-
756
+
757
757
  def search_type(self):
758
758
  return "fastidentity"
759
-
759
+
760
760
  def get_params(self):
761
761
  # Use 'get_params()' instead of 'search_options()', since the
762
762
  # parameter 'identity_type' in the REST API is *snake case*
@@ -764,7 +764,7 @@ class IdentityQuery(StructureQuery):
764
764
  params = super().get_params()
765
765
  params["identity_type"] = self._identity_type
766
766
  return params
767
-
767
+
768
768
 
769
769
 
770
770
 
@@ -772,9 +772,9 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
772
772
  """
773
773
  Get all CIDs that meet the given query requirements,
774
774
  via the PubChem REST API.
775
-
775
+
776
776
  This function requires an internet connection.
777
-
777
+
778
778
  Parameters
779
779
  ----------
780
780
  query : Query
@@ -787,7 +787,7 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
787
787
  If ``None`` is given, the execution is never halted.
788
788
  return_throttle_status : float, optional
789
789
  If set to true, the :class:`ThrottleStatus` is also returned.
790
-
790
+
791
791
  Returns
792
792
  -------
793
793
  ids : list of int
@@ -796,12 +796,12 @@ def search(query, throttle_threshold=0.5, return_throttle_status=False):
796
796
  The :class:`ThrottleStatus` obtained from the server response.
797
797
  This can be used for custom request throttling, for example.
798
798
  Only returned, if `return_throttle_status` is set to true.
799
-
799
+
800
800
  Examples
801
801
  --------
802
802
 
803
803
  >>> print(search(NameQuery("Alanine")))
804
- [5950, 602, 71080]
804
+ [5950, ..., ..., ...]
805
805
  """
806
806
  # Use POST to be compatible with the larger payloads
807
807
  # of structure searches
@@ -16,24 +16,25 @@ from ..error import RequestError
16
16
 
17
17
  _standard_url = "https://files.rcsb.org/download/"
18
18
  _mmtf_url = "https://mmtf.rcsb.org/v1.0/full/"
19
+ _bcif_url = "https://models.rcsb.org/"
19
20
  _fasta_url = "https://www.rcsb.org/fasta/entry/"
20
21
 
21
- _binary_formats = ["mmtf"]
22
+ _binary_formats = ["mmtf", "bcif"]
22
23
 
23
24
 
24
25
  def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
25
26
  """
26
27
  Download structure files (or sequence files) from the RCSB PDB in
27
28
  various formats.
28
-
29
+
29
30
  This function requires an internet connection.
30
-
31
+
31
32
  Parameters
32
33
  ----------
33
34
  pdb_ids : str or iterable object of str
34
35
  A single PDB ID or a list of PDB IDs of the structure(s)
35
36
  to be downloaded.
36
- format : {'pdb', 'pdbx', 'cif', 'mmcif', 'mmtf', 'fasta'}
37
+ format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'mmtf', 'fasta'}
37
38
  The format of the files to be downloaded.
38
39
  ``'pdbx'``, ``'cif'`` and ``'mmcif'`` are synonyms for
39
40
  the same format.
@@ -48,7 +49,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
48
49
  the file is empty.
49
50
  verbose: bool, optional
50
51
  If set to true, the function will output the download progress.
51
-
52
+
52
53
  Returns
53
54
  -------
54
55
  files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
@@ -58,7 +59,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
58
59
  object) was given, a list of strings is returned.
59
60
  If no `target_path` was given, the file contents are stored in
60
61
  either :class:`StringIO` or :class:`BytesIO` objects.
61
-
62
+
62
63
  Warnings
63
64
  --------
64
65
  Even if you give valid input to this function, in rare cases the
@@ -66,10 +67,10 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
66
67
  In these cases the request should be retried.
67
68
  When the issue occurs repeatedly, the error is probably in your
68
69
  input.
69
-
70
+
70
71
  Examples
71
72
  --------
72
-
73
+
73
74
  >>> import os.path
74
75
  >>> file = fetch("1l2y", "cif", path_to_directory)
75
76
  >>> print(os.path.basename(file))
@@ -88,21 +89,21 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
88
89
  # Create the target folder, if not existing
89
90
  if target_path is not None and not os.path.isdir(target_path):
90
91
  os.makedirs(target_path)
91
-
92
+
92
93
  files = []
93
94
  for i, id in enumerate(pdb_ids):
94
95
  # Verbose output
95
96
  if verbose:
96
97
  print(f"Fetching file {i+1:d} / {len(pdb_ids):d} ({id})...",
97
98
  end="\r")
98
-
99
+
99
100
  # Fetch file from database
100
101
  if target_path is not None:
101
102
  file = join(target_path, id + "." + format)
102
103
  else:
103
104
  # 'file = None' -> store content in a file-like object
104
105
  file = None
105
-
106
+
106
107
  if file is None \
107
108
  or not isfile(file) \
108
109
  or getsize(file) == 0 \
@@ -115,6 +116,10 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
115
116
  r = requests.get(_standard_url + id + ".cif")
116
117
  content = r.text
117
118
  _assert_valid_file(content, id)
119
+ elif format in ["bcif"]:
120
+ r = requests.get(_bcif_url + id + ".bcif")
121
+ content = r.content
122
+ _assert_valid_file(r.text, id)
118
123
  elif format == "mmtf":
119
124
  r = requests.get(_mmtf_url + id)
120
125
  content = r.content
@@ -125,7 +130,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
125
130
  _assert_valid_file(content, id)
126
131
  else:
127
132
  raise ValueError(f"Format '{format}' is not supported")
128
-
133
+
129
134
  if file is None:
130
135
  if format in _binary_formats:
131
136
  file = io.BytesIO(content)
@@ -135,7 +140,7 @@ def fetch(pdb_ids, format, target_path=None, overwrite=False, verbose=False):
135
140
  mode = "wb+" if format in _binary_formats else "w+"
136
141
  with open(file, mode) as f:
137
142
  f.write(content)
138
-
143
+
139
144
  files.append(file)
140
145
  if verbose:
141
146
  print("\nDone")
@@ -153,7 +158,7 @@ def _assert_valid_file(response_text, pdb_id):
153
158
  """
154
159
  # Structure file and FASTA file retrieval
155
160
  # have different error messages
156
- if any(err_msg in response_text for err_msg in [
161
+ if len(response_text) == 0 or any(err_msg in response_text for err_msg in [
157
162
  "404 Not Found",
158
163
  "<title>RCSB Protein Data Bank Error Page</title>",
159
164
  "No fasta files were found.",