biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,959 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
# Module metadata: the module name is overridden with the name of the
# subpackage, and `__all__` lists the names this module exports.
__name__ = "biotite.database.rcsb"
__author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
__all__ = ["Query", "SingleQuery", "CompositeQuery",
           "BasicQuery", "FieldQuery",
           "SequenceQuery", "StructureQuery", "MotifQuery",
           "Sorting",
           "Grouping", "DepositGrouping", "IdentityGrouping", "UniprotGrouping",
           "search", "count"]
13
+
14
+ import abc
15
+ import json
16
+ import copy
17
+ from datetime import datetime
18
+ import numpy as np
19
+ import requests
20
+ from ...sequence.seqtypes import NucleotideSequence
21
+ from ..error import RequestError
22
+
23
+
24
# URL of the RCSB search API (its use lies outside the code shown here)
_search_url = "https://search.rcsb.org/rcsbsearch/v2/query"
# Maps the `scope` values accepted by SequenceQuery and MotifQuery to the
# corresponding ``'target'`` parameter values placed into the query content
_scope_to_target = {
    "protein": "pdb_protein_sequence",
    "rna": "pdb_rna_sequence",
    "dna": "pdb_dna_sequence"
}
30
+
31
+
32
class Query(metaclass=abc.ABCMeta):
    """
    A representation of a JSON query for the RCSB search API.

    This is the abstract base class for all queries.
    Two queries can be combined into a :class:`CompositeQuery` using
    the ``&`` (logical *and*) and ``|`` (logical *or*) operators.
    """
    @abc.abstractmethod
    def get_content(self):
        """
        Get the query content, i.e. the data belonging to the
        ``'query'`` attribute in the RCSB search API.

        This content is converted into JSON by the :func:`search`
        and :func:`count` functions.

        Returns
        -------
        content : dict
            The content dictionary for the ``'query'`` attributes.
        """
        pass

    def __and__(self, query):
        # Follow the operator protocol for unsupported operands, so that
        # the error is raised right at the operator instead of later,
        # when the query content is assembled
        if not isinstance(query, Query):
            return NotImplemented
        return CompositeQuery([self, query], "and")

    def __or__(self, query):
        # See __and__() for the reasoning behind the operand check
        if not isinstance(query, Query):
            return NotImplemented
        return CompositeQuery([self, query], "or")
59
+
60
+
61
+
62
class SingleQuery(Query, metaclass=abc.ABCMeta):
    """
    A terminal query node for the RCSB search API.

    This is the abstract base class for all queries that are
    terminal nodes.
    :class:`SingleQuery` objects can be combined to
    :class:`CompositeQuery` objects using the ``|`` and ``&`` operators.
    """
    @abc.abstractmethod
    def get_content(self):
        """
        Create the skeleton of the query content.

        Subclasses call this method via ``super()`` and fill in their
        own entries.

        Returns
        -------
        content : dict
            A new dictionary containing an empty ``'parameters'``
            dictionary.
        """
        skeleton = dict(parameters={})
        return skeleton
75
+
76
+
77
class CompositeQuery(Query):
    """
    A group query node for the RCSB search API.

    A composite query is a combination of other queries, combined
    either with the `'and'` or `'or'` operator.
    Usually, a :class:`CompositeQuery` will not be created by calling
    its constructor, but by combining queries using the ``|`` or ``&``
    operator.

    Parameters
    ----------
    queries : iterable object of Query
        The queries to be combined.
    operator : {'or', 'and'}
        The type of combination.

    Raises
    ------
    ValueError
        If `operator` is neither ``'or'`` nor ``'and'``.
    """
    def __init__(self, queries, operator):
        # Materialize the iterable: a one-shot iterator (e.g. a
        # generator) would otherwise be exhausted after the first
        # call of get_content(), yielding empty nodes afterwards
        self._queries = list(queries)
        if operator not in ("or", "and"):
            raise ValueError(
                f"Operator must be 'or' or 'and', not '{operator}'"
            )
        self._operator = operator

    def get_content(self):
        """
        A dictionary representation of the query.
        This dictionary is the content of the ``'query'`` key in the
        JSON query.

        Returns
        -------
        content : dict
            The dictionary representation of the query.
        """
        content = {
            "type": "group",
            "logical_operator": self._operator,
            # Each child query contributes its own content as a node
            "nodes": [query.get_content() for query in self._queries]
        }
        return content
119
+
120
+
121
+
122
class BasicQuery(SingleQuery):
    """
    A full text query: the given term is searched across all
    available fields.

    Parameters
    ----------
    term : str
        The search term.
        If the term contains multiple words, the query will return
        results where the entire term is present.
        The matching is not case-sensitive.
        Logic combinations of terms is described
        `here <https://search.rcsb.org/#basic-queries>`_.

    Examples
    --------

    >>> query = BasicQuery("tc5b")
    >>> print(sorted(search(query)))
    ['1L2Y', '8ANG', '8ANH', '8ANI', '8ANM']
    """
    def __init__(self, term):
        super().__init__()
        self._term = term

    def get_content(self):
        """
        Get the content dictionary of this full text query.

        Returns
        -------
        content : dict
            The dictionary representation of the query.
        """
        # The term is enclosed in double quotes in the query value
        quoted_term = '"{}"'.format(self._term)
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "full_text"
        content["parameters"]["value"] = quoted_term
        return content
154
+
155
+
156
class FieldQuery(SingleQuery):
    """
    A text query for searching for values in a given field using the
    given operator.

    The operator is selected via the name of a keyword argument and the
    value of that argument is the search value.
    At most one operator can be given.
    If none is given, the search will return results where the given
    field exists.

    A :class:`FieldQuery` is negated using the ``~`` operator.

    Parameters
    ----------
    field : str
        The field to search in.
    molecular_definition : bool, optional
        If set true, this query searches in fields
        associated with
        `molecular definitions <https://search.rcsb.org/chemical-search-attributes.html>`_.
        If false (default), this query searches in fields
        associated with `PDB structures <https://search.rcsb.org/structure-search-attributes.html>`_.
    case_sensitive : bool, optional
        If set to true, searches are case sensitive.
        By default matching is case-insensitive.
    exact_match : str, optional
        Operator for returning results whose field exactly matches the
        given value.
    contains_words, contains_phrase : str, optional
        Operator for returning results whose field matches
        individual words from the given value or the value as exact
        phrase, respectively.
    greater, less, greater_or_equal, less_or_equal, equals : int or float or datetime, optional
        Operator for returning results whose field values are larger,
        smaller or equal to the given value.
    range, range_closed : tuple(int, int) or tuple(float, float) or tuple(datetime, datetime), optional
        Operator for returning results whose field matches values within
        the given range.
        `range_closed` includes the interval limits.
    is_in : tuple of str or list of str, optional
        Operator for returning results whose field matches any of the
        values in the given list.

    Raises
    ------
    TypeError
        If more than one operator or an unknown operator is given.

    Notes
    -----
    A complete list of the available fields and its supported operators
    is documented at
    `<https://search.rcsb.org/structure-search-attributes.html>`_
    and
    `<https://search.rcsb.org/chemical-search-attributes.html>`_.

    Examples
    --------

    >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
    >>> print(sorted(search(query)))
    ['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
    """
    def __init__(self, field, molecular_definition=False, case_sensitive=False, **kwargs):
        super().__init__()
        self._negation = False
        self._field = field
        self._mol_definition = molecular_definition
        self._case_sensitive = case_sensitive

        if len(kwargs) > 1:
            raise TypeError("Only one operator must be given")
        if kwargs:
            # Exactly one keyword argument: its name is the operator
            (operator, value), = kwargs.items()
        else:
            # Without an operator the query tests for field existence
            operator, value = "exists", None
        self._operator = operator

        supported_operators = (
            "exact_match",
            "contains_words", "contains_phrase",
            "greater", "less", "greater_or_equal", "less_or_equal", "equals",
            "range", "range_closed",
            "is_in",
            "exists",
        )
        if self._operator not in supported_operators:
            raise TypeError(
                f"Constructor got an unexpected keyword argument "
                f"'{self._operator}'"
            )

        # Dates are passed to the API in ISO 8601 format,
        # sequences of values are converted element-wise into a list
        if isinstance(value, datetime):
            value = _to_isoformat(value)
        elif isinstance(value, (tuple, list, np.ndarray)):
            converted = []
            for element in value:
                if isinstance(element, datetime):
                    element = _to_isoformat(element)
                converted.append(element)
            value = converted

        # Both range operators use the same dictionary layout and
        # differ only in whether the interval limits are included
        if operator in ("range", "range_closed"):
            include_limits = operator == "range_closed"
            value = {
                "from": value[0],
                "include_lower": include_limits,
                "to": value[1],
                "include_upper": include_limits
            }
        self._value = value

        # Translate the keyword names into the operator names of the API:
        # 'in' is a reserved word in Python and 'range_closed' is kept
        # for backwards compatibility
        api_names = {"is_in": "in", "range_closed": "range"}
        self._operator = api_names.get(operator, operator)

    def get_content(self):
        """
        Get the content dictionary of this field query.

        Returns
        -------
        content : dict
            The dictionary representation of the query.
        """
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "text_chem" if self._mol_definition else "text"
        parameters = content["parameters"]
        parameters["attribute"] = self._field
        parameters["operator"] = self._operator
        parameters["negation"] = self._negation
        parameters["case_sensitive"] = self._case_sensitive
        # The 'exists' operator has no value
        if self._value is not None:
            parameters["value"] = self._value
        return content

    def __invert__(self):
        # Negation returns a modified copy, the original query is untouched
        negated = copy.deepcopy(self)
        negated._negation = not self._negation
        return negated
297
+
298
+
299
class SequenceQuery(SingleQuery):
    """
    A query for protein/DNA/RNA molecules whose sequence is similar to
    a given input sequence.
    The similarity search uses
    `MMseqs2 <https://github.com/soedinglab/mmseqs2>`_.

    Parameters
    ----------
    sequence : Sequence or str
        The input sequence.
        If `sequence` is a :class:`NucleotideSequence` and the `scope`
        is ``'rna'``, ``'T'`` is automatically replaced by ``'U'``.
    scope : {'protein', 'dna', 'rna'}
        The type of molecule to find.
    min_identity : float, optional
        A match is only returned, if the sequence identity between
        the match and the input sequence exceeds this value.
        Must be between 0 and 1.
        By default, the sequence identity is ignored.
    max_expect_value : float, optional
        A match is only returned, if the *expect value* (E-value) does
        not exceed this value.
        By default, the value is effectively ignored.

    Raises
    ------
    ValueError
        If `scope` is not one of the supported scopes.

    Notes
    -----
    *MMseqs2* is run on the RCSB servers.

    Examples
    --------

    >>> sequence = "NLYIQWLKDGGPSSGRPPPS"
    >>> query = SequenceQuery(sequence, scope="protein", min_identity=0.8)
    >>> print(sorted(search(query)))
    ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2LL5', '2MJ9', '3UC7', '3UC8']
    """
    def __init__(self, sequence, scope,
                 min_identity=0.0, max_expect_value=10000000.0):
        super().__init__()
        # The scope is matched case-insensitively
        scope_key = scope.lower()
        self._target = _scope_to_target.get(scope_key)
        if self._target is None:
            raise ValueError(f"'{scope}' is an invalid scope")

        seq_string = str(sequence)
        if scope_key == "rna" and isinstance(sequence, NucleotideSequence):
            # A NucleotideSequence is converted into its RNA notation
            seq_string = seq_string.replace("T", "U")
        self._sequence = seq_string

        self._min_identity = min_identity
        self._max_expect_value = max_expect_value

    def get_content(self):
        """
        Get the content dictionary of this sequence query.

        Returns
        -------
        content : dict
            The dictionary representation of the query.
        """
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "sequence"
        parameters = content["parameters"]
        parameters["value"] = self._sequence
        parameters["target"] = self._target
        parameters["identity_cutoff"] = self._min_identity
        parameters["evalue_cutoff"] = self._max_expect_value
        return content
359
+
360
+
361
class MotifQuery(SingleQuery):
    """
    A query for protein/DNA/RNA molecules containing the given sequence
    motif.

    Parameters
    ----------
    pattern : str
        The sequence pattern.
    pattern_type : {'simple', 'prosite', 'regex'}
        The type of the pattern.
    scope : {'protein', 'dna', 'rna'}
        The type of molecule to find.

    Raises
    ------
    ValueError
        If `scope` is not one of the supported scopes.

    Examples
    --------

    >>> query = MotifQuery(
    ...     "C-x(2,4)-C-x(3)-[LIVMFYWC]-x(8)-H-x(3,5)-H.",
    ...     "prosite",
    ...     "protein"
    ... )
    """
    def __init__(self, pattern, pattern_type, scope):
        super().__init__()
        self._pattern = pattern
        self._pattern_type = pattern_type
        self._target = _scope_to_target.get(scope.lower())
        # Reject unknown scopes right away (as SequenceQuery does),
        # instead of putting a 'None' target into the query content
        if self._target is None:
            raise ValueError(f"'{scope}' is an invalid scope")

    def get_content(self):
        """
        Get the content dictionary of this sequence motif query.

        Returns
        -------
        content : dict
            The dictionary representation of the query.
        """
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "seqmotif"
        content["parameters"]["value"] = self._pattern
        content["parameters"]["pattern_type"] = self._pattern_type
        content["parameters"]["target"] = self._target
        return content
398
+
399
+
400
class StructureQuery(SingleQuery):
    """
    A query for protein/DNA/RNA molecules with structural similarity
    to the query structure.

    Either the chain or assembly ID of the query structure must be
    specified.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the query structure.
    chain : str, optional
        The chain ID (more exactly ``asym_id``) of the query structure.
    assembly : str, optional
        The assembly ID (``assembly_id``) of the query structure.
    strict : bool, optional
        If true, structure comparison is strict, otherwise it is
        relaxed.

    Raises
    ------
    TypeError
        If neither or both of `chain` and `assembly` are given.

    Examples
    --------

    >>> query = StructureQuery("1L2Y", chain="A")
    >>> print(sorted(search(query)))
    ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS']
    """
    def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
        super().__init__()

        # Exactly one of 'chain' and 'assembly' must be given
        if (chain is None and assembly is None) \
           or (chain is not None and assembly is not None):
            raise TypeError(
                "Either the chain ID or assembly ID must be set"
            )
        elif chain is None:
            self._value = {
                "entry_id": pdb_id,
                # The key must be spelled 'assembly_id' to be
                # recognized as the assembly of the query structure
                "assembly_id": assembly
            }
        else:
            self._value = {
                "entry_id": pdb_id,
                "asym_id": chain
            }

        self._operator = "strict_shape_match" if strict \
                         else "relaxed_shape_match"

    def get_content(self):
        # Terminal node for the 'structure' service, built on top of
        # the content provided by the parent class
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "structure"
        content["parameters"]["value"] = self._value
        content["parameters"]["operator"] = self._operator
        return content
456
+
457
+
458
+
459
+
460
class Sorting:
    """
    A field name combined with a sort direction.

    Parameters
    ----------
    field : str
        The name of the field the results are sorted by.
    descending : bool, optional
        If true, the direction is ``'desc'``, otherwise ``'asc'``.
    """

    def __init__(self, field, descending=True):
        self._field, self._descending = field, descending

    @property
    def field(self):
        return self._field

    @property
    def descending(self):
        return self._descending

    def get_content(self):
        """
        Build the sorting content, i.e. the data belonging to the
        ``'sort'`` and ``'ranking_criteria_type'`` attributes in the
        RCSB search API.

        The :func:`search` function converts this content into JSON.

        Returns
        -------
        content : dict
            The content dictionary for the ``'sort'`` and
            ``'ranking_criteria_type'`` attributes.
        """
        if self._descending:
            direction = "desc"
        else:
            direction = "asc"
        return {"sort_by": self._field, "direction": direction}
494
+
495
+
496
+
497
+
498
class Grouping(metaclass=abc.ABCMeta):
    """
    Abstract base for the JSON grouping options of the RCSB search
    API.

    Parameters
    ----------
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def __init__(self, sort_by=None):
        if sort_by is None or isinstance(sort_by, Sorting):
            # Nothing to convert: either no sorting or a ready-made one
            self._sorting = sort_by
        else:
            # A plain field name gets the default Sorting direction
            self._sorting = Sorting(sort_by)

    @abc.abstractmethod
    def get_content(self):
        """
        Build the grouping content, i.e. the data belonging to the
        ``'group_by'`` attribute in the RCSB search API.

        The :func:`search` and :func:`count` functions convert this
        content into JSON.

        ABSTRACT: Override when inheriting.

        Returns
        -------
        content : dict
            The content dictionary for the ``'group_by'`` attributes.
        """
        if self._sorting is None:
            return {}
        return {"ranking_criteria_type" : self._sorting.get_content()}

    @abc.abstractmethod
    def is_compatible_return_type(self, return_type):
        """
        Tell whether this :class:`Grouping` can be combined with the
        given RCSB search API ``return_type``.

        ABSTRACT: Override when inheriting.

        Parameters
        ----------
        return_type : str
            The ``return_type`` attribute to be checked.

        Returns
        -------
        is_compatible : bool
            True, if this :class:`Grouping` is compatible with the
            `return_type`, false otherwise.
        """
        pass
566
+
567
+
568
class DepositGrouping(Grouping):
    """
    Group PDB entries that were deposited together as a collection.
    Such a group usually contains the same protein with e.g. a
    different bound molecule.

    This :class:`Grouping` is only applicable, if the
    :func:`count()`/:func:`search()` return type is set to ``entry``.

    Parameters
    ----------
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def get_content(self):
        # Parent content (sorting criteria, if any) plus the
        # aggregation method of this grouping
        grouping_options = super().get_content()
        grouping_options.update(
            aggregation_method="matching_deposit_group_id"
        )
        return grouping_options

    def is_compatible_return_type(self, return_type):
        # Only the 'entry' return type is accepted
        return return_type == "entry"
599
+
600
+
601
class IdentityGrouping(Grouping):
    """
    Group protein chains that share a given sequence identity with
    each other.

    This :class:`Grouping` is only applicable, if the
    :func:`count()`/:func:`search()` return type is set to
    ``polymer_entity``.

    Parameters
    ----------
    similarity_cutoff : {100, 95, 90, 70, 50, 30}
        The sequence identity in percent at which the structures are
        grouped.
        In other words, a returned group contains sequences that have
        `similarity_cutoff` sequence identity with each other.
        Since the PDB uses precalculated clusters, only certain values
        are available.
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """
    def __init__(self, similarity_cutoff, sort_by=None):
        super().__init__(sort_by)
        supported_cutoffs = (100, 95, 90, 70, 50, 30)
        is_supported = similarity_cutoff in supported_cutoffs
        if not is_supported:
            raise ValueError(
                f"A similarity cutoff of {similarity_cutoff}% is not supported"
            )
        self._similarity_cutoff = similarity_cutoff

    def get_content(self):
        # Parent content (sorting criteria, if any) plus the
        # aggregation method and cutoff of this grouping
        grouping_options = super().get_content()
        grouping_options.update(
            aggregation_method="sequence_identity",
            similarity_cutoff=self._similarity_cutoff,
        )
        return grouping_options

    def is_compatible_return_type(self, return_type):
        # Only the 'polymer_entity' return type is accepted
        return return_type == "polymer_entity"
646
+
647
+
648
class UniprotGrouping(Grouping):
    """
    Group protein chains that refer to the same *Uniprot* accession
    ID.

    This :class:`Grouping` is only applicable, if the
    :func:`count()`/:func:`search()` return type is set to
    ``polymer_entity``.

    Parameters
    ----------
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def get_content(self):
        # Parent content (sorting criteria, if any) plus the
        # aggregation method of this grouping
        grouping_options = super().get_content()
        grouping_options.update(
            aggregation_method="matching_uniprot_accession"
        )
        return grouping_options

    def is_compatible_return_type(self, return_type):
        # Only the 'polymer_entity' return type is accepted
        return return_type == "polymer_entity"
678
+
679
+
680
+
681
+
682
+
683
def count(query, return_type="entry", group_by=None,
          content_types=("experimental",)):
    """
    Count PDB entries that meet the given query requirements,
    via the RCSB search API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    return_type : {'entry', 'assembly', 'polymer_entity', 'non_polymer_entity', 'polymer_instance'}, optional
        The type of the counted identifiers:

        - ``'entry'``: All matching PDB entries are counted.
        - ``'assembly'``: All matching assemblies are counted.
        - ``'polymer_entity'``: All matching polymeric entities are
          counted.
        - ``'non_polymer_entity'``: All matching non-polymeric entities
          are counted.
        - ``'polymer_instance'``: All matching chains are counted.
    group_by : Grouping
        If this parameter is set, the number of groups is returned
        instead.
    content_types : iterable of {"experimental", "computational"}, optional
        Specify whether experimental and computational structures should
        be included.
        At least one of them needs to be specified.
        By default only experimental structures are included.
        Note, that identifiers for computational structures cannot be
        downloaded via :func:`biotite.database.rcsb.fetch()` as they
        point to *AlphaFold DB* and *ModelArchive*.

    Returns
    -------
    count : int
        The total number of PDB IDs (or groups) that would be returned
        by calling :func:`search()` using the same parameters.

    Raises
    ------
    ValueError
        If `return_type` or `content_types` is invalid or `group_by`
        is not compatible with `return_type`.
    RequestError
        If the server responds with a status code other than 200
        or 204.

    Notes
    -----
    If `group_by` is set, the number of results may be lower than in an
    ungrouped query, as grouping is not applicable to all structures.
    For example a DNA structure has no associated *Uniprot* accession
    and hence is omitted by :class:`UniprotGrouping`.

    Examples
    --------

    >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
    >>> print(count(query))
    9
    >>> ids = search(query)
    >>> print(sorted(ids))
    ['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
    """
    query_dict = _initialize_query_dict(
        query, return_type, group_by, content_types
    )

    query_dict["request_options"]["return_counts"] = True

    r = requests.get(_search_url, params={"json": json.dumps(query_dict)})

    if r.status_code == 200:
        if group_by is None:
            return r.json()["total_count"]
        else:
            return r.json()["group_by_count"]
    elif r.status_code == 204:
        # Search did not return any results
        return 0
    else:
        try:
            message = r.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error response has no (readable) message:
            # - ValueError covers JSON decoding errors, independent of
            #   the JSON backend used to decode the response
            # - KeyError/TypeError cover a JSON body without a
            #   'message' entry
            raise RequestError(f"Error {r.status_code}")
        raise RequestError(f"Error {r.status_code}: {message}")
762
+
763
+
764
def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
           return_groups=False, content_types=("experimental",)):
    """
    Get all PDB IDs that meet the given query requirements,
    via the RCSB search API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    return_type : {'entry', 'assembly', 'polymer_entity', 'non_polymer_entity', 'polymer_instance'}, optional
        The type of the returned identifiers:

        - ``'entry'``: Only the PDB ID is returned (e.g. ``'XXXX'``).
          These can be used directly as input to :func:`fetch()`.
        - ``'assembly'``: The PDB ID appended with assembly ID is
          returned (e.g. ``'XXXX-1'``).
        - ``'polymer_entity'``: The PDB ID appended with entity ID of
          polymers is returned (e.g. ``'XXXX_1'``).
        - ``'non_polymer_entity'``: The PDB ID appended with entity ID
          of non-polymeric entities is returned (e.g. ``'XXXX_1'``).
        - ``'polymer_instance'``: The PDB ID appended with chain ID
          (more exactly ``'asym_id'``) is returned (e.g. ``'XXXX.A'``).

    range : tuple(int, int), optional
        If this parameter is specified, only PDB IDs in this range
        are selected from all matching PDB IDs and returned
        (pagination).
        The range is zero-indexed and the stop value is exclusive.
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs are sorted by the values
        of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order, a :class:`Sorting` object needs to be
        provided.
    group_by : Grouping
        If this parameter is set, the PDB IDs that meet the query
        requirements, are grouped according to the given criterion.
    return_groups : boolean, optional
        Only has effect, if `group_by` is set.
        By default the representative with the highest rank in each
        group is returned.
        The rank is determined by the `sort_by` parameter of
        :class:`Grouping` provided in `group_by`.
        If set to true, groups containing all structures belonging to
        the group are returned instead.
    content_types : iterable of {"experimental", "computational"}, optional
        Specify whether experimental and computational structures should
        be included.
        At least one of them needs to be specified.
        By default only experimental structures are included.
        Note, that identifiers for computational structures cannot be
        downloaded via :func:`biotite.database.rcsb.fetch()` as they
        point to *AlphaFold DB* and *ModelArchive*.

    Returns
    -------
    ids : list of str or dict (str -> list of str)
        If `return_groups` is false (default case), a list of strings
        containing all PDB IDs that meet the query requirements is
        returned.
        If `return_groups` is set to true a dictionary of groups is
        returned.
        This dictionary maps group identifiers to a list of all PDB IDs
        belonging to this group.

    Raises
    ------
    ValueError
        If `return_type` or `content_types` is invalid, `group_by`
        is not compatible with `return_type` or the stop of `range`
        is not greater than its start.
    RequestError
        If the server responds with a status code other than 200
        or 204.

    Notes
    -----
    If `group_by` is set, the number of results may be lower than in an
    ungrouped query, as grouping is not applicable to all structures.
    For example a DNA structure has no associated *Uniprot* accession
    and hence is omitted by :class:`UniprotGrouping`.

    Also note that `sort_by` does not affect the order within a group.
    This order is determined by the `sort_by` parameter of the
    :class:`Grouping`.

    Examples
    --------

    >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
    >>> print(sorted(search(query)))
    ['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
    >>> print(search(query, sort_by="rcsb_accession_info.initial_release_date"))
    ['7R0H', '7ATG', '5NW3', '5D8V', '4JLJ', '3P4J', '3NIR', '1I0T', '1EJG']
    >>> print(search(
    ...     query, range=(1,4), sort_by="rcsb_accession_info.initial_release_date"
    ... ))
    ['7ATG', '5NW3', '5D8V']
    >>> print(sorted(search(query, return_type="polymer_instance")))
    ['1EJG.A', '1I0T.A', '1I0T.B', '3NIR.A', '3P4J.A', '3P4J.B', '4JLJ.A', '4JLJ.B', '5D8V.A', '5NW3.A', '7ATG.A', '7ATG.B', '7R0H.A']
    >>> print(search(
    ...     query, return_type="polymer_entity", return_groups=True,
    ...     group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
    ... ))
    {'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['3NIR_1', '1EJG_1']}
    """
    query_dict = _initialize_query_dict(
        query, return_type, group_by, content_types
    )

    if group_by is not None:
        if return_groups:
            query_dict["request_options"]["group_by_return_type"] \
                = "groups"
        else:
            query_dict["request_options"]["group_by_return_type"] \
                = "representatives"

    if sort_by is not None:
        if isinstance(sort_by, Sorting):
            sorting = sort_by
        else:
            sorting = Sorting(sort_by)
        query_dict["request_options"]["sort"] = [sorting.get_content()]

    if range is None:
        query_dict["request_options"]["return_all_hits"] = True
    elif range[1] <= range[0]:
        raise ValueError("Range stop must be greater than range start")
    else:
        # The API expects a start index and a number of rows
        # instead of a start and stop index
        query_dict["request_options"]["paginate"] = {
            "start": int(range[0]),
            "rows": int(range[1]) - int(range[0])
        }

    r = requests.get(_search_url, params={"json": json.dumps(query_dict)})

    if r.status_code == 200:
        if group_by is None or not return_groups:
            return [result["identifier"] for result in r.json()["result_set"]]
        else:
            return {
                group["identifier"] : [
                    result["identifier"] for result in group["result_set"]
                ]
                for group in r.json()["group_set"]
            }
    elif r.status_code == 204:
        # Search did not return any results
        return []
    else:
        try:
            message = r.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error response has no (readable) message:
            # - ValueError covers JSON decoding errors, independent of
            #   the JSON backend used to decode the response
            # - KeyError/TypeError cover a JSON body without a
            #   'message' entry
            raise RequestError(f"Error {r.status_code}")
        raise RequestError(f"Error {r.status_code}: {message}")
917
+
918
+
919
def _initialize_query_dict(query, return_type, group_by, content_types):
    """
    Initialize the request parameter dictionary with attributes that
    `count()` and `search()` have in common.

    Parameters
    ----------
    query : Query
        The search query, its ``get_content()`` is put into the
        ``'query'`` attribute.
    return_type : str
        The ``return_type`` attribute.
    group_by : Grouping or None
        If given, its ``get_content()`` is put into the ``'group_by'``
        request option.
    content_types : iterable of {"experimental", "computational"}
        The content types put into the ``'results_content_type'``
        request option.

    Returns
    -------
    query_dict : dict
        The dictionary containing the ``'query'``, ``'return_type'``
        and ``'request_options'`` attributes.

    Raises
    ------
    ValueError
        If `return_type` is unknown, `content_types` is empty or
        contains an unknown value, or `group_by` is not compatible
        with `return_type`.
    """
    if return_type not in [
        "entry", "polymer_instance", "assembly",
        "polymer_entity", "non_polymer_entity",
    ]:
        raise ValueError(f"'{return_type}' is an invalid return type")

    request_options = {}

    # Materialize the iterable once, so that arbitrary iterables
    # (e.g. sets or generators) can be checked for emptiness,
    # validated and later be serialized as JSON array
    content_types = list(content_types)
    if not content_types:
        raise ValueError("At least one content type must be specified")
    for content_type in content_types:
        if content_type not in ("experimental", "computational"):
            raise ValueError(f"Unknown content type '{content_type}'")
    request_options["results_content_type"] = content_types

    if group_by is not None:
        if not group_by.is_compatible_return_type(return_type):
            raise ValueError(
                f"Return type '{return_type}' is not compatible "
                f"with the given Grouping"
            )
        request_options["group_by"] = group_by.get_content()

    query_dict = {
        "query": query.get_content(),
        "return_type": return_type,
        "request_options": request_options
    }
    return query_dict
953
+
954
+
955
def _to_isoformat(object):
    """
    Convert a datetime into the specific ISO 8601 format required by
    the RCSB.

    Parameters
    ----------
    object : date or datetime
        The point in time to be formatted.
        Timezone-aware datetimes are converted to UTC, naive ones are
        formatted as they are.

    Returns
    -------
    formatted : str
        The formatted string, e.g. ``'2020-01-02T03:04:05Z'``.
    """
    from datetime import datetime, timezone

    # The trailing 'Z' designates UTC, hence a datetime with another
    # UTC offset needs to be converted before formatting
    if isinstance(object, datetime) and object.utcoffset() is not None:
        object = object.astimezone(timezone.utc)
    return object.strftime("%Y-%m-%dT%H:%M:%SZ")