biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,963 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.rcsb"
6
+ __author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
7
+ __all__ = [
8
+ "Query",
9
+ "SingleQuery",
10
+ "CompositeQuery",
11
+ "BasicQuery",
12
+ "FieldQuery",
13
+ "SequenceQuery",
14
+ "StructureQuery",
15
+ "MotifQuery",
16
+ "Sorting",
17
+ "Grouping",
18
+ "DepositGrouping",
19
+ "IdentityGrouping",
20
+ "UniprotGrouping",
21
+ "search",
22
+ "count",
23
+ ]
24
+
25
+ import abc
26
+ import copy
27
+ import json
28
+ from datetime import datetime
29
+ import numpy as np
30
+ import requests
31
+ from biotite.database.error import RequestError
32
+ from biotite.sequence.seqtypes import NucleotideSequence
33
+
34
+ _search_url = "https://search.rcsb.org/rcsbsearch/v2/query"
35
+ _scope_to_target = {
36
+ "protein": "pdb_protein_sequence",
37
+ "rna": "pdb_rna_sequence",
38
+ "dna": "pdb_dna_sequence",
39
+ }
40
+
41
+
42
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base class of all queries for the RCSB search API.

    A query object represents the JSON data that is sent as query
    to the RCSB search API.
    Two queries are merged into a :class:`CompositeQuery` with the
    ``&`` and ``|`` operators.
    """

    @abc.abstractmethod
    def get_content(self):
        """
        Build the query content, i.e. the data placed under the
        ``'query'`` attribute in the RCSB search API.

        The :func:`search` and :func:`count` functions convert this
        content into JSON.

        Returns
        -------
        content : dict
            The content dictionary for the ``'query'`` attributes.
        """

    def __and__(self, query):
        # ``self & query``: group both queries with a logical 'and'
        return CompositeQuery(queries=[self, query], operator="and")

    def __or__(self, query):
        # ``self | query``: group both queries with a logical 'or'
        return CompositeQuery(queries=[self, query], operator="or")
70
+
71
+
72
class SingleQuery(Query, metaclass=abc.ABCMeta):
    """
    Abstract base class for all terminal query nodes of the RCSB
    search API.

    :class:`SingleQuery` objects are joined into
    :class:`CompositeQuery` objects with the ``|`` and ``&``
    operators.
    """

    @abc.abstractmethod
    def get_content(self):
        # A new skeleton is created on every call: the subclasses in
        # this module obtain it via ``super().get_content()`` and
        # fill in their own entries
        return dict(parameters=dict())
86
+
87
+
88
class CompositeQuery(Query):
    """
    A group query node for the RCSB search API.

    A composite query is a combination of other queries, combined
    either with the `'and'` or `'or'` operator.
    Usually, a :class:`CompositeQuery` will not be created by calling
    its constructor, but by combining queries using the ``|`` or ``&``
    operator.

    Parameters
    ----------
    queries : iterable object of Query
        The queries to be combined.
        The iterable is consumed once at construction time.
    operator : {'or', 'and'}
        The type of combination.

    Raises
    ------
    ValueError
        If `operator` is neither ``'or'`` nor ``'and'``.
    """

    def __init__(self, queries, operator):
        if operator not in ("or", "and"):
            raise ValueError(f"Operator must be 'or' or 'and', not '{operator}'")
        # Materialize the iterable: a one-shot iterator (e.g. a
        # generator) would otherwise be exhausted by the first
        # 'get_content()' call and each later call would silently
        # produce an empty list of nodes
        self._queries = list(queries)
        self._operator = operator

    def get_content(self):
        """
        A dictionary representation of the query.
        This dictionary is the content of the ``'query'`` key in the
        JSON query.

        Returns
        -------
        content : dict
            The dictionary representation of the query.
        """
        content = {
            "type": "group",
            "logical_operator": self._operator,
            # Each child query contributes its own content as node
            "nodes": [query.get_content() for query in self._queries],
        }
        return content
129
+
130
+
131
class BasicQuery(SingleQuery):
    """
    A full-text query that looks for a term in all available fields.

    Parameters
    ----------
    term : str
        The search term.
        If the term contains multiple words, the query will return
        results where the entire term is present.
        The matching is not case-sensitive.
        Logic combinations of terms is described
        `here <https://search.rcsb.org/#basic-queries>`_.

    Examples
    --------

    >>> query = BasicQuery("Miniprotein Construct")
    >>> print(sorted(search(query)))
    ['1L2Y']
    """

    def __init__(self, term):
        super().__init__()
        self._term = term

    def get_content(self):
        content = super().get_content()
        content.update(type="terminal", service="full_text")
        # The term is enclosed in double quotes before it is sent
        content["parameters"]["value"] = '"{}"'.format(self._term)
        return content
164
+
165
+
166
class FieldQuery(SingleQuery):
    """
    A text query that compares the values of a given field using a
    given operator.

    The operator is selected by passing the search value to the keyword
    argument named after that operator.
    At most one operator can be given.
    Without any operator, the search returns the results where the
    given field exists.

    A :class:`FieldQuery` is negated using the ``~`` operator.

    Parameters
    ----------
    field : str
        The field to search in.
    molecular_definition : bool, optional
        If set true, this query searches in fields
        associated with
        `molecular definitions <https://search.rcsb.org/chemical-search-attributes.html>`_.
        If false (default), this query searches in fields
        associated with `PDB structures <https://search.rcsb.org/structure-search-attributes.html>`_.
    case_sensitive : bool, optional
        If set to true, searches are case sensitive.
        By default matching is case-insensitive.
    exact_match : str, optional
        Operator for returning results whose field exactly matches the
        given value.
    contains_words, contains_phrase : str, optional
        Operator for returning results whose field matches
        individual words from the given value or the value as exact
        phrase, respectively.
    greater, less, greater_or_equal, less_or_equal, equals : int or float or datetime, optional
        Operator for returning results whose field values are larger,
        smaller or equal to the given value.
    range, range_closed : tuple(int, int) or tuple(float, float) or tuple(datetime, datetime), optional
        Operator for returning results whose field matches values within
        the given range.
        `range_closed` includes the interval limits.
    is_in : tuple of str or list of str, optional
        Operator for returning results whose field matches any of the
        values in the given list.

    Notes
    -----
    A complete list of the available fields and its supported operators
    is documented at
    `<https://search.rcsb.org/structure-search-attributes.html>`_
    and
    `<https://search.rcsb.org/chemical-search-attributes.html>`_.

    Examples
    --------

    >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
    >>> print(sorted(search(query)))
    ['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
    """

    def __init__(
        self, field, molecular_definition=False, case_sensitive=False, **kwargs
    ):
        super().__init__()
        self._negation = False
        self._field = field
        self._mol_definition = molecular_definition
        self._case_sensitive = case_sensitive

        if len(kwargs) > 1:
            raise TypeError("Only one operator must be given")
        # If no operator is given, fall back to the 'exists' operator,
        # which has no value
        operator, value = next(iter(kwargs.items()), ("exists", None))

        if operator not in (
            "exact_match",
            "contains_words",
            "contains_phrase",
            "greater",
            "less",
            "greater_or_equal",
            "less_or_equal",
            "equals",
            "range",
            "range_closed",
            "is_in",
            "exists",
        ):
            raise TypeError(
                f"Constructor got an unexpected keyword argument '{operator}'"
            )

        # Convert dates into ISO 8601
        if isinstance(value, datetime):
            value = _to_isoformat(value)
        elif isinstance(value, (tuple, list, np.ndarray)):
            value = [
                _to_isoformat(item) if isinstance(item, datetime) else item
                for item in value
            ]

        if operator in ("range", "range_closed"):
            # Both variants are sent as 'range' operator and differ
            # only in whether the interval limits are included
            inclusive = operator == "range_closed"
            value = {
                "from": value[0],
                "include_lower": inclusive,
                "to": value[1],
                "include_upper": inclusive,
            }
            operator = "range"
        elif operator == "is_in":
            # 'in' is not an available parameter name in Python
            operator = "in"

        self._operator = operator
        self._value = value

    def get_content(self):
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "text_chem" if self._mol_definition else "text"
        parameters = content["parameters"]
        parameters["attribute"] = self._field
        parameters["operator"] = self._operator
        parameters["negation"] = self._negation
        parameters["case_sensitive"] = self._case_sensitive
        # The 'value' entry is omitted if the query has no value
        if self._value is not None:
            parameters["value"] = self._value
        return content

    def __invert__(self):
        # Negation gives a modified copy, this query is not changed
        negated = copy.deepcopy(self)
        negated._negation = not self._negation
        return negated
315
+
316
+
317
class SequenceQuery(SingleQuery):
    """
    A query for protein/DNA/RNA molecules whose sequence is similar to
    a given input sequence, as found by
    `MMseqs2 <https://github.com/soedinglab/mmseqs2>`_.

    Parameters
    ----------
    sequence : Sequence or str
        The input sequence.
        If `sequence` is a :class:`NucleotideSequence` and the `scope`
        is ``'rna'``, ``'T'`` is automatically replaced by ``'U'``.
    scope : {'protein', 'dna', 'rna'}
        The type of molecule to find.
    min_identity : float, optional
        A match is only returned, if the sequence identity between
        the match and the input sequence exceeds this value.
        Must be between 0 and 1.
        By default, the sequence identity is ignored.
    max_expect_value : float, optional
        A match is only returned, if the *expect value* (E-value) does
        not exceed this value.
        By default, the value is effectively ignored.

    Notes
    -----
    *MMseqs2* is run on the RCSB servers.

    Examples
    --------

    >>> sequence = "NLYIQWLKDGGPSSGRPPPS"
    >>> query = SequenceQuery(sequence, scope="protein", min_identity=0.95)
    >>> print(sorted(search(query)))
    ['1L2Y', '2LDJ', '9G22', '9G2N', '9G2O', '9G31', '9G32', '9GDL', '9GDN', '9GDT', '9GDU', '9GE1']
    """

    def __init__(self, sequence, scope, min_identity=0.0, max_expect_value=10000000.0):
        super().__init__()
        # The scope is matched case-insensitively
        scope_key = scope.lower()
        self._target = _scope_to_target.get(scope_key)
        if self._target is None:
            raise ValueError(f"'{scope}' is an invalid scope")

        residues = str(sequence)
        if scope_key == "rna" and isinstance(sequence, NucleotideSequence):
            residues = residues.replace("T", "U")
        self._sequence = residues

        self._min_identity = min_identity
        self._max_expect_value = max_expect_value

    def get_content(self):
        content = super().get_content()
        content.update(type="terminal", service="sequence")
        content["parameters"].update(
            value=self._sequence,
            target=self._target,
            identity_cutoff=self._min_identity,
            evalue_cutoff=self._max_expect_value,
        )
        return content
377
+
378
+
379
class MotifQuery(SingleQuery):
    """
    A query for protein/DNA/RNA molecules containing the given sequence
    motif.

    Parameters
    ----------
    pattern : str
        The sequence pattern.
    pattern_type : {'simple', 'prosite', 'regex'}
        The type of the pattern.
    scope : {'protein', 'dna', 'rna'}
        The type of molecule to find.

    Raises
    ------
    ValueError
        If `scope` is not one of the supported molecule types.

    Examples
    --------

    >>> query = MotifQuery(
    ...     "C-x(2,4)-C-x(3)-[LIVMFYWC]-x(8)-H-x(3,5)-H.",
    ...     "prosite",
    ...     "protein"
    ... )
    """

    def __init__(self, pattern, pattern_type, scope):
        super().__init__()
        self._pattern = pattern
        self._pattern_type = pattern_type
        self._target = _scope_to_target.get(scope.lower())
        # Reject unknown scopes right away, in the same way as
        # 'SequenceQuery' does, instead of sending a 'None' target
        if self._target is None:
            raise ValueError(f"'{scope}' is an invalid scope")

    def get_content(self):
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "seqmotif"
        content["parameters"]["value"] = self._pattern
        content["parameters"]["pattern_type"] = self._pattern_type
        content["parameters"]["target"] = self._target
        return content
417
+
418
+
419
class StructureQuery(SingleQuery):
    """
    A query for protein/DNA/RNA molecules with structural similarity
    to the query structure.

    Exactly one of the chain ID and the assembly ID of the query
    structure must be specified.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the query structure.
    chain : str, optional
        The chain ID (more exactly ``asym_id``) of the query structure.
    assembly : str, optional
        The assembly ID (``assembly_id``) of the query structure.
    strict : bool, optional
        If true, structure comparison is strict, otherwise it is
        relaxed.

    Raises
    ------
    TypeError
        If neither or both of `chain` and `assembly` are given.

    Examples
    --------

    >>> query = StructureQuery("1L2Y", chain="A")
    >>> print(sorted(search(query)))
    ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS', '9DPF']
    """

    def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
        super().__init__()

        # The two identifiers are mutually exclusive and one is required
        if (chain is None) == (assembly is None):
            raise TypeError("Either the chain ID or assembly ID must be set")
        elif chain is None:
            # The key must be spelled 'assembly_id' to be understood by
            # the RCSB search API
            self._value = {"entry_id": pdb_id, "assembly_id": assembly}
        else:
            self._value = {"entry_id": pdb_id, "asym_id": chain}

        self._operator = "strict_shape_match" if strict else "relaxed_shape_match"

    def get_content(self):
        # Extend the base content with the attributes of the structure
        # similarity search service.
        content = super().get_content()
        content["type"] = "terminal"
        content["service"] = "structure"
        content["parameters"]["value"] = self._value
        content["parameters"]["operator"] = self._operator
        return content
468
+
469
+
470
class Sorting:
    """
    A sorting criterion consisting of a field name and a sort
    direction.

    Parameters
    ----------
    field : str
        The name of the field to sort by.
    descending : bool, optional
        If true, the order is descending, otherwise ascending.
    """

    def __init__(self, field, descending=True):
        self._field, self._descending = field, descending

    @property
    def field(self):
        return self._field

    @property
    def descending(self):
        return self._descending

    def get_content(self):
        """
        Get the sorting content, i.e. the data belonging to the
        ``'sort'`` and ``'ranking_criteria_type'`` attributes in the
        RCSB search API.

        This content is converted into JSON by the :func:`search`
        function.

        Returns
        -------
        content : dict
            The content dictionary for the ``'sort'`` and
            ``'ranking_criteria_type'`` attributes.
        """
        return {
            "sort_by": self._field,
            "direction": "desc" if self._descending else "asc",
        }
500
+
501
+
502
class Grouping(metaclass=abc.ABCMeta):
    """
    A representation of the JSON grouping options of the RCSB search
    API.

    Parameters
    ----------
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def __init__(self, sort_by=None):
        # `None` and ready-made `Sorting` objects are stored as they are,
        # anything else is taken as field name with the default order
        if sort_by is None or isinstance(sort_by, Sorting):
            self._sorting = sort_by
        else:
            self._sorting = Sorting(sort_by)

    @abc.abstractmethod
    def get_content(self):
        """
        Get the grouping content, i.e. the data belonging to the
        ``'group_by'`` attribute in the RCSB search API.

        This content is converted into JSON by the :func:`search`
        and :func:`count` functions.

        ABSTRACT: Override when inheriting.

        Returns
        -------
        content : dict
            The content dictionary for the ``'group_by'`` attributes.
        """
        if self._sorting is None:
            return {}
        return {"ranking_criteria_type": self._sorting.get_content()}

    @abc.abstractmethod
    def is_compatible_return_type(self, return_type):
        """
        Check whether this :class:`Grouping` is compatible with the
        RCSB search API ``return_type``.

        ABSTRACT: Override when inheriting.

        Parameters
        ----------
        return_type : str
            The ``return_type`` attribute to be checked.

        Returns
        -------
        is_compatible : bool
            True, if this :class:`Grouping` is compatible with the
            `return_type`, false otherwise.
        """
570
+
571
+
572
class DepositGrouping(Grouping):
    """
    This class groups PDB entries if they were deposited as a
    collection.
    Such a group usually contains the same protein with e.g. a different
    bound molecule.

    This :class:`Grouping` is only applicable, if the
    :func:`count()`/:func:`search()` return type is set to ``entry``.

    Parameters
    ----------
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def get_content(self):
        # Base content (optional ranking criterion) plus the
        # aggregation method of this grouping
        group_content = super().get_content()
        group_content.update({"aggregation_method": "matching_deposit_group_id"})
        return group_content

    def is_compatible_return_type(self, return_type):
        return "entry" == return_type
603
+
604
+
605
class IdentityGrouping(Grouping):
    """
    This class groups protein chains with a given sequence identity
    with each other.

    This :class:`Grouping` is only applicable, if the
    :func:`count()`/:func:`search()` return type is set to
    ``polymer_entity``.

    Parameters
    ----------
    similarity_cutoff : {100, 95, 90, 70, 50, 30}
        The sequence identity in percent at which the structures are
        grouped.
        In other words, a returned group contains sequences that have
        `similarity_cutoff` sequence identity with each other.
        Since the PDB uses precalculated clusters, only certain values
        are available.
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def __init__(self, similarity_cutoff, sort_by=None):
        super().__init__(sort_by)
        # Only these cutoffs are accepted
        supported_cutoffs = (100, 95, 90, 70, 50, 30)
        if similarity_cutoff not in supported_cutoffs:
            raise ValueError(
                f"A similarity cutoff of {similarity_cutoff}% is not supported"
            )
        self._similarity_cutoff = similarity_cutoff

    def get_content(self):
        # Base content (optional ranking criterion) plus the
        # aggregation method and its cutoff
        group_content = super().get_content()
        group_content.update(
            {
                "aggregation_method": "sequence_identity",
                "similarity_cutoff": self._similarity_cutoff,
            }
        )
        return group_content

    def is_compatible_return_type(self, return_type):
        return "polymer_entity" == return_type
651
+
652
+
653
class UniprotGrouping(Grouping):
    """
    This class groups protein chains that point to the same *Uniprot*
    accession ID.

    This :class:`Grouping` is only applicable, if the
    :func:`count()`/:func:`search()` return type is set to
    ``polymer_entity``.

    Parameters
    ----------
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs within each group are sorted
        by the values of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given, sorting is performed in descending order.
        To choose the order a :class:`Sorting` object needs to be
        provided.
    """

    def get_content(self):
        # Base content (optional ranking criterion) plus the
        # aggregation method of this grouping
        group_content = super().get_content()
        group_content.update({"aggregation_method": "matching_uniprot_accession"})
        return group_content

    def is_compatible_return_type(self, return_type):
        return "polymer_entity" == return_type
683
+
684
+
685
def count(query, return_type="entry", group_by=None, content_types=("experimental",)):
    """
    Count PDB entries that meet the given query requirements,
    via the RCSB search API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    return_type : {'entry', 'assembly', 'polymer_entity', 'non_polymer_entity', 'polymer_instance'}, optional
        The type of the counted identifiers:

        - ``'entry'``: All matching PDB entries are counted.
        - ``'assembly'``: All matching assemblies are counted.
        - ``'polymer_entity'``: All matching polymeric entities are
          counted.
        - ``'non_polymer_entity'``: All matching non-polymeric entities
          are counted.
        - ``'polymer_instance'``: All matching chains are counted.
    group_by : Grouping
        If this parameter is set, the number of groups is returned
        instead.
    content_types : iterable of {"experimental", "computational"}, optional
        Specify whether experimental and computational structures should
        be included.
        At least one of them needs to be specified.
        By default only experimental structures are included.
        Note, that identifiers for computational structures cannot be
        downloaded via :func:`biotite.database.rcsb.fetch()` as they
        point to *AlphaFold DB* and *ModelArchive*.

    Returns
    -------
    count : int
        The total number of PDB IDs (or groups) that would be returned
        by calling :func:`search()` using the same parameters.

    Raises
    ------
    RequestError
        If the server answers with a status code other than 200 or 204.

    Notes
    -----
    If `group_by` is set, the number of results may be lower than in an
    ungrouped query, as grouping is not applicable to all structures.
    For example a DNA structure has no associated *Uniprot* accession
    and hence is omitted by :class:`UniprotGrouping`.

    Examples
    --------

    >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
    >>> print(count(query))
    9
    >>> ids = search(query)
    >>> print(sorted(ids))
    ['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
    """
    query_dict = _initialize_query_dict(query, return_type, group_by, content_types)

    query_dict["request_options"]["return_counts"] = True

    r = requests.get(_search_url, params={"json": json.dumps(query_dict)})

    if r.status_code == 200:
        if group_by is None:
            return r.json()["total_count"]
        else:
            return r.json()["group_by_count"]
    elif r.status_code == 204:
        # Search did not return any results
        return 0
    else:
        try:
            message = r.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error response carries no usable message:
            # the body is either not valid JSON (every JSON decode error
            # is a `ValueError`) or has no 'message' entry
            raise RequestError(f"Error {r.status_code}") from None
        raise RequestError(f"Error {r.status_code}: {message}")
761
+
762
+
763
def search(
    query,
    return_type="entry",
    range=None,
    sort_by=None,
    group_by=None,
    return_groups=False,
    content_types=("experimental",),
):
    """
    Get all PDB IDs that meet the given query requirements,
    via the RCSB search API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    return_type : {'entry', 'assembly', 'polymer_entity', 'non_polymer_entity', 'polymer_instance'}, optional
        The type of the returned identifiers:

        - ``'entry'``: Only the PDB ID is returned (e.g. ``'XXXX'``).
          These can be used directly as input to :func:`fetch()`.
        - ``'assembly'``: The PDB ID appended with assembly ID is
          returned (e.g. ``'XXXX-1'``).
        - ``'polymer_entity'``: The PDB ID appended with entity ID of
          polymers is returned (e.g. ``'XXXX_1'``).
        - ``'non_polymer_entity'``: The PDB ID appended with entity ID
          of non-polymeric entities is returned (e.g. ``'XXXX_1'``).
        - ``'polymer_instance'``: The PDB ID appended with chain ID
          (more exactly ``'asym_id'``) is returned (e.g. ``'XXXX.A'``).

    range : tuple(int, int), optional
        If this parameter is specified, only PDB IDs in this range
        are selected from all matching PDB IDs and returned
        (pagination).
        The range is zero-indexed and the stop value is exclusive.
    sort_by : str or Sorting, optional
        If specified, the returned PDB IDs are sorted by the values
        of the given field name.
        A complete list of the available fields is documented at
        `<https://search.rcsb.org/structure-search-attributes.html>`_.
        and
        `<https://search.rcsb.org/chemical-search-attributes.html>`_.
        If a string is given sorting is performed in descending order.
        To choose the order, a :class:`Sorting` object needs to be
        provided.
    group_by : Grouping
        If this parameter is set, the PDB IDs that meet the query
        requirements, are grouped according to the given criterion.
    return_groups : boolean, optional
        Only has effect, if `group_by` is set.
        By default the representative with the highest rank in each
        group is returned.
        The rank is determined by the `sort_by` parameter of
        :class:`Grouping` provided in `group_by`.
        If set to true, groups containing all structures belonging to
        the group are returned instead.
    content_types : iterable of {"experimental", "computational"}, optional
        Specify whether experimental and computational structures should
        be included.
        At least one of them needs to be specified.
        By default only experimental structures are included.
        Note, that identifiers for computational structures cannot be
        downloaded via :func:`biotite.database.rcsb.fetch()` as they
        point to *AlphaFold DB* and *ModelArchive*.

    Returns
    -------
    ids : list of str or dict (str -> list of str)
        If `return_groups` is false (default case), a list of strings
        containing all PDB IDs that meet the query requirements is
        returned.
        If `return_groups` is set to true a dictionary of groups is
        returned.
        This dictionary maps group identifiers to a list of all PDB IDs
        belonging to this group.

    Raises
    ------
    ValueError
        If the stop value of `range` is not greater than its start
        value.
    RequestError
        If the server answers with a status code other than 200 or 204.

    Notes
    -----
    If `group_by` is set, the number of results may be lower than in an
    ungrouped query, as grouping is not applicable to all structures.
    For example a DNA structure has no associated *Uniprot* accession
    and hence is omitted by :class:`UniprotGrouping`.

    Also note that `sort_by` does not affect the order within a group.
    This order is determined by the `sort_by` parameter of the
    :class:`Grouping`.

    Examples
    --------

    >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
    >>> print(sorted(search(query)))
    ['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
    >>> print(search(query, sort_by="rcsb_accession_info.initial_release_date"))
    ['7R0H', '7ATG', '5NW3', '5D8V', '4JLJ', '3P4J', '3NIR', '1I0T', '1EJG']
    >>> print(search(
    ...     query, range=(1,4), sort_by="rcsb_accession_info.initial_release_date"
    ... ))
    ['7ATG', '5NW3', '5D8V']
    >>> print(sorted(search(query, return_type="polymer_instance")))
    ['1EJG.A', '1I0T.A', '1I0T.B', '3NIR.A', '3P4J.A', '3P4J.B', '4JLJ.A', '4JLJ.B', '5D8V.A', '5NW3.A', '7ATG.A', '7ATG.B', '7R0H.A']
    >>> print(search(
    ...     query, return_type="polymer_entity", return_groups=True,
    ...     group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
    ... ))
    {'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['3NIR_1', '1EJG_1']}
    """
    # NOTE: the `range` parameter shadows the builtin of the same name
    # within this function; the name is part of the public interface
    query_dict = _initialize_query_dict(query, return_type, group_by, content_types)

    if group_by is not None:
        if return_groups:
            query_dict["request_options"]["group_by_return_type"] = "groups"
        else:
            query_dict["request_options"]["group_by_return_type"] = "representatives"

    if sort_by is not None:
        if isinstance(sort_by, Sorting):
            sorting = sort_by
        else:
            sorting = Sorting(sort_by)
        query_dict["request_options"]["sort"] = [sorting.get_content()]

    if range is None:
        query_dict["request_options"]["return_all_hits"] = True
    elif range[1] <= range[0]:
        raise ValueError("Range stop must be greater than range start")
    else:
        # Translate the exclusive (start, stop) range into the
        # start/row-count pagination of the request
        query_dict["request_options"]["paginate"] = {
            "start": int(range[0]),
            "rows": int(range[1]) - int(range[0]),
        }

    r = requests.get(_search_url, params={"json": json.dumps(query_dict)})

    if r.status_code == 200:
        if group_by is None or not return_groups:
            return [result["identifier"] for result in r.json()["result_set"]]
        else:
            return {
                group["identifier"]: [
                    result["identifier"] for result in group["result_set"]
                ]
                for group in r.json()["group_set"]
            }
    elif r.status_code == 204:
        # Search did not return any results
        return []
    else:
        try:
            message = r.json()["message"]
        except (ValueError, KeyError, TypeError):
            # The error response carries no usable message:
            # the body is either not valid JSON (every JSON decode error
            # is a `ValueError`) or has no 'message' entry
            raise RequestError(f"Error {r.status_code}") from None
        raise RequestError(f"Error {r.status_code}: {message}")
919
+
920
+
921
+ def _initialize_query_dict(query, return_type, group_by, content_types):
922
+ """
923
+ Initialize the request parameter dictionary with attributes that
924
+ `count()` and `search()` have in common.
925
+ """
926
+ if return_type not in [
927
+ "entry",
928
+ "polymer_instance",
929
+ "assembly",
930
+ "polymer_entity",
931
+ "non_polymer_entity",
932
+ ]:
933
+ raise ValueError(f"'{return_type}' is an invalid return type")
934
+
935
+ request_options = {}
936
+
937
+ if len(content_types) == 0:
938
+ raise ValueError("At least one content type must be specified")
939
+ for content_type in content_types:
940
+ if content_type not in ("experimental", "computational"):
941
+ raise ValueError(f"Unknown content type '{content_type}'")
942
+ request_options["results_content_type"] = content_types
943
+
944
+ if group_by is not None:
945
+ if not group_by.is_compatible_return_type(return_type):
946
+ raise ValueError(
947
+ f"Return type '{return_type}' is not compatible with the given Grouping"
948
+ )
949
+ request_options["group_by"] = group_by.get_content()
950
+
951
+ query_dict = {
952
+ "query": query.get_content(),
953
+ "return_type": return_type,
954
+ "request_options": request_options,
955
+ }
956
+ return query_dict
957
+
958
+
959
+ def _to_isoformat(object):
960
+ """
961
+ Convert a datetime into the specifc ISO 8601 format required by the RCSB.
962
+ """
963
+ return object.strftime("%Y-%m-%dT%H:%M:%SZ")