biotite 0.41.1__cp311-cp311-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-311-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,827 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.pubchem"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Query", "NameQuery", "SmilesQuery", "InchiQuery", "InchiKeyQuery",
8
+ "FormulaQuery", "SuperstructureQuery", "SubstructureQuery",
9
+ "SimilarityQuery", "IdentityQuery",
10
+ "search"]
11
+
12
+ import copy
13
+ import abc
14
+ import collections
15
+ import requests
16
+ from .error import parse_error_details
17
+ from .throttle import ThrottleStatus
18
+ from ..error import RequestError
19
+ from ...structure.io.mol.mol import MOLFile
20
+
21
+
22
+ _base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
23
+
24
+
25
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base class of every search query for the *PubChem* REST
    API.

    In contrast to most other database interfaces in *Biotite*,
    several *PubChem* queries cannot be combined using logical
    operators.
    """

    @abc.abstractmethod
    def get_input_url_path(self):
        """
        Get the *input* part of the request URL.

        Returns
        -------
        get_input_url_path : str
            The *input* part of the request URL.
            The string must neither start nor end with a slash
            character.
        """

    def get_params(self):
        """
        Get the POST payload for this query.

        Returns
        -------
        params : dict (str -> object)
            The payload.
            This base implementation returns an empty dictionary.
        """
        return dict()

    def get_files(self):
        """
        Get the POST file payload for this query.

        Returns
        -------
        params : dict (str -> object)
            The file payload.
            This base implementation returns an empty dictionary.
        """
        return dict()
69
+
70
+
71
class NameQuery(Query):
    """
    A query that looks up compounds by their name.

    A compound only matches if its name equals the given name
    completely, but synonyms of the compound name are taken into
    account as well.

    Parameters
    ----------
    name : str
        The compound name to be searched.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """

    def __init__(self, name):
        self._name = name

    def get_input_url_path(self):
        # The name itself is transmitted in the POST payload
        url_path = "compound/name"
        return url_path

    def get_params(self):
        payload = {"name": self._name}
        return payload
98
+
99
+
100
class SmilesQuery(Query):
    """
    A query that looks up compounds by a given
    *Simplified Molecular Input Line Entry Specification* (*SMILES*)
    string.

    Parameters
    ----------
    smiles : str
        The *SMILES* string.

    Examples
    --------

    >>> print(search(SmilesQuery("CCCC")))
    [7843]
    """

    def __init__(self, smiles):
        self._smiles = smiles

    def get_input_url_path(self):
        # The SMILES string itself is transmitted in the POST payload
        url_path = "compound/smiles"
        return url_path

    def get_params(self):
        payload = {"smiles": self._smiles}
        return payload
126
+
127
+
128
class InchiQuery(Query):
    """
    A query that looks up compounds by a given
    *International Chemical Identifier* (*InChI*) string.

    Parameters
    ----------
    inchi : str
        The *InChI* string.

    Examples
    --------

    >>> print(search(InchiQuery("InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3")))
    [7843]
    """

    def __init__(self, inchi):
        self._inchi = inchi

    def get_input_url_path(self):
        # The InChI string itself is transmitted in the POST payload
        url_path = "compound/inchi"
        return url_path

    def get_params(self):
        payload = {"inchi": self._inchi}
        return payload
153
+
154
+
155
class InchiKeyQuery(Query):
    """
    A query that looks up compounds by a given
    *International Chemical Identifier* (*InChI*) key.

    Parameters
    ----------
    inchi_key : str
        The *InChI* key.

    Examples
    --------

    >>> print(search(InchiKeyQuery("IJDNQMDRQITEOD-UHFFFAOYSA-N")))
    [7843]
    """

    def __init__(self, inchi_key):
        self._inchi_key = inchi_key

    def get_input_url_path(self):
        # The InChI key itself is transmitted in the POST payload
        url_path = "compound/inchikey"
        return url_path

    def get_params(self):
        payload = {"inchikey": self._inchi_key}
        return payload
180
+
181
+
182
class FormulaQuery(Query):
    """
    A query that searches for compounds with the given molecular
    formula.

    The formula can also be created from an :class:`AtomArray` using
    the :meth:`from_atoms()` method.

    Parameters
    ----------
    formula : str
        The molecular formula, i.e. each capitalized element with its
        count in the compound concatenated into a single string.
    allow_other_elements : bool, optional
        If set to true, compounds with additional elements, not present
        in the molecular formula, will also match.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can be
        considered unlimited.

    Examples
    --------

    >>> print(search(FormulaQuery("C4H10", number=5)))
    [..., ..., ..., ..., ...]
    >>> atom_array = residue("ALA")
    >>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def __init__(self, formula, allow_other_elements=False, number=None):
        self._formula = formula
        self._allow_other_elements = allow_other_elements
        self._number = number

    @classmethod
    def from_atoms(cls, atoms, allow_other_elements=False, number=None):
        """
        Create the query from the given structure by using its
        molecular formula.

        Parameters
        ----------
        atoms : AtomArray or AtomArrayStack
            The structure to take the molecular formula from.
        allow_other_elements : bool, optional
            If set to true, compounds with additional elements, not
            present in the molecular formula, will also match.
        number : int, optional
            The maximum number of matches that this query may return.
            By default, the *PubChem* default value is used, which can
            be considered unlimited.

        Returns
        -------
        query : FormulaQuery
            The query for the molecular formula of `atoms`.
            When called on a subclass, an instance of that subclass
            is created.
        """
        element_counter = collections.Counter(atoms.element)
        # C and H come first in molecular formula
        leading = [
            element for element in ("C", "H") if element in element_counter
        ]
        # All other elements follow in alphabetical order
        trailing = sorted(
            element for element in element_counter
            if element not in ("C", "H")
        )
        formula = "".join(
            _format_element(element, element_counter[element])
            for element in leading + trailing
        )
        # Use 'cls' instead of a hard-coded class,
        # so that subclasses get an instance of their own type
        return cls(formula, allow_other_elements, number)

    def get_input_url_path(self):
        # The 'fastformula' service seems not to accept the formula
        # in the parameter section of the request
        return f"compound/fastformula/{self._formula}"

    def get_params(self):
        params = {"AllowOtherElements": self._allow_other_elements}
        # Only set maximum number, if provided by the user
        # The PubChem default value for this might change over time
        if self._number is not None:
            params["MaxRecords"] = self._number
        return params
263
+
264
+ def _format_element(element, count):
265
+ if count == 1:
266
+ return element.capitalize()
267
+ else:
268
+ return element.capitalize() + str(count)
269
+
270
+
271
class StructureQuery(Query, metaclass=abc.ABCMeta):
    """
    Abstract superclass for all structure based searches.
    It takes care of the structure input and of formatting the
    search options.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query *SMILES* string.
    smarts : str, optional
        The query *SMARTS* pattern.
    inchi : str, optional
        The query *InChI* string.
    sdf : str, optional
        A query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure given as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    """

    _query_keys = ("smiles", "smarts", "inchi", "sdf", "cid")

    def __init__(self, **kwargs):
        # Collect all given structure inputs in the order of
        # '_query_keys'
        given_keys = [
            key for key in StructureQuery._query_keys if key in kwargs
        ]
        if len(given_keys) > 1:
            # More than one structure input is ambiguous
            raise TypeError(
                "Only one of 'smiles', 'smarts', 'inchi', 'sdf' or "
                "'cid' may be given"
            )
        if not given_keys:
            raise TypeError(
                "Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' "
                "or 'cid'")
        self._query_key = given_keys[0]
        # Handled parameters are removed from 'kwargs',
        # so that only unused (invalid) parameters remain at the end
        self._query_val = kwargs.pop(self._query_key)
        self._number = kwargs.pop("number", None)
        # Parameters that were neither handled by this superclass
        # nor by the inheriting class are invalid
        for leftover in kwargs:
            raise TypeError(f"'{leftover}' is an invalid keyword argument")

    @classmethod
    def from_atoms(cls, atoms, *args, **kwargs):
        """
        Create a query using the given query structure.

        Parameters
        ----------
        atoms : AtomArray or AtomArrayStack
            The query structure.
        **kwargs : dict, optional
            See the constructor for additional options.
        """
        mol_file = MOLFile()
        mol_file.set_structure(atoms)
        # Every MOL string with "$$$$" is a valid SDF string
        # Important: USE MS-style new lines
        sdf_string = "\r\n".join(mol_file.lines) + "\r\n$$$$\r\n"
        return cls(*args, sdf=sdf_string, **kwargs)

    def get_input_url_path(self):
        base_path = f"compound/{self.search_type()}/{self._query_key}"
        if self._query_key != "cid":
            return base_path
        # Put CID in URL and not in POST payload,
        # as PubChem is confused otherwise
        return base_path + "/" + str(self._query_val)

    def get_params(self):
        params = {}
        # CID is in URL
        # SDF is given as file
        if self._query_key not in ("cid", "sdf"):
            params[self._query_key] = self._query_val
        # Only set maximum number, if provided by the user
        # The PubChem default value for this might change over time
        if self._number is not None:
            params["MaxRecords"] = self._number
        # Convert 'snake case' Python parameters
        # to 'camel case' request parameters
        params.update(
            ("".join(map(str.capitalize, name.split("_"))), value)
            for name, value in self.search_options().items()
        )
        return params

    def get_files(self):
        # Multi-line SDF string requires payload as file
        return {"sdf": self._query_val} if self._query_key == "sdf" else {}

    @abc.abstractmethod
    def search_type(self):
        """
        Get the type of performed search for the request input part.

        PROTECTED: Override when inheriting.

        Returns
        -------
        search_type : str
            The search type for the input part, i.e. the part directly
            after ``compound/``.
        """

    def search_options(self):
        """
        Get additional options for the POST options.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options : dict (str -> object)
            The keys are automatically converted from *snake case* to
            *camel case* required by the request parameters.
        """
        return {}
416
+
417
+
418
class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
    """
    Abstract superclass for super- and substructure searches.
    This class handles specific options for these searches.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query *SMILES* string.
    smarts : str, optional
        The query *SMARTS* pattern.
    inchi : str, optional
        The query *InChI* string.
    sdf : str, optional
        A query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure given as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
        (Default: False)
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
        (Default: False)
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
        (Default: False)
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
        (Default: True)
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
        (Default: True)
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
        (Default: False)
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.
        (Default: 'ignore')

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.
    """

    # The search options handled by this class, mapped to the values
    # that are used if the respective keyword argument is not given
    _option_defaults = {
        "match_charges": False,
        "match_tautomers": False,
        "rings_not_embedded": False,
        "single_double_bonds_match": True,
        "chains_match_rings": True,
        "strip_hydrogen": False,
        "stereo": "ignore",
    }

    def __init__(self, **kwargs):
        defaults = SuperOrSubstructureQuery._option_defaults
        # Copy the defaults, so the class attribute is never altered
        self._options = dict(defaults)
        # Move the search options out of 'kwargs', so only the
        # remaining arguments are forwarded to the superclass.
        # Iterate over a snapshot of the keys: removing entries from a
        # dictionary while iterating over it raises a 'RuntimeError'
        for option in list(kwargs):
            if option in defaults:
                self._options[option] = kwargs.pop(option)
        super().__init__(**kwargs)

    def search_options(self):
        """
        Get the search options including the values given by the user.

        Returns
        -------
        options : dict (str -> object)
            The option names in *snake case* mapped to their values.
        """
        return self._options
494
+
495
+
496
class SuperstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures that have the given input structure as
    superstructure.
    Put differently, the hits of this query are substructures of the
    input structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
        (Default: False)
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
        (Default: False)
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
        (Default: False)
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
        (Default: True)
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
        (Default: True)
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
        (Default: False)
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.
        (Default: 'ignore')

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SuperstructureQuery(cid=5950, number=5)))
    [..., ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def search_type(self):
        # The search type for the input part of the request
        return "fastsuperstructure"
567
+
568
+
569
class SubstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures that have the given input structure as
    substructure.
    Put differently, the hits of this query are superstructures of the
    input structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
        (Default: False)
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
        (Default: False)
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
        (Default: False)
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
        (Default: True)
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
        (Default: True)
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
        (Default: False)
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.
        (Default: 'ignore')

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SubstructureQuery(cid=5950, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SubstructureQuery.from_atoms(atom_array, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def search_type(self):
        # The search type for the input part of the request
        return "fastsubstructure"
640
+
641
+
642
class SimilarityQuery(StructureQuery):
    """
    A query for all structures that are similar to the given input
    structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    threshold : float, optional
        The minimum *Tanimoto* similarity a structure needs to be
        considered a match.
        Must be between 0 (no similarity) and 1 (complete match).
        (Default: 0.9)
    conformation_based : bool, optional
        If set to true, the 3D conformation is the basis of the
        similarity computation.
        By default, only the elements and bonds between the atoms are
        considered for similarity computation.
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Notes
    -----
    The conformation based similarity measure uses *shape-Tanimoto* and
    *color-Tanimoto* scores :footcite:`Kim2018`.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SimilarityQuery(cid=5950, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SimilarityQuery.from_atoms(atom_array, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def __init__(self, threshold=0.9, conformation_based=False, **kwargs):
        # The attributes are set before the superclass is initialized,
        # the remaining arguments are handled by the superclass
        self._threshold = threshold
        self._conformation_based = conformation_based
        super().__init__(**kwargs)

    def search_type(self):
        # The suffix selects between 2D and 3D similarity
        if self._conformation_based:
            dim = "3d"
        else:
            dim = "2d"
        return f"fastsimilarity_{dim}"

    def search_options(self):
        # The fraction is passed on as integer, scaled by a factor of 100
        scaled_threshold = int(round(self._threshold * 100))
        return {"threshold": scaled_threshold}
710
+
711
+
712
class IdentityQuery(StructureQuery):
    """
    A query for all structures that are identical to the given input
    structure.

    Exactly one of the input structure parameters `smiles`, `smarts`, `inchi`,
    `sdf` or `cid` must be given.

    Parameters
    ----------
    identity_type : {'same_connectivity', 'same_tautomer', 'same_stereo', 'same_isotope', 'same_stereo_isotope', 'nonconflicting_stereo', 'same_isotope_nonconflicting_stereo'}, optional
        The type of identity search.
        (Default: 'same_stereo_isotope')
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The maximum number of matches this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(IdentityQuery(cid=5950)))
    [5950]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(IdentityQuery.from_atoms(atom_array)))
    [5950]
    """

    def __init__(self, identity_type="same_stereo_isotope", **kwargs):
        # The remaining arguments are handled by the superclass
        self._identity_type = identity_type
        super().__init__(**kwargs)

    def search_type(self):
        # The search type for the input part of the request
        return "fastidentity"

    def get_params(self):
        # The REST API expects the parameter 'identity_type' in
        # *snake case*, so the conversion to *camel case* that is
        # applied to the 'search_options()' would be undesirable
        # -> Add the parameter directly to the request parameters
        request_params = super().get_params()
        request_params["identity_type"] = self._identity_type
        return request_params
767
+
768
+
769
+
770
+
771
def search(query, throttle_threshold=0.5, return_throttle_status=False):
    """
    Get all CIDs that meet the given query requirements,
    via the PubChem REST API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        If the load of either the request time or count exceeds this
        value the execution is halted.
        See :class:`ThrottleStatus` for more information.
        If ``None`` is given, the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` is also returned.

    Returns
    -------
    ids : list of int
        List of all compound IDs (CIDs) that meet the query requirement.
    throttle_status : ThrottleStatus
        The :class:`ThrottleStatus` obtained from the server response.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    RequestError
        If the server response indicates that the request was not
        successful.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """
    # Query the file payload only once;
    # an empty mapping means that no file is attached to the request
    files = query.get_files() or None
    # Use POST to be compatible with the larger payloads
    # of structure searches
    r = requests.post(
        _base_url + query.get_input_url_path() + "/cids/TXT",
        data=query.get_params(),
        files=files
    )
    if not r.ok:
        raise RequestError(parse_error_details(r.text))
    throttle_status = ThrottleStatus.from_response(r)
    if throttle_threshold is not None:
        throttle_status.wait_if_busy(throttle_threshold)

    # The response body is parsed as one CID per line
    cids = [int(cid) for cid in r.text.splitlines()]
    if return_throttle_status:
        return cids, throttle_status
    return cids