biotite 1.1.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-darwin.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-darwin.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-darwin.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-darwin.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-darwin.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,830 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.pubchem"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = [
8
+ "Query",
9
+ "NameQuery",
10
+ "SmilesQuery",
11
+ "InchiQuery",
12
+ "InchiKeyQuery",
13
+ "FormulaQuery",
14
+ "SuperstructureQuery",
15
+ "SubstructureQuery",
16
+ "SimilarityQuery",
17
+ "IdentityQuery",
18
+ "search",
19
+ ]
20
+
21
+ import abc
22
+ import collections
23
+ import copy
24
+ import requests
25
+ from biotite.database.error import RequestError
26
+ from biotite.database.pubchem.error import parse_error_details
27
+ from biotite.database.pubchem.throttle import ThrottleStatus
28
+ from biotite.structure.io.mol.mol import MOLFile
29
+
30
+ _base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
31
+
32
+
33
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base class of all search queries for the *PubChem* REST
    API.

    In contrast to most other database interfaces in *Biotite*,
    multiple queries cannot be combined using logical operators.
    """

    @abc.abstractmethod
    def get_input_url_path(self):
        """
        Get the *input* part of the request URL.

        Returns
        -------
        input_url_path : str
            The *input* part of the request URL.
            The string must neither start nor end with a slash
            character.
        """
        return None

    def get_params(self):
        """
        Get the POST payload for this query.

        Returns
        -------
        params : dict (str -> object)
            The payload.
            This base implementation returns an empty dictionary.
        """
        return dict()

    def get_files(self):
        """
        Get the POST file payload for this query.

        Returns
        -------
        files : dict (str -> object)
            The file payload.
            This base implementation returns an empty dictionary.
        """
        return dict()
77
+
78
+
79
class NameQuery(Query):
    """
    A query that searches for compounds by name.

    A compound only matches, if the given name is equal to its
    complete name.
    However, synonyms of the compound name are taken into account as
    well.

    Parameters
    ----------
    name : str
        The compound name to be searched.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """

    def __init__(self, name):
        self._name = name

    def get_input_url_path(self):
        # The name itself is transmitted via the POST payload
        return "compound/name"

    def get_params(self):
        return dict(name=self._name)
106
+
107
+
108
class SmilesQuery(Query):
    """
    A query that searches for compounds matching a
    *Simplified Molecular Input Line Entry Specification* (*SMILES*)
    string.

    Parameters
    ----------
    smiles : str
        The *SMILES* string.

    Examples
    --------

    >>> print(search(SmilesQuery("CCCC")))
    [7843]
    """

    def __init__(self, smiles):
        self._smiles = smiles

    def get_input_url_path(self):
        # The SMILES string itself is transmitted via the POST payload
        return "compound/smiles"

    def get_params(self):
        return dict(smiles=self._smiles)
134
+
135
+
136
class InchiQuery(Query):
    """
    A query that searches for compounds matching an
    *International Chemical Identifier* (*InChI*) string.

    Parameters
    ----------
    inchi : str
        The *InChI* string.

    Examples
    --------

    >>> print(search(InchiQuery("InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3")))
    [7843]
    """

    def __init__(self, inchi):
        self._inchi = inchi

    def get_input_url_path(self):
        # The InChI string itself is transmitted via the POST payload
        return "compound/inchi"

    def get_params(self):
        return dict(inchi=self._inchi)
161
+
162
+
163
class InchiKeyQuery(Query):
    """
    A query that searches for compounds matching an
    *International Chemical Identifier* (*InChI*) key.

    Parameters
    ----------
    inchi_key : str
        The *InChI* key.

    Examples
    --------

    >>> print(search(InchiKeyQuery("IJDNQMDRQITEOD-UHFFFAOYSA-N")))
    [7843]
    """

    def __init__(self, inchi_key):
        self._inchi_key = inchi_key

    def get_input_url_path(self):
        # The InChI key itself is transmitted via the POST payload
        return "compound/inchikey"

    def get_params(self):
        return dict(inchikey=self._inchi_key)
188
+
189
+
190
class FormulaQuery(Query):
    """
    A query that searches for compounds with the given molecular
    formula.

    The formula can also be created from an :class:`AtomArray` using
    the :meth:`from_atoms()` method.

    Parameters
    ----------
    formula : str
        The molecular formula, i.e. each capitalized element with its
        count in the compound concatenated into a single string.
    allow_other_elements : bool, optional
        If set to true, compounds with additional elements, not present
        in the molecular formula, will also match.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can be
        considered unlimited.

    Examples
    --------

    >>> print(search(FormulaQuery("C4H10", number=5)))
    [..., ..., ..., ..., ...]
    >>> atom_array = residue("ALA")
    >>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def __init__(self, formula, allow_other_elements=False, number=None):
        self._formula = formula
        self._allow_other_elements = allow_other_elements
        self._number = number

    @classmethod
    def from_atoms(cls, atoms, allow_other_elements=False, number=None):
        """
        Create the query from the given structure by using its
        molecular formula.

        Parameters
        ----------
        atoms : AtomArray or AtomArrayStack
            The structure to take the molecular formula from.
        allow_other_elements : bool, optional
            If set to true, compounds with additional elements, not
            present in the molecular formula, will also match.
        number : int, optional
            The maximum number of matches that this query may return.
            By default, the *PubChem* default value is used, which can
            be considered unlimited.

        Returns
        -------
        query : FormulaQuery
            The query for the molecular formula of `atoms`.
            When called on a subclass, an instance of that subclass is
            created.
        """
        element_counter = collections.Counter(atoms.element)
        # C and H come first in molecular formula
        leading_elements = [
            element for element in ("C", "H") if element in element_counter
        ]
        # All other elements follow in alphabetical order
        other_elements = sorted(
            element for element in element_counter
            if element not in ("C", "H")
        )
        formula = "".join(
            _format_element(element, element_counter[element])
            for element in leading_elements + other_elements
        )
        # Use 'cls' instead of the hard-coded class,
        # so that subclasses obtain an instance of their own type
        return cls(formula, allow_other_elements, number)

    def get_input_url_path(self):
        # The 'fastformula' service seems not to accept the formula
        # in the parameter section of the request
        return f"compound/fastformula/{self._formula}"

    def get_params(self):
        params = {"AllowOtherElements": self._allow_other_elements}
        # Only set maximum number, if provided by the user
        # The PubChem default value for this might change over time
        if self._number is not None:
            params["MaxRecords"] = self._number
        return params
271
+
272
+
273
def _format_element(element, count):
    """
    Format a single element for usage in a molecular formula.

    The element symbol is capitalized and followed by its count.
    The count is omitted, if it is equal to 1.
    """
    symbol = element.capitalize()
    return symbol if count == 1 else symbol + str(count)
278
+
279
+
280
class StructureQuery(Query, metaclass=abc.ABCMeta):
    """
    Abstract base class of all searches that are based on a query
    structure.
    It takes care of the structure input and the formatting of the
    search options.

    The query structure must be given via exactly one of the parameters
    `smiles`, `smarts`, `inchi`, `sdf` or `cid`.

    Parameters
    ----------
    smiles : str, optional
        The query *SMILES* string.
    smarts : str, optional
        The query *SMARTS* pattern.
    inchi : str, optional
        The query *InChI* string.
    sdf : str, optional
        A query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure given as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    """

    _query_keys = ("smiles", "smarts", "inchi", "sdf", "cid")

    def __init__(self, **kwargs):
        given_keys = [
            key for key in StructureQuery._query_keys if key in kwargs
        ]
        if len(given_keys) > 1:
            # Multiple query structures are ambiguous
            raise TypeError(
                "Only one of 'smiles', 'smarts', 'inchi', 'sdf' or "
                "'cid' may be given"
            )
        if len(given_keys) == 0:
            raise TypeError(
                "Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' "
                "or 'cid'"
            )
        self._query_key = given_keys[0]
        # Handled parameters are removed from 'kwargs',
        # so that only unused (invalid) parameters remain in the end
        self._query_val = kwargs.pop(self._query_key)
        self._number = kwargs.pop("number", None)
        # Parameters that were neither consumed by this superclass nor
        # by the inheriting class are invalid
        if kwargs:
            invalid_key = next(iter(kwargs))
            raise TypeError(f"'{invalid_key}' is an invalid keyword argument")

    @classmethod
    def from_atoms(cls, atoms, *args, **kwargs):
        """
        Create a query using the given query structure.

        Parameters
        ----------
        atoms : AtomArray or AtomArrayStack
            The query structure.
        *args, **kwargs
            Additional options that are forwarded to the constructor.
        """
        mol_file = MOLFile()
        mol_file.set_structure(atoms)
        # A MOL string terminated by "$$$$" is a valid SDF string
        # Important: MS-style new lines are required
        sdf_terminator = "\r\n$$$$\r\n"
        sdf_string = "\r\n".join(mol_file.lines) + sdf_terminator
        return cls(*args, sdf=sdf_string, **kwargs)

    def get_input_url_path(self):
        url_path = f"compound/{self.search_type()}/{self._query_key}"
        if self._query_key != "cid":
            return url_path
        # The CID is part of the URL instead of the POST payload,
        # as PubChem is confused otherwise
        return f"{url_path}/{self._query_val!s}"

    def get_params(self):
        params = {}
        # The CID is already part of the URL
        # and the SDF is transmitted as file
        if self._query_key not in ("cid", "sdf"):
            params[self._query_key] = self._query_val
        # The maximum number is only set, if provided by the user,
        # since the PubChem default value might change over time
        if self._number is not None:
            params["MaxRecords"] = self._number
        for option, value in self.search_options().items():
            # The Python parameters are written in 'snake case',
            # the request parameters require 'camel case'
            camel_case_option = "".join(
                map(str.capitalize, option.split("_"))
            )
            params[camel_case_option] = value
        return params

    def get_files(self):
        # A multi-line SDF string needs to be transmitted as file
        return {"sdf": self._query_val} if self._query_key == "sdf" else {}

    @abc.abstractmethod
    def search_type(self):
        """
        Get the type of performed search for the request input part.

        PROTECTED: Override when inheriting.

        Returns
        -------
        search_type : str
            The search type for the input part, i.e. the part directly
            after ``compound/``.
        """

    def search_options(self):
        """
        Get additional options for the POST payload.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options : dict (str -> object)
            The keys are automatically converted from *snake case* to
            *camel case* required by the request parameters.
            This base implementation returns no options.
        """
        return dict()
421
+
422
+
423
class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
    """
    Abstract superclass for super- and substructure searches.
    This class handles specific options for these searches.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query *SMILES* string.
    smarts : str, optional
        The query *SMARTS* pattern.
    inchi : str, optional
        The query *InChI* string.
    sdf : str, optional
        A query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure given as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
        (Default: False)
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
        (Default: False)
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
        (Default: False)
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
        (Default: True)
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
        (Default: True)
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
        (Default: False)
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.
        (Default: 'ignore')

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.
    """

    _option_defaults = {
        "match_charges": False,
        "match_tautomers": False,
        "rings_not_embedded": False,
        "single_double_bonds_match": True,
        "chains_match_rings": True,
        "strip_hydrogen": False,
        "stereo": "ignore",
    }

    def __init__(self, **kwargs):
        self._options = copy.copy(SuperOrSubstructureQuery._option_defaults)
        # Iterate over the known options instead of 'kwargs':
        # Removing entries from 'kwargs' while iterating over it would
        # raise a 'RuntimeError' as soon as any option is given
        for option in SuperOrSubstructureQuery._option_defaults:
            if option in kwargs:
                # The option is removed from 'kwargs', as the superclass
                # rejects all parameters it does not know
                self._options[option] = kwargs.pop(option)
        super().__init__(**kwargs)

    def search_options(self):
        # The defaults overridden by the options given by the user
        return self._options
499
+
500
+
501
class SuperstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures, for which the given input structure
    is a superstructure.
    Put differently, the matches of this query are substructures of
    the input structure.

    The input structure must be given by exactly one of the
    parameters `smiles`, `smarts`, `inchi`, `sdf` or `cid`.

    Parameters
    ----------
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The upper limit for the number of matches returned by this
        query.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If true, atoms must match the specified charge.
        (Default: False)
    match_tautomers : bool, optional
        If true, tautomers of the given structure are matched as well.
        (Default: False)
    rings_not_embedded : bool, optional
        If true, rings may not be embedded in a larger system.
        (Default: False)
    single_double_bonds_match : bool, optional
        If true, single or double bonds match aromatic bonds.
        (Default: True)
    chains_match_rings : bool, optional
        If true, chain bonds in the query may match rings in hits.
        (Default: True)
    strip_hydrogen : bool, optional
        If true, any explicit hydrogens are removed before searching.
        (Default: False)
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.
        (Default: 'ignore')

    Notes
    -----
    The descriptions of the optional parameters are taken from the
    *PubChem* REST API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SuperstructureQuery(cid=5950, number=5)))
    [..., ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def search_type(self):
        # Search type for the request input part
        return "fastsuperstructure"
572
+
573
+
574
class SubstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures, for which the given input structure
    is a substructure.
    Put differently, the matches of this query are superstructures of
    the input structure.

    The input structure must be given by exactly one of the
    parameters `smiles`, `smarts`, `inchi`, `sdf` or `cid`.

    Parameters
    ----------
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The upper limit for the number of matches returned by this
        query.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If true, atoms must match the specified charge.
        (Default: False)
    match_tautomers : bool, optional
        If true, tautomers of the given structure are matched as well.
        (Default: False)
    rings_not_embedded : bool, optional
        If true, rings may not be embedded in a larger system.
        (Default: False)
    single_double_bonds_match : bool, optional
        If true, single or double bonds match aromatic bonds.
        (Default: True)
    chains_match_rings : bool, optional
        If true, chain bonds in the query may match rings in hits.
        (Default: True)
    strip_hydrogen : bool, optional
        If true, any explicit hydrogens are removed before searching.
        (Default: False)
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.
        (Default: 'ignore')

    Notes
    -----
    The descriptions of the optional parameters are taken from the
    *PubChem* REST API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SubstructureQuery(cid=5950, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SubstructureQuery.from_atoms(atom_array, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def search_type(self):
        # Search type for the request input part
        return "fastsubstructure"
645
+
646
+
647
class SimilarityQuery(StructureQuery):
    """
    A query for all structures that are similar to the given input
    structure.

    The input structure must be given by exactly one of the
    parameters `smiles`, `smarts`, `inchi`, `sdf` or `cid`.

    Parameters
    ----------
    threshold : float, optional
        The minimum *Tanimoto* similarity a structure requires to be
        a match.
        Must be between 0 (no similarity) and 1 (complete match).
        (Default: 0.9)
    conformation_based : bool, optional
        If true, the 3D conformation is the basis for the similarity
        computation.
        By default, only the elements and bonds between the atoms are
        considered for similarity computation.
        (Default: False)
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The upper limit for the number of matches returned by this
        query.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Notes
    -----
    The conformation based similarity measure uses *shape-Tanimoto* and
    *color-Tanimoto* scores :footcite:`Kim2018`.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SimilarityQuery(cid=5950, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SimilarityQuery.from_atoms(atom_array, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def __init__(self, threshold=0.9, conformation_based=False, **kwargs):
        self._conformation_based = conformation_based
        self._threshold = threshold
        super().__init__(**kwargs)

    def search_type(self):
        # The suffix selects between the 2D and 3D similarity search
        if self._conformation_based:
            return "fastsimilarity_3d"
        return "fastsimilarity_2d"

    def search_options(self):
        # The fractional threshold is sent as a rounded integer percentage
        percentage = int(round(self._threshold * 100))
        return {"threshold": percentage}
715
+
716
+
717
class IdentityQuery(StructureQuery):
    """
    A query for all structures that are identical to the given input
    structure.

    The input structure must be given by exactly one of the
    parameters `smiles`, `smarts`, `inchi`, `sdf` or `cid`.

    Parameters
    ----------
    identity_type : {'same_connectivity', 'same_tautomer', 'same_stereo', 'same_isotope', 'same_stereo_isotope', 'nonconflicting_stereo', 'same_isotope_nonconflicting_stereo'}, optional
        The type of identity search.
        (Default: 'same_stereo_isotope')
    smiles : str, optional
        The *SMILES* string of the query structure.
    smarts : str, optional
        The *SMARTS* pattern of the query structure.
    inchi : str, optional
        The *InChI* string of the query structure.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually the SDF is created from an :class:`AtomArray` via
        :meth:`from_atoms()`.
    cid : int, optional
        The CID of the query structure.
    number : int, optional
        The upper limit for the number of matches returned by this
        query.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(IdentityQuery(cid=5950)))
    [5950]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(IdentityQuery.from_atoms(atom_array)))
    [5950]
    """

    def __init__(self, identity_type="same_stereo_isotope", **kwargs):
        self._identity_type = identity_type
        super().__init__(**kwargs)

    def search_type(self):
        # Search type for the request input part
        return "fastidentity"

    def get_params(self):
        # The REST API expects the parameter 'identity_type' in
        # *snake case*, so it must not undergo the *camel case*
        # conversion applied to the 'search_options()'
        # -> Add it directly to the request parameters instead
        request_params = super().get_params()
        request_params["identity_type"] = self._identity_type
        return request_params
772
+
773
+
774
def search(query, throttle_threshold=0.5, return_throttle_status=False):
    """
    Get all CIDs that meet the given query requirements,
    via the PubChem REST API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        If the load of either the request time or count exceeds this
        value the execution is halted.
        See :class:`ThrottleStatus` for more information.
        If ``None`` is given, the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` is also returned.

    Returns
    -------
    ids : list of int
        List of all compound IDs (CIDs) that meet the query requirement.
    throttle_status : ThrottleStatus
        The :class:`ThrottleStatus` obtained from the server response.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    RequestError
        If the server response indicates a failed request.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """
    # Use POST to be compatible with the larger payloads
    # of structure searches
    # If the query has no file payload, no 'files' are sent at all
    files = query.get_files() or None
    r = requests.post(
        _base_url + query.get_input_url_path() + "/cids/TXT",
        data=query.get_params(),
        files=files,
    )
    if not r.ok:
        raise RequestError(parse_error_details(r.text))
    throttle_status = ThrottleStatus.from_response(r)
    if throttle_threshold is not None:
        throttle_status.wait_if_busy(throttle_threshold)

    # Each line of the response text is parsed as one CID;
    # blank lines are skipped, as they cannot be parsed as integer
    cids = [int(line) for line in r.text.splitlines() if line.strip()]
    if return_throttle_status:
        return cids, throttle_status
    return cids