biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,819 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.pubchem"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = [
8
+ "Query",
9
+ "NameQuery",
10
+ "SmilesQuery",
11
+ "InchiQuery",
12
+ "InchiKeyQuery",
13
+ "FormulaQuery",
14
+ "SuperstructureQuery",
15
+ "SubstructureQuery",
16
+ "SimilarityQuery",
17
+ "IdentityQuery",
18
+ "search",
19
+ ]
20
+
21
+ import abc
22
+ import collections
23
+ import copy
24
+ import requests
25
+ from biotite.database.error import RequestError
26
+ from biotite.database.pubchem.error import parse_error_details
27
+ from biotite.database.pubchem.throttle import ThrottleStatus
28
+ from biotite.structure.io.mol.mol import MOLFile
29
+
30
+ _base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
31
+
32
+
33
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base class of all search queries for the *PubChem* REST
    API.

    In contrast to most other database interfaces in *Biotite*,
    multiple *PubChem* queries cannot be combined with each other using
    logical operators.
    """

    @abc.abstractmethod
    def get_input_url_path(self):
        """
        Get the *input* part of the request URL.

        Returns
        -------
        path : str
            The *input* part of the request URL.
            The string must neither start nor end with a slash
            character.
        """

    def get_params(self):
        """
        Get the POST payload for this query.

        Returns
        -------
        params : dict (str -> object)
            The payload.
            This base implementation returns a new empty dictionary.
        """
        return dict()

    def get_files(self):
        """
        Get the POST file payload for this query.

        Returns
        -------
        files : dict (str -> object)
            The file payload.
            This base implementation returns a new empty dictionary.
        """
        return dict()
77
+
78
+
79
class NameQuery(Query):
    """
    A query that finds compounds by their name.

    A compound is only matched, if its name is completely equal to the
    given name.
    However, synonyms of the compound name are taken into account as
    well.

    Parameters
    ----------
    name : str
        The compound name to be searched.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """

    def __init__(self, name):
        self._name = name

    def get_input_url_path(self):
        # The name itself is transmitted via the POST payload
        return "compound/name"

    def get_params(self):
        return dict(name=self._name)
106
+
107
+
108
class SmilesQuery(Query):
    """
    A query that finds compounds by a
    *Simplified Molecular Input Line Entry Specification* (*SMILES*)
    string.

    Parameters
    ----------
    smiles : str
        The *SMILES* string.

    Examples
    --------

    >>> print(search(SmilesQuery("CCCC")))
    [7843]
    """

    def __init__(self, smiles):
        self._smiles = smiles

    def get_input_url_path(self):
        # The SMILES string itself is transmitted via the POST payload
        return "compound/smiles"

    def get_params(self):
        return dict(smiles=self._smiles)
134
+
135
+
136
class InchiQuery(Query):
    """
    A query that finds compounds by an
    *International Chemical Identifier* (*InChI*) string.

    Parameters
    ----------
    inchi : str
        The *InChI* string.

    Examples
    --------

    >>> print(search(InchiQuery("InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3")))
    [7843]
    """

    def __init__(self, inchi):
        self._inchi = inchi

    def get_input_url_path(self):
        # The InChI string itself is transmitted via the POST payload
        return "compound/inchi"

    def get_params(self):
        return dict(inchi=self._inchi)
161
+
162
+
163
class InchiKeyQuery(Query):
    """
    A query that finds compounds by an
    *International Chemical Identifier* (*InChI*) key.

    Parameters
    ----------
    inchi_key : str
        The *InChI* key.

    Examples
    --------

    >>> print(search(InchiKeyQuery("IJDNQMDRQITEOD-UHFFFAOYSA-N")))
    [7843]
    """

    def __init__(self, inchi_key):
        self._inchi_key = inchi_key

    def get_input_url_path(self):
        # The InChI key itself is transmitted via the POST payload
        return "compound/inchikey"

    def get_params(self):
        # Note that the request parameter has no underscore
        return dict(inchikey=self._inchi_key)
188
+
189
+
190
class FormulaQuery(Query):
    """
    A query that searches for compounds with the given molecular
    formula.

    The formula can also be created from an :class:`AtomArray` using
    the :meth:`from_atoms()` method.

    Parameters
    ----------
    formula : str
        The molecular formula, i.e. each capitalized element with its
        count in the compound concatenated into a single string.
    allow_other_elements : bool, optional
        If set to true, compounds with additional elements, not present
        in the molecular formula, will also match.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can be
        considered unlimited.

    Examples
    --------

    >>> print(search(FormulaQuery("C4H10", number=5)))
    [..., ..., ..., ..., ...]
    >>> atom_array = residue("ALA")
    >>> print(search(FormulaQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def __init__(self, formula, allow_other_elements=False, number=None):
        self._formula = formula
        self._allow_other_elements = allow_other_elements
        self._number = number

    @classmethod
    def from_atoms(cls, atoms, allow_other_elements=False, number=None):
        """
        Create the query from the given structure by using its
        molecular formula.

        Parameters
        ----------
        atoms : AtomArray or AtomArrayStack
            The structure to take the molecular formula from.
        allow_other_elements : bool, optional
            If set to true, compounds with additional elements, not
            present in the molecular formula, will also match.
        number : int, optional
            The maximum number of matches that this query may return.
            By default, the *PubChem* default value is used, which can
            be considered unlimited.

        Returns
        -------
        query : FormulaQuery
            The query.
            If this method is called on a subclass, an instance of that
            subclass is returned.
        """
        element_counter = collections.Counter(atoms.element)
        # C and H come first in the molecular formula...
        leading_elements = [
            element for element in ("C", "H") if element in element_counter
        ]
        # ...and all other elements follow in alphabetical order
        remaining_elements = sorted(
            element for element in element_counter if element not in ("C", "H")
        )
        formula = "".join(
            _format_element(element, element_counter[element])
            for element in leading_elements + remaining_elements
        )
        # Use 'cls' instead of the hard-coded class,
        # so that subclasses obtain instances of their own type
        return cls(formula, allow_other_elements, number)

    def get_input_url_path(self):
        # The 'fastformula' service seems not to accept the formula
        # in the parameter section of the request
        return f"compound/fastformula/{self._formula}"

    def get_params(self):
        params = {"AllowOtherElements": self._allow_other_elements}
        # Only set maximum number, if provided by the user
        # The PubChem default value for this might change over time
        if self._number is not None:
            params["MaxRecords"] = self._number
        return params
276
+
277
+
278
+ def _format_element(element, count):
279
+ if count == 1:
280
+ return element.capitalize()
281
+ else:
282
+ return element.capitalize() + str(count)
283
+
284
+
285
+ class StructureQuery(Query, metaclass=abc.ABCMeta):
286
+ """
287
+ Abstract superclass for all structure based searches.
288
+ This class handles structure inputs and option formatting.
289
+
290
+ Exactly one of the input structure parameters `smiles`, `smarts`,
291
+ `inchi`, `sdf` or `cid` must be given.
292
+
293
+ Parameters
294
+ ----------
295
+ smiles : str, optional
296
+ The query *SMILES* string.
297
+ smarts : str, optional
298
+ The query *SMARTS* pattern.
299
+ inchi : str, optional
300
+ The query *InChI* string.
301
+ sdf : str, optional
302
+ A query structure as SDF formatted string.
303
+ Usually :meth:`from_atoms()` is used to create the SDF from an
304
+ :class:`AtomArray`.
305
+ cid : int, optional
306
+ The query structure given as CID.
307
+ number : int, optional
308
+ The maximum number of matches that this query may return.
309
+ By default, the *PubChem* default value is used, which can
310
+ be considered unlimited.
311
+ """
312
+
313
+ _query_keys = ("smiles", "smarts", "inchi", "sdf", "cid")
314
+
315
+ def __init__(self, **kwargs):
316
+ query_key_found = False
317
+ for query_key in StructureQuery._query_keys:
318
+ if query_key in kwargs:
319
+ if not query_key_found:
320
+ self._query_key = query_key
321
+ self._query_val = kwargs[query_key]
322
+ # Delete parameter from kwargs for later check for
323
+ # unused (invalid) parameters
324
+ del kwargs[query_key]
325
+ query_key_found = True
326
+ else:
327
+ # A query key was already found,
328
+ # duplicates are not allowed
329
+ raise TypeError(
330
+ "Only one of 'smiles', 'smarts', 'inchi', 'sdf' or "
331
+ "'cid' may be given"
332
+ )
333
+ if not query_key_found:
334
+ raise TypeError(
335
+ "Expected exactly one of 'smiles', 'smarts', 'inchi', 'sdf' or 'cid'"
336
+ )
337
+ if "number" in kwargs:
338
+ self._number = kwargs["number"]
339
+ del kwargs["number"]
340
+ else:
341
+ self._number = None
342
+ # If there are still remaining parameters that were not handled
343
+ # by this superclass or the inheriting class, they are invalid
344
+ for key in kwargs:
345
+ raise TypeError(f"'{key}' is an invalid keyword argument")
346
+
347
+ @classmethod
348
+ def from_atoms(cls, atoms, *args, **kwargs):
349
+ """
350
+ Create a query using the given query structure.
351
+
352
+ Parameters
353
+ ----------
354
+ atoms : AtomArray or AtomArrayStack
355
+ The query structure.
356
+ *args, **kwargs
357
+ See the constructor for additional options.
358
+
359
+ Returns
360
+ -------
361
+ query : StructureQuery
362
+ The query object.
363
+ """
364
+ mol_file = MOLFile()
365
+ mol_file.set_structure(atoms)
366
+ # Every MOL string with "$$$$" is a valid SDF string
367
+ # Important: USE MS-style new lines
368
+ return cls(*args, sdf="\r\n".join(mol_file.lines) + "\r\n$$$$\r\n", **kwargs)
369
+
370
+ def get_input_url_path(self):
371
+ input_string = f"compound/{self.search_type()}/{self._query_key}"
372
+ if self._query_key == "cid":
373
+ # Put CID in URL and not in POST payload,
374
+ # as PubChem is confused otherwise
375
+ input_string += "/" + str(self._query_val)
376
+ return input_string
377
+
378
+ def get_params(self):
379
+ if self._query_key not in ("cid", "sdf"):
380
+ # CID is in URL
381
+ # SDF is given as file
382
+ params = {self._query_key: self._query_val}
383
+ else:
384
+ params = {}
385
+ # Only set maximum number, if provided by the user
386
+ # The PubChem default value for this might change over time
387
+ if self._number is not None:
388
+ params["MaxRecords"] = self._number
389
+ for key, val in self.search_options().items():
390
+ # Convert 'snake case' Python parameters
391
+ # to 'camel case' request parameters
392
+ key = "".join([word.capitalize() for word in key.split("_")])
393
+ params[key] = val
394
+ return params
395
+
396
+ def get_files(self):
397
+ # Multi-line SDF string requires payload as file
398
+ if self._query_key == "sdf":
399
+ return {"sdf": self._query_val}
400
+ else:
401
+ return {}
402
+
403
@abc.abstractmethod
def search_type(self):
    """
    Name the kind of search this query performs.

    PROTECTED: Override when inheriting.

    Returns
    -------
    search_type : str
        The search type used in the input part of the request URL,
        i.e. the path element directly following ``compound/``.
    """
417
+
418
def search_options(self):
    """
    Provide additional search specific options for the POST payload.

    PROTECTED: Override when inheriting.

    Returns
    -------
    options : dict (str -> object)
        The keys are automatically converted from *snake case* to
        the *camel case* required by the request parameters.
        This base implementation provides no options.
    """
    return dict()
431
+
432
+
433
class SuperOrSubstructureQuery(StructureQuery, metaclass=abc.ABCMeta):
    """
    Abstract superclass for super- and substructure searches.
    This class handles specific options for these searches.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query *SMILES* string.
    smarts : str, optional
        The query *SMARTS* pattern.
    inchi : str, optional
        The query *InChI* string.
    sdf : str, optional
        A query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure given as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.
    """

    # Values used for every search option the user does not set
    _option_defaults = {
        "match_charges": False,
        "match_tautomers": False,
        "rings_not_embedded": False,
        "single_double_bonds_match": True,
        "chains_match_rings": True,
        "strip_hydrogen": False,
        "stereo": "ignore",
    }

    def __init__(self, **kwargs):
        self._options = copy.copy(SuperOrSubstructureQuery._option_defaults)
        # Move the search options from 'kwargs' into 'self._options',
        # so that only the remaining arguments reach the superclass.
        # Iterate over the known option names instead of 'kwargs':
        # removing entries from a dictionary while iterating over it
        # raises a 'RuntimeError'
        for option in SuperOrSubstructureQuery._option_defaults:
            if option in kwargs:
                self._options[option] = kwargs.pop(option)
        super().__init__(**kwargs)

    def search_options(self):
        """
        Get the super-/substructure search options of this query.

        Returns
        -------
        options : dict (str -> object)
            The default options, updated with the values given by the
            user in the constructor.
        """
        return self._options
502
+
503
+
504
class SuperstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures that have the given input structure
    as superstructure, i.e. the matches are substructures of the
    input structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query structure as *SMILES* string.
    smarts : str, optional
        The query structure as *SMARTS* pattern.
    inchi : str, optional
        The query structure as *InChI* string.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SuperstructureQuery(cid=5950, number=5)))
    [..., ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SuperstructureQuery.from_atoms(atom_array, number=5)))
    [..., ..., ..., ..., ...]
    """

    def search_type(self):
        """
        Get the search type for the input part of the request URL.

        Returns
        -------
        search_type : str
            Always ``"fastsuperstructure"``.
        """
        return "fastsuperstructure"
568
+
569
+
570
class SubstructureQuery(SuperOrSubstructureQuery):
    """
    A query for all structures that have the given input structure
    as substructure, i.e. the matches are superstructures of the
    input structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    smiles : str, optional
        The query structure as *SMILES* string.
    smarts : str, optional
        The query structure as *SMARTS* pattern.
    inchi : str, optional
        The query structure as *InChI* string.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.
    match_charges : bool, optional
        If set to true, atoms must match the specified charge.
    match_tautomers : bool, optional
        If set to true, allow match to tautomers of the given structure.
    rings_not_embedded : bool, optional
        If set to true, rings may not be embedded in a larger system.
    single_double_bonds_match : bool, optional
        If set to true, single or double bonds match aromatic bonds.
    chains_match_rings : bool, optional
        If set to true, chain bonds in the query may match rings in
        hits.
    strip_hydrogen : bool, optional
        If set to true, remove any explicit hydrogens before searching.
    stereo : {'ignore', 'exact', 'relative', 'nonconflicting'}, optional
        How to handle stereo.

    Notes
    -----
    Optional parameter descriptions are taken from the *PubChem* REST
    API
    `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Substructure-Superstructure>`_.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SubstructureQuery(cid=5950, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SubstructureQuery.from_atoms(atom_array, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def search_type(self):
        """
        Get the search type for the input part of the request URL.

        Returns
        -------
        search_type : str
            Always ``"fastsubstructure"``.
        """
        return "fastsubstructure"
634
+
635
+
636
class SimilarityQuery(StructureQuery):
    """
    A query for all structures that are similar to the given input
    structure.

    Exactly one of the input structure parameters `smiles`, `smarts`,
    `inchi`, `sdf` or `cid` must be given.

    Parameters
    ----------
    threshold : float, optional
        The minimum *Tanimoto* similarity required for a match.
        Must be between 0 (no similarity) and 1 (complete match).
    conformation_based : bool, optional
        If set to true, the similarity is computed based on the
        3D conformation.
        By default, only the elements and bonds between the atoms are
        considered for similarity computation.
    smiles : str, optional
        The query structure as *SMILES* string.
    smarts : str, optional
        The query structure as *SMARTS* pattern.
    inchi : str, optional
        The query structure as *InChI* string.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Notes
    -----
    The conformation based similarity measure uses *shape-Tanimoto* and
    *color-Tanimoto* scores :footcite:`Kim2018`.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(SimilarityQuery(cid=5950, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(SimilarityQuery.from_atoms(atom_array, threshold=1.0, number=5)))
    [5950, ..., ..., ..., ...]
    """

    def __init__(self, threshold=0.9, conformation_based=False, **kwargs):
        self._threshold = threshold
        self._conformation_based = conformation_based
        # The remaining arguments describe the query structure
        super().__init__(**kwargs)

    def search_type(self):
        """
        Get the search type for the input part of the request URL.

        Returns
        -------
        search_type : str
            ``"fastsimilarity_3d"`` for a conformation based search,
            otherwise ``"fastsimilarity_2d"``.
        """
        if self._conformation_based:
            return "fastsimilarity_3d"
        return "fastsimilarity_2d"

    def search_options(self):
        """
        Get the similarity threshold as search option.

        Returns
        -------
        options : dict (str -> int)
            The threshold, converted from a fraction into a rounded
            integer percentage.
        """
        percentage = round(self._threshold * 100)
        return {"threshold": int(percentage)}
704
+
705
+
706
class IdentityQuery(StructureQuery):
    """
    A query for all structures that are identical to the given input
    structure.

    Exactly one of the input structure parameters `smiles`, `smarts`, `inchi`,
    `sdf` or `cid` must be given.

    Parameters
    ----------
    identity_type : {'same_connectivity', 'same_tautomer', 'same_stereo', 'same_isotope', 'same_stereo_isotope', 'nonconflicting_stereo', 'same_isotope_nonconflicting_stereo'}, optional
        The type of identity search.
    smiles : str, optional
        The query structure as *SMILES* string.
    smarts : str, optional
        The query structure as *SMARTS* pattern.
    inchi : str, optional
        The query structure as *InChI* string.
    sdf : str, optional
        The query structure as SDF formatted string.
        Usually :meth:`from_atoms()` is used to create the SDF from an
        :class:`AtomArray`.
    cid : int, optional
        The query structure as CID.
    number : int, optional
        The maximum number of matches that this query may return.
        By default, the *PubChem* default value is used, which can
        be considered unlimited.

    Examples
    --------

    >>> # CID of alanine
    >>> print(search(IdentityQuery(cid=5950)))
    [5950]
    >>> # AtomArray of alanine
    >>> atom_array = residue("ALA")
    >>> print(search(IdentityQuery.from_atoms(atom_array)))
    [5950]
    """

    def __init__(self, identity_type="same_stereo_isotope", **kwargs):
        self._identity_type = identity_type
        # The remaining arguments describe the query structure
        super().__init__(**kwargs)

    def search_type(self):
        """
        Get the search type for the input part of the request URL.

        Returns
        -------
        search_type : str
            Always ``"fastidentity"``.
        """
        return "fastidentity"

    def get_params(self):
        """
        Get the POST parameters, including the identity type.

        Returns
        -------
        params : dict (str -> object)
            The parameters of the superclass, extended by the
            ``identity_type`` parameter.
        """
        # The REST API expects the 'identity_type' parameter in
        # *snake case*, so it is added here instead of in
        # 'search_options()', which would convert it to *camel case*
        request_params = super().get_params()
        request_params.update(identity_type=self._identity_type)
        return request_params
761
+
762
+
763
def search(query, throttle_threshold=0.5, return_throttle_status=False):
    """
    Get all CIDs that meet the given query requirements,
    via the PubChem REST API.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        If the load of either the request time or count exceeds this
        value the execution is halted.
        See :class:`ThrottleStatus` for more information.
        If ``None`` is given, the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` is also returned.

    Returns
    -------
    ids : list of int
        List of all compound IDs (CIDs) that meet the query requirement.
    throttle_status : ThrottleStatus
        The :class:`ThrottleStatus` obtained from the server response.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    RequestError
        If the server response indicates that the request failed.

    Examples
    --------

    >>> print(search(NameQuery("Alanine")))
    [5950, ..., ...]
    """
    # Query the files only once;
    # an empty mapping means that the request has no file payload
    files = query.get_files() or None
    # Use POST to be compatible with the larger payloads
    # of structure searches
    response = requests.post(
        _base_url + query.get_input_url_path() + "/cids/TXT",
        data=query.get_params(),
        files=files,
    )
    if not response.ok:
        raise RequestError(parse_error_details(response.text))
    throttle_status = ThrottleStatus.from_response(response)
    if throttle_threshold is not None:
        throttle_status.wait_if_busy(throttle_threshold)

    # The response body contains one CID per line
    cids = [int(cid) for cid in response.text.splitlines()]
    if return_throttle_status:
        return cids, throttle_status
    return cids