biotite 1.5.0__cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-314-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-314-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-314-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-314-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-314-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-314-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-314-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-314-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-314-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-314-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-314-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-314-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-314-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-314-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-314-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-314-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-314-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-314-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-314-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-314-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-314-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,263 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
8
+
9
+ import abc
10
+ from xml.etree import ElementTree
11
+ import requests
12
+ from biotite.database.entrez.check import check_for_errors
13
+ from biotite.database.entrez.dbnames import sanitize_database_name
14
+ from biotite.database.entrez.key import get_api_key
15
+ from biotite.database.error import RequestError
16
+
17
+ _search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
18
+
19
+
20
class Query(metaclass=abc.ABCMeta):
    """
    Abstract wrapper around a search term for the NCBI Entrez
    search service.

    Two queries are combined into a :class:`CompositeQuery` with
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).
    A right operand that is not a :class:`Query` is wrapped into a
    :class:`SimpleQuery` before the combination.
    """

    def __init__(self):
        pass

    @abc.abstractmethod
    def __str__(self):
        pass

    def __or__(self, operand):
        rhs = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("OR", self, rhs)

    def __and__(self, operand):
        rhs = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("AND", self, rhs)

    def __xor__(self, operand):
        # The ``^`` operator is mapped onto the Entrez 'NOT' operator
        rhs = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("NOT", self, rhs)
47
+
48
+
49
class CompositeQuery(Query):
    """
    A composite query for the NCBI Entrez search service.

    It joins two other queries with an 'AND', 'OR' or 'NOT' operator.

    Instances are usually not created directly.
    Instead, :class:`Query` instances are combined with
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).

    Parameters
    ----------
    operator : str, {"AND", "OR", "NOT"}
        The combination operator.
    query1, query2 : Query
        The queries to be combined.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli", "Organism") & \\
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> print(type(query).__name__)
    CompositeQuery
    >>> print(query)
    ("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
    """

    def __init__(self, operator, query1, query2):
        super().__init__()
        self._op = operator
        self._q1 = query1
        self._q2 = query2

    def __str__(self):
        # Both operands are parenthesized, so nested composites keep
        # their grouping in the rendered search term
        return f"({self._q1}) {self._op} ({self._q2})"
87
+
88
+
89
class SimpleQuery(Query):
    """
    A simple query for the NCBI Entrez search service without
    combination via 'AND', 'OR' or 'NOT'. A query consists of a search
    term and an optional field.

    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Parameters
    ----------
    term : str
        The search term.
        It must not contain quotes, brackets, parentheses, tabs or
        newlines, and it must not contain ``AND``, ``OR`` or ``NOT``
        as a stand-alone word.
    field : str, optional
        The field to search the term in.
        The list of possible fields and the required search term
        formatting can be found
        `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
        By default the field is omitted and all fields are searched in
        for the term, implicitly.

    Raises
    ------
    ValueError
        If `field` is not a known field identifier or if `term`
        contains an illegal string.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli")
    >>> print(query)
    "Escherichia coli"
    >>> query = SimpleQuery("Escherichia coli", "Organism")
    >>> print(query)
    "Escherichia coli"[Organism]
    """

    # Field identifiers are taken from
    # https://www.ncbi.nlm.nih.gov/books/NBK49540/
    _fields = [
        "Accession",
        "All Fields",
        "Author",
        "EC/RN Number",
        "Feature Key",
        "Filter",
        "Gene Name",
        "Genome Project",
        "Issue",
        "Journal",
        "Keyword",
        "Modification Date",
        "Molecular Weight",
        "Organism",
        "Page Number",
        "Primary Accession",
        "Properties",
        "Protein Name",
        "Publication Date",
        "SeqID String",
        "Sequence Length",
        "Substance Name",
        "Text Word",
        "Title",
        "Volume",
        # Abbreviations
        "ACCN",
        "ALL",
        "AU",
        "AUTH",
        "ECNO",
        "FKEY",
        "FILT",
        "SB",
        "GENE",
        "ISS",
        "JOUR",
        "KYWD",
        "MDAT",
        "MOLWT",
        "ORGN",
        "PAGE",
        "PACC",
        "PORGN",
        "PROP",
        "PROT",
        "PDAT",
        "SQID",
        "SLEN",
        "SUBS",
        "WORD",
        "TI",
        "TITL",
        "VOL",
    ]

    def __init__(self, term, field=None):
        super().__init__()
        if field is not None:
            if field not in SimpleQuery._fields:
                raise ValueError(f"Unknown field identifier '{field}'")
        for invalid_string in ['"', "AND", "OR", "NOT", "[", "]", "(", ")", "\t", "\n"]:
            if invalid_string in ("AND", "OR", "NOT"):
                # Boolean operators are only illegal as stand-alone words:
                # a plain substring test would also reject harmless terms
                # such as 'NOTCH1' or 'ORF'
                is_illegal = invalid_string in term.split()
            else:
                # Syntax characters are illegal anywhere in the term
                is_illegal = invalid_string in term
            if is_illegal:
                raise ValueError(f"Query contains illegal term {invalid_string}")
        if " " in term:
            # Encapsulate in quotes if spaces are in search term
            term = f'"{term}"'
        self._term = term
        self._field = field

    def __str__(self):
        string = self._term
        if self._field is not None:
            string += f"[{self._field}]"
        return string
199
+
200
+
201
def search(query, db_name, number=20):
    r"""
    Get all NCBI UIDs that meet the given query requirements,
    via the NCBI ESearch service.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
        Its string representation is sent as search term.
    db_name : str
        E-utility or common database name.
    number : int
        The maximum number of UIDs that are obtained.

    Returns
    -------
    ids : list of str
        A list of strings containing all NCBI UIDs (accession number)
        that meet the query requirements.

    Raises
    ------
    RequestError
        If the server response cannot be parsed as XML.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    Notes
    -----
    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Examples
    --------
    >>> query = SimpleQuery("Escherichia coli", "Organism") & \
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> ids = search(query, "nuccore", number=5)
    >>> print(ids)
    ['...', '...', '...', '...', '...']
    """
    param_dict = {
        "db": sanitize_database_name(db_name),
        "term": str(query),
        "retmax": str(number),
    }
    # The API key is only sent, if one is available
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_search_url, params=param_dict)
    xml_response = r.text
    # NOTE(review): presumably raises on an error message contained in
    # the response - confirm against 'biotite.database.entrez.check'
    check_for_errors(xml_response)
    try:
        root = ElementTree.fromstring(xml_response)
    except ElementTree.ParseError as err:
        # Keep the error message short for long responses
        if len(xml_response) > 100:
            xml_response = xml_response[:100] + "..."
        # Chain the parser error, so the original cause stays visible
        raise RequestError(f"Invalid server response: {xml_response}") from err
    xpath = ".//IdList/Id"
    uids = [element.text for element in root.findall(xpath)]
    return uids
@@ -0,0 +1,16 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["RequestError"]
8
+
9
+
10
+ class RequestError(Exception):
11
+ """
12
+ Indicates that the database returned a response with an error
13
+ message or other malformed content.
14
+ """
15
+
16
+ pass
@@ -0,0 +1,21 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for searching and downloading files from the *PubChem*
7
+ database.
8
+ Although *PubChem* is part of *NCBI Entrez*,
9
+ :mod:`biotite.database.entrez` is only capable of accessing
10
+ meta-information from *PubChem*.
11
+ This subpackage, on the other hand, supports searching *PubChem*
12
+ compounds based on chemical information and is able to download
13
+ structure records.
14
+ """
15
+
16
+ __name__ = "biotite.database.pubchem"
17
+ __author__ = "Patrick Kunzmann"
18
+
19
+ from .download import *
20
+ from .query import *
21
+ from .throttle import *
@@ -0,0 +1,258 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.pubchem"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["fetch", "fetch_property"]
8
+
9
+ import io
10
+ import numbers
11
+ import os
12
+ from os.path import getsize, isdir, isfile, join
13
+ import requests
14
+ from biotite.database.error import RequestError
15
+ from biotite.database.pubchem.error import parse_error_details
16
+ from biotite.database.pubchem.throttle import ThrottleStatus
17
+
18
+ _base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
19
+ _binary_formats = ["png", "asnb"]
20
+
21
+
22
def fetch(
    cids,
    format="sdf",
    target_path=None,
    as_structural_formula=False,
    overwrite=False,
    verbose=False,
    throttle_threshold=0.5,
    return_throttle_status=False,
):
    """
    Download structure files from *PubChem* in various formats.

    This function requires an internet connection.

    Parameters
    ----------
    cids : int or iterable object of int
        A single compound ID (CID) or a list of CIDs of the structure(s)
        to be downloaded.
    format : {'sdf', 'asnt', 'asnb', 'xml', 'json', 'jsonp', 'png'}
        The format of the files to be downloaded.
        Whether the format is binary (``'png'``, ``'asnb'``) is decided
        case-insensitively.
    target_path : str, optional
        The target directory of the downloaded files.
        By default, the file content is stored in a file-like object
        (:class:`StringIO` or :class:`BytesIO`, respectively).
    as_structural_formula : bool, optional
        If set to true, the structural formula is downloaded instead of
        a 3D conformer.
        This means that coordinates lie in the xy-plane and represent
        the positions atoms would have in a structural formula
        representation.
    overwrite : bool, optional
        If true, existing files will be overwritten.
        Otherwise the respective file will only be downloaded, if the
        file does not exist yet in the specified target directory or if
        the file is empty.
    verbose : bool, optional
        If set to true, the function will output the download progress.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        If the load of either the request time or count exceeds this
        value the execution is halted.
        See :class:`ThrottleStatus` for more information.
        If ``None`` is given, the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` of the final request
        is also returned.

    Returns
    -------
    files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
        The file path(s) to the downloaded files.
        If a single CID was given in `cids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If no `target_path` was given, the file contents are stored in
        either :class:`StringIO` or :class:`BytesIO` objects.
    throttle_status : ThrottleStatus or None
        The :class:`ThrottleStatus` obtained from the server response.
        If multiple CIDs are requested, the :class:`ThrottleStatus` of
        the final response is returned.
        ``None``, if no request was sent at all, i.e. no CID was given
        or every requested file already existed.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    TypeError
        If a CID is given as string instead of as integer.
    RequestError
        If the server response does not indicate success.

    Examples
    --------

    >>> import os.path
    >>> file = fetch(2244, "sdf", path_to_directory)
    >>> print(os.path.basename(file))
    2244.sdf
    >>> files = fetch([2244, 5950], "sdf", path_to_directory)
    >>> print([os.path.basename(file) for file in files])
    ['2244.sdf', '5950.sdf']
    """
    # If only a single CID is present,
    # put it into a single element list
    if isinstance(cids, numbers.Integral):
        cids = [cids]
        single_element = True
    else:
        # Materialize arbitrary iterables (e.g. generators), as the total
        # number of CIDs is required for the progress output
        cids = list(cids)
        single_element = False
    # Create the target folder, if not existing
    if target_path is not None and not isdir(target_path):
        os.makedirs(target_path)

    record_type = "2d" if as_structural_formula else "3d"
    # Stays 'None' if no request is necessary,
    # so it can be returned in any case
    throttle_status = None
    files = []
    for i, cid in enumerate(cids):
        # Prevent IDs as strings, this could be a common error, as other
        # database interfaces of Biotite use string IDs
        if isinstance(cid, str):
            raise TypeError("CIDs must be given as integers, not as string")
        # Verbose output
        if verbose:
            print(f"Fetching file {i + 1:d} / {len(cids):d} ({cid})...", end="\r")

        # Fetch file from database
        if target_path is not None:
            file = join(target_path, str(cid) + "." + format)
        else:
            # 'file = None' -> store content in a file-like object
            file = None

        if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
            r = requests.get(
                _base_url + f"compound/cid/{cid}/{format.upper()}",
                params={"record_type": record_type},
            )
            if not r.ok:
                raise RequestError(parse_error_details(r.text))

            # Decide once and case-insensitively whether the payload is
            # binary, so that the content type always matches the
            # file-like object or the file mode it is written to
            is_binary = format.lower() in _binary_formats
            content = r.content if is_binary else r.text

            if file is None:
                file = io.BytesIO(content) if is_binary else io.StringIO(content)
            else:
                mode = "wb+" if is_binary else "w+"
                with open(file, mode) as f:
                    f.write(content)

            throttle_status = ThrottleStatus.from_response(r)
            if throttle_threshold is not None:
                throttle_status.wait_if_busy(throttle_threshold)

        files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    return_value = files[0] if single_element else files
    if return_throttle_status:
        return return_value, throttle_status
    else:
        return return_value
166
+
167
+
168
def fetch_property(cids, name, throttle_threshold=0.5, return_throttle_status=False):
    """
    Download the given property for the given CID(s).

    This function requires an internet connection.

    Parameters
    ----------
    cids : int or iterable object of int
        A single compound ID (CID) or a list of CIDs to get the property
        for.
    name : str
        The name of the desired property.
        Valid properties are given in the *PubChem* REST API
        `documentation <https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Compound-Property-Tables>`_.
    throttle_threshold : float or None, optional
        A value between 0 and 1.
        If the load of either the request time or count exceeds this
        value the execution is halted.
        See :class:`ThrottleStatus` for more information.
        If ``None`` is given, the execution is never halted.
    return_throttle_status : bool, optional
        If set to true, the :class:`ThrottleStatus` of the request
        is also returned.

    Returns
    -------
    property : str or list of str
        The requested property for each given CID.
        If a single CID was given in `cids`,
        a single string is returned.
        If a list (or other iterable
        object) was given, a list of strings is returned.
    throttle_status : ThrottleStatus
        The :class:`ThrottleStatus` obtained from the server response.
        This can be used for custom request throttling, for example.
        Only returned, if `return_throttle_status` is set to true.

    Raises
    ------
    ValueError
        If `name` contains characters other than letters and numbers.
    RequestError
        If the server response does not indicate success.

    Examples
    --------

    >>> butane_cids = np.array(search(FormulaQuery("C4H10")))
    >>> # Filter natural isotopes...
    >>> n_iso = np.array(fetch_property(butane_cids, "IsotopeAtomCount"), dtype=int)
    >>> # ...and neutral compounds
    >>> charge = np.array(fetch_property(butane_cids, "Charge"), dtype=int)
    >>> butane_cids = butane_cids[(n_iso == 0) & (charge == 0)]
    >>> print(sorted(butane_cids.tolist()))
    [6360, 7843, 18402699, 19029854, 19048342, 157632982, 158271732, 158934736, 161295599, 161897780]
    >>> # Get the IUPAC names for each compound
    >>> iupac_names = fetch_property(butane_cids, "IUPACName")
    >>> # Compounds with multiple molecules use ';' as separator
    >>> print(iupac_names)
    ['butane', '2-methylpropane', 'methane;prop-1-ene', 'ethane;ethene', 'cyclopropane;methane', 'cyclobutane;molecular hydrogen', 'acetylene;methane', 'carbanide;propane', 'carbanylium;propane', 'methylcyclopropane;molecular hydrogen']
    """
    # A lone CID is wrapped into a list, so that the request is
    # assembled the same way in both cases
    single_element = isinstance(cids, numbers.Integral)
    if single_element:
        cids = [cids]

    # Only letters and numbers are allowed in a property name
    if not name.isalnum():
        raise ValueError(f"Property '{name}' contains invalid characters")

    # The TXT format is requested instead of CSV,
    # as table elements may contain ',' characters themselves
    url = _base_url + f"compound/cid/property/{name}/TXT"
    payload = {"cid": ",".join(str(cid) for cid in cids)}
    r = requests.post(url, data=payload)
    if not r.ok:
        raise RequestError(parse_error_details(r.text))
    throttle_status = ThrottleStatus.from_response(r)
    if throttle_threshold is not None:
        throttle_status.wait_if_busy(throttle_threshold)

    # One line of the response corresponds to the property of one CID
    values = r.text.splitlines()

    # A single input CID gives a single value instead of a list
    result = values[0] if single_element else values
    if return_throttle_status:
        return result, throttle_status
    return result
@@ -0,0 +1,30 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.pubchem"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["parse_error_details"]
8
+
9
+
10
def parse_error_details(response_text):
    """
    Extract the error description from the text of an error response.

    The content of the first line starting with ``Detail: `` is used.
    If there is no such line, the first line starting with
    ``Message: `` is used instead.

    Parameters
    ----------
    response_text : str
        The text of the response.

    Returns
    -------
    error_details : str
        The error details.
        ``"Unknown error"``, if the response contains neither a
        ``Detail: `` nor a ``Message: `` line.
    """
    lines = response_text.splitlines()
    # The prefixes are ordered by priority: A 'Detail: ' line is
    # preferred, even if a 'Message: ' line appears earlier in the text
    for prefix in ("Detail: ", "Message: "):
        hit = next((line for line in lines if line.startswith(prefix)), None)
        if hit is not None:
            return hit[len(prefix) :]
    # Neither a 'Detail: ...' nor a 'Message: ...' line is present
    return "Unknown error"