biotite 1.1.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cp313-win_amd64.pyd +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,15 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for downloading files from the NCBI Entrez database.
7
+ """
8
+
9
+ __name__ = "biotite.database.entrez"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .dbnames import *
13
+ from .download import *
14
+ from .key import *
15
+ from .query import *
@@ -0,0 +1,60 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
7
+ __all__ = ["check_for_errors"]
8
+
9
+ import json
10
+ from biotite.database.error import RequestError
11
+
12
# Taken from https://github.com/kblin/ncbi-entrez-error-messages
_error_messages = [
    "Error reading from remote server",
    "Bad gateway",
    "Bad Gateway",
    "Cannot process ID list",
    "server is temporarily unable to service your request",
    "Service unavailable",
    "Server Error",
    "ID list is empty",
    "Supplied id parameter is empty",
    "Resource temporarily unavailable",
    "Failed to retrieve sequence",
    "Failed to understand id",
]


def check_for_errors(message):
    """
    Check for common error messages in NCBI Entrez database responses.

    Two kinds of errors are detected:

    - a short (< 500 characters) JSON object containing an ``"error"``
      key,
    - one of the known plain text error messages within the last
      200 characters of the response.

    Parameters
    ----------
    message : str
        The message received from NCBI Entrez.

    Raises
    ------
    RequestError
        If the message contains an error message.
    """
    # Server can respond short JSON error messages
    if len(message) < 500:
        try:
            message_json = json.loads(message)
        except json.decoder.JSONDecodeError:
            # It is not a JSON message
            pass
        else:
            # A short response may be valid JSON without being an object
            # (e.g. a bare number or a quoted string).
            # Only JSON objects can carry an 'error' entry.
            if isinstance(message_json, dict) and "error" in message_json:
                raise RequestError(message_json["error"])

    # Error always appear at the end of message
    message_end = message[-200:]
    # Seemingly arbitrary '+' characters are in NCBI error messages
    message_end = message_end.replace("+", "")
    for error_msg in _error_messages:
        # Often whitespace is also replaced by '+' in error message
        if error_msg.replace(" ", "") in message_end:
            raise RequestError(error_msg)
@@ -0,0 +1,91 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_database_name"]
8
+
9
+
10
# fmt: off
# Mapping from common NCBI Entrez database names
# to the corresponding E-utility database names
_db_names = {
    "BioProject"        : "bioproject",
    "BioSample"         : "biosample",
    "Biosystems"        : "biosystems",
    "Books"             : "books",
    "Conserved Domains" : "cdd",
    "dbGaP"             : "gap",
    "dbVar"             : "dbvar",
    "Epigenomics"       : "epigenomics",
    "EST"               : "nucest",
    "Gene"              : "gene",
    "Genome"            : "genome",
    "GEO Datasets"      : "gds",
    "GEO Profiles"      : "geoprofiles",
    "GSS"               : "nucgss",
    "HomoloGene"        : "homologene",
    "MeSH"              : "mesh",
    "NCBI C++ Toolkit"  : "toolkit",
    "NCBI Web Site"     : "ncbisearch",
    "NLM Catalog"       : "nlmcatalog",
    "Nucleotide"        : "nuccore",
    "OMIA"              : "omia",
    "PopSet"            : "popset",
    "Probe"             : "probe",
    "Protein"           : "protein",
    "Protein Clusters"  : "proteinclusters",
    "PubChem BioAssay"  : "pcassay",
    "PubChem Compound"  : "pccompound",
    "PubChem Substance" : "pcsubstance",
    "PubMed"            : "pubmed",
    "PubMed Central"    : "pmc",
    "SNP"               : "snp",
    "SRA"               : "sra",
    "Structure"         : "structure",
    "Taxonomy"          : "taxonomy",
    "UniGene"           : "unigene",
    "UniSTS"            : "unists"
}
# fmt: on


def get_database_name(database):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name.

    Parameters
    ----------
    database : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    KeyError
        If `database` is not a known common database name.

    Examples
    --------

    >>> print(get_database_name("Nucleotide"))
    nuccore
    """
    return _db_names[database]


def sanitize_database_name(db_name):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name, return E-utility database name, or raise an exception if the
    database name is not existing.

    Only for internal usage in ``download.py`` and ``query.py``.

    Parameters
    ----------
    db_name : str
        Common or E-utility database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    ValueError
        If `db_name` is neither a known common database name nor a
        known E-utility database name.
    """
    if db_name in _db_names:
        # Convert into E-utility database name
        return _db_names[db_name]
    elif db_name in _db_names.values():
        # Is already E-utility database name
        return db_name
    else:
        # The f-string prefix is required,
        # otherwise the placeholder is printed literally
        raise ValueError(f"Database '{db_name}' is not existing")
@@ -0,0 +1,229 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["fetch", "fetch_single_file"]
8
+
9
+ import io
10
+ import os
11
+ from os.path import getsize, isdir, isfile, join
12
+ import requests
13
+ from biotite.database.entrez.check import check_for_errors
14
+ from biotite.database.entrez.dbnames import sanitize_database_name
15
+ from biotite.database.entrez.key import get_api_key
16
+ from biotite.database.error import RequestError
17
+
18
+ _fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
19
+
20
+
21
def fetch(
    uids,
    target_path,
    suffix,
    db_name,
    ret_type,
    ret_mode="text",
    overwrite=False,
    verbose=False,
):
    """
    Download files from the NCBI Entrez database in various formats.

    The data for each UID will be fetched into a separate file.

    A list of valid database, retrieval type and mode combinations can
    be found under
    `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_

    This function requires an internet connection.

    Parameters
    ----------
    uids : str or iterable object of str
        A single *unique identifier* (UID) or a list of UIDs of the
        file(s) to be downloaded.
    target_path : str or None
        The target directory of the downloaded files.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`).
    suffix : str
        The file suffix of the downloaded files. This value is
        independent of the retrieval type.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type
    ret_mode : str, optional
        Retrieval mode
    overwrite : bool, optional
        If true, existing files will be overwritten. Otherwise the
        respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is
        empty. (Default: False)
    verbose: bool, optional
        If true, the function will output the download progress.
        (Default: False)

    Returns
    -------
    files : str or StringIO or list of (str or StringIO)
        The file path(s) to the downloaded files.
        If a single string (a single UID) was given in `uids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If `target_path` is ``None``, the file contents are stored in
        `StringIO` objects.

    Raises
    ------
    RequestError
        If the response of the server contains an error message.
    ValueError
        If `db_name` is not a known database name.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See also
    --------
    fetch_single_file

    Examples
    --------

    >>> import os.path
    >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
    ...               db_name="protein", ret_type="fasta")
    >>> print([os.path.basename(file) for file in files])
    ['1L2Y_A.fa', '3O5R_A.fa']
    """
    # If only a single UID is present,
    # put it into a single element list
    if isinstance(uids, str):
        uids = [uids]
        single_element = True
    else:
        # Materialize arbitrary iterables (e.g. generators), because the
        # progress output below needs the total number of UIDs
        uids = list(uids)
        single_element = False
    # Create the target folder, if not existing
    if target_path is not None and not isdir(target_path):
        os.makedirs(target_path)
    files = []
    for i, uid in enumerate(uids):
        # Verbose output
        if verbose:
            print(f"Fetching file {i+1:d} / {len(uids):d} ({uid})...", end="\r")
        # Fetch file from database
        if target_path is not None:
            file = join(target_path, uid + "." + suffix)
        else:
            file = None
        # Skip the download, if a non-empty file is already present
        # and overwriting is not requested
        if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
            param_dict = {
                "db": sanitize_database_name(db_name),
                "id": uid,
                "rettype": ret_type,
                "retmode": ret_mode,
                "tool": "Biotite",
                "mail": "padix.key@gmail.com",
            }
            api_key = get_api_key()
            if api_key is not None:
                param_dict["api_key"] = api_key
            r = requests.get(_fetch_url, params=param_dict)
            content = r.text
            check_for_errors(content)
            if content.startswith(" Error"):
                # Drop the first 8 characters
                # (presumably the ' Error: ' prefix)
                raise RequestError(content[8:])
            if file is None:
                file = io.StringIO(content)
            else:
                with open(file, "w+") as f:
                    f.write(content)
        files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    if single_element:
        return files[0]
    else:
        return files
150
+
151
+
152
def fetch_single_file(
    uids, file_name, db_name, ret_type, ret_mode="text", overwrite=False
):
    """
    Almost the same as :func:`fetch()`, but the data for the given UIDs
    will be stored in a single file.

    Parameters
    ----------
    uids : str or iterable object of str
        A single UID or a list of UIDs of the
        file(s) to be downloaded.
    file_name : str or None
        The file path, including file name, to the target file.
        If ``None``, the content is returned as file-like object
        instead.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If false, the file is only downloaded, if no non-empty file
        with the same name already exists.

    Returns
    -------
    file : str or StringIO
        The file name of the downloaded file.
        If `file_name` is ``None``, the file content is stored in
        a `StringIO` object.

    Raises
    ------
    RequestError
        If the response of the server contains an error message.
    ValueError
        If `db_name` is not a known database name.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See also
    --------
    fetch
    """
    if (
        file_name is not None
        and isfile(file_name)
        and getsize(file_name) > 0
        and not overwrite
    ):
        # Do no redownload the already existing file
        return file_name
    # A single UID given as plain string must not be split into its
    # characters, so treat it like a single element list
    if isinstance(uids, str):
        uids = [uids]
    # The E-utility expects the UIDs as comma-separated list
    uid_list_str = ",".join(uids)
    param_dict = {
        "db": sanitize_database_name(db_name),
        "id": uid_list_str,
        "rettype": ret_type,
        "retmode": ret_mode,
        "tool": "Biotite",
        "mail": "padix.key@gmail.com",
    }
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_fetch_url, params=param_dict)
    content = r.text
    check_for_errors(content)
    if content.startswith(" Error"):
        # Drop the first 8 characters
        # (presumably the ' Error: ' prefix)
        raise RequestError(content[8:])
    if file_name is None:
        return io.StringIO(content)
    else:
        with open(file_name, "w+") as f:
            f.write(content)
        return file_name
@@ -0,0 +1,44 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["set_api_key", "get_api_key"]
8
+
9
+
10
+ _API_KEY = None
11
+
12
+
13
def get_api_key():
    """
    Return the currently configured
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The key stored in this module, or ``None`` if the module-level
        value has not been replaced yet.
    """
    # Reading a module-level variable needs no 'global' declaration
    return _API_KEY
25
+
26
+
27
def set_api_key(key):
    """
    Store the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_
    to be used for subsequent requests.

    Using an API key increases the request limit on the NCBI servers
    and is automatically used by functions in
    :mod:`biotite.database.entrez`.
    The key is only held in a module-level variable, i.e. in memory,
    and is therefore gone when the Python session ends.

    Parameters
    ----------
    key : str
        The API key.
    """
    # Rebind the module-level variable instead of creating a local one
    global _API_KEY
    _API_KEY = key
@@ -0,0 +1,262 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
# Present this module under the package name; objects defined below take
# their ``__module__`` from this value.
__name__ = "biotite.database.entrez"
__author__ = "Patrick Kunzmann"
# Names exported by ``from ... import *``.
__all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
8
+
9
+ import abc
10
+ from xml.etree import ElementTree
11
+ import requests
12
+ from biotite.database.entrez.check import check_for_errors
13
+ from biotite.database.entrez.dbnames import sanitize_database_name
14
+ from biotite.database.entrez.key import get_api_key
15
+ from biotite.database.error import RequestError
16
+
17
# URL of the NCBI E-utilities ESearch endpoint requested by search().
_search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
18
+
19
+
20
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base for objects that render to a search term
    for the NCBI Entrez search service.

    Instances are combined with ``|``, ``&`` and ``^``, yielding a
    :class:`CompositeQuery` with the ``OR``, ``AND`` and ``NOT``
    operator, respectively.
    A right-hand operand that is not a :class:`Query` is wrapped into
    a :class:`SimpleQuery` before combination.
    """

    def __init__(self):
        """Nothing to initialize at this level."""

    @abc.abstractmethod
    def __str__(self):
        """Render the query as a search term string."""

    def __or__(self, operand):
        rhs = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("OR", self, rhs)

    def __and__(self, operand):
        rhs = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("AND", self, rhs)

    def __xor__(self, operand):
        # ``^`` is the operator chosen to express an Entrez 'NOT'
        rhs = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("NOT", self, rhs)
47
+
48
+
49
class CompositeQuery(Query):
    """
    A representation of a composite query
    for the NCBI Entrez search service.

    A composite query is a combination of two other queries,
    combined either with an 'AND', 'OR' or 'NOT' operator.

    Usually the user does not create instances of this class directly,
    but :class:`Query` instances are combined with
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).

    Parameters
    ----------
    operator : str, {"AND", "OR", "NOT"}
        The combination operator.
    query1, query2 : Query
        The left and right operand of the combination.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli", "Organism") & \\
    ...     SimpleQuery("90:100", "Sequence Length")
    >>> print(type(query).__name__)
    CompositeQuery
    >>> print(query)
    ("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
    """

    def __init__(self, operator, query1, query2):
        super().__init__()
        self._op = operator
        self._q1 = query1
        self._q2 = query2

    def __str__(self):
        # Each operand is parenthesized, so nested composites keep
        # their grouping in the rendered term.
        return f"({self._q1}) {self._op} ({self._q2})"
87
+
88
+
89
class SimpleQuery(Query):
    """
    A simple query for the NCBI Entrez search service without
    combination via 'AND', 'OR' or 'NOT'. A query consists of a search
    term and an optional field.

    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Parameters
    ----------
    term : str
        The search term.
    field : str, optional
        The field to search the term in.
        The list of possible fields and the required search term
        formatting can be found
        `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
        By default the field is omitted and all fields are searched in
        for the term, implicitly.

    Raises
    ------
    ValueError
        If `field` is not a known field identifier or if `term`
        contains a character sequence reserved for the query syntax.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli")
    >>> print(query)
    "Escherichia coli"
    >>> query = SimpleQuery("Escherichia coli", "Organism")
    >>> print(query)
    "Escherichia coli"[Organism]
    """

    # Field identifiers are taken from
    # https://www.ncbi.nlm.nih.gov/books/NBK49540/
    _fields = [
        "Accession",
        "All Fields",
        "Author",
        "EC/RN Number",
        "Feature Key",
        "Filter",
        "Gene Name",
        "Genome Project",
        "Issue",
        "Journal",
        "Keyword",
        "Modification Date",
        "Molecular Weight",
        "Organism",
        "Page Number",
        "Primary Accession",
        "Properties",
        "Protein Name",
        "Publication Date",
        "SeqID String",
        "Sequence Length",
        "Substance Name",
        "Text Word",
        "Title",
        "Volume",
        # Abbreviations
        "ACCN",
        "ALL",
        "AU",
        "AUTH",
        "ECNO",
        "FKEY",
        "FILT",
        "SB",
        "GENE",
        "ISS",
        "JOUR",
        "KYWD",
        "MDAT",
        "MOLWT",
        "ORGN",
        "PAGE",
        "PACC",
        "PORGN",
        "PROP",
        "PROT",
        "PDAT",
        "SQID",
        "SLEN",
        "SUBS",
        "WORD",
        "TI",
        # These two entries must be separate list items: without the
        # comma the adjacent literals are concatenated to "TITLVOL"
        "TITL",
        "VOL",
    ]

    # Character sequences that carry meaning in the query syntax
    _invalid_strings = ('"', "AND", "OR", "NOT", "[", "]", "(", ")", "\t", "\n")

    def __init__(self, term, field=None):
        super().__init__()
        if field is not None and field not in SimpleQuery._fields:
            raise ValueError(f"Unknown field identifier '{field}'")
        # NOTE: this is a plain substring test, so an upper-case
        # operator name is rejected even inside a longer word
        for invalid_string in SimpleQuery._invalid_strings:
            if invalid_string in term:
                raise ValueError(f"Query contains illegal term {invalid_string}")
        if " " in term:
            # Encapsulate in quotes if spaces are in search term
            term = f'"{term}"'
        self._term = term
        self._field = field

    def __str__(self):
        if self._field is None:
            return self._term
        return f"{self._term}[{self._field}]"
198
+
199
+
200
def search(query, db_name, number=20):
    r"""
    Get the NCBI UIDs of the entries that meet the given query
    requirements, via the NCBI ESearch service.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    db_name : str
        E-utility or common database name.
    number : int
        The maximum number of UIDs that are obtained.

    Returns
    -------
    ids : list of str
        A list of strings containing all NCBI UIDs (accession number)
        that meet the query requirements.

    Raises
    ------
    RequestError
        If the server response cannot be parsed as XML.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    Notes
    -----
    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Examples
    --------
    >>> query = SimpleQuery("Escherichia coli", "Organism") & \
    ...     SimpleQuery("90:100", "Sequence Length")
    >>> ids = search(query, "nuccore", number=5)
    >>> print(ids)
    ['...', '...', '...', '...', '...']
    """
    param_dict = {
        "db": sanitize_database_name(db_name),
        "term": str(query),
        "retmax": str(number),
    }
    # The API key is only sent if the user has set one
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_search_url, params=param_dict)
    xml_response = r.text
    check_for_errors(xml_response)
    try:
        root = ElementTree.fromstring(xml_response)
    except ElementTree.ParseError as err:
        # Only show the beginning of a long response in the message
        if len(xml_response) > 100:
            xml_response = xml_response[:100] + "..."
        raise RequestError(f"Invalid server response: {xml_response}") from err
    return [element.text for element in root.findall(".//IdList/Id")]