biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,15 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for downloading files from the NCBI Entrez database.
7
+ """
8
+
9
+ __name__ = "biotite.database.entrez"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .dbnames import *
13
+ from .download import *
14
+ from .query import *
15
+ from .key import *
@@ -0,0 +1,61 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
7
+ __all__ = ["check_for_errors"]
8
+
9
+ import json
10
+ from ..error import RequestError
11
+
12
+
13
+ # Taken from https://github.com/kblin/ncbi-entrez-error-messages
14
_error_messages = [
    "Error reading from remote server",
    "Bad gateway",
    "Bad Gateway",
    "Cannot process ID list",
    "server is temporarily unable to service your request",
    "Service unavailable",
    "Server Error",
    "ID list is empty",
    "Supplied id parameter is empty",
    "Resource temporarily unavailable",
    "Failed to retrieve sequence",
    "Failed to understand id",
]


def check_for_errors(message):
    """
    Check for common error messages in NCBI Entrez database responses.

    Two checks are performed: a short response is parsed as JSON and
    inspected for an ``"error"`` member, and the last 200 characters
    of the response are scanned for the known error messages.

    Parameters
    ----------
    message : str
        The message received from NCBI Entrez.

    Raises
    ------
    RequestError
        If the message contains an error message.
    """
    # Server can respond short JSON error messages
    if len(message) < 500:
        try:
            message_json = json.loads(message)
        except json.decoder.JSONDecodeError:
            # It is not a JSON message
            pass
        else:
            # A short regular response may still be valid JSON without
            # being an object (e.g. a bare UID parses as an int);
            # only a JSON object can carry an 'error' member
            if isinstance(message_json, dict) and "error" in message_json:
                raise RequestError(message_json["error"])

    # Error always appear at the end of message
    # Seemingly arbitrary '+' characters are in NCBI error messages
    message_end = message[-200:].replace("+", "")
    for error_msg in _error_messages:
        # Often whitespace is also replaced by '+' in error message,
        # hence the comparison uses the whitespace-free error message
        # NOTE(review): only '+' is stripped from the response, so a
        # known message that arrives with literal spaces is not
        # matched here - confirm that this is intended
        if error_msg.replace(" ", "") in message_end:
            raise RequestError(error_msg)
@@ -0,0 +1,89 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_database_name"]
8
+
9
+
10
# Maps the common (display) name of each NCBI Entrez database
# to the database name expected by the E-utilities
_db_names = {
    "BioProject"        : "bioproject",
    "BioSample"         : "biosample",
    "Biosystems"        : "biosystems",
    "Books"             : "books",
    "Conserved Domains" : "cdd",
    "dbGaP"             : "gap",
    "dbVar"             : "dbvar",
    "Epigenomics"       : "epigenomics",
    "EST"               : "nucest",
    "Gene"              : "gene",
    "Genome"            : "genome",
    "GEO Datasets"      : "gds",
    "GEO Profiles"      : "geoprofiles",
    "GSS"               : "nucgss",
    "HomoloGene"        : "homologene",
    "MeSH"              : "mesh",
    "NCBI C++ Toolkit"  : "toolkit",
    "NCBI Web Site"     : "ncbisearch",
    "NLM Catalog"       : "nlmcatalog",
    "Nucleotide"        : "nuccore",
    "OMIA"              : "omia",
    "PopSet"            : "popset",
    "Probe"             : "probe",
    "Protein"           : "protein",
    "Protein Clusters"  : "proteinclusters",
    "PubChem BioAssay"  : "pcassay",
    "PubChem Compound"  : "pccompound",
    "PubChem Substance" : "pcsubstance",
    "PubMed"            : "pubmed",
    "PubMed Central"    : "pmc",
    "SNP"               : "snp",
    "SRA"               : "sra",
    "Structure"         : "structure",
    "Taxonomy"          : "taxonomy",
    "UniGene"           : "unigene",
    "UniSTS"            : "unists"
}


def get_database_name(database):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name.

    Parameters
    ----------
    database : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    KeyError
        If `database` is not a known common database name.

    Examples
    --------

    >>> print(get_database_name("Nucleotide"))
    nuccore
    """
    return _db_names[database]


def sanitize_database_name(db_name):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name, return E-utility database name, or raise an exception if the
    database name is not existing.

    Only for internal usage in ``download.py`` and ``query.py``.

    Parameters
    ----------
    db_name : str
        Common or E-utility database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    ValueError
        If `db_name` is neither a common nor an E-utility database
        name.
    """
    if db_name in _db_names:
        # Convert into E-utility database name
        return _db_names[db_name]
    elif db_name in _db_names.values():
        # Is already E-utility database name
        return db_name
    else:
        # The f-string prefix is required,
        # so that the offending name appears in the message
        raise ValueError(f"Database '{db_name}' is not existing")
@@ -0,0 +1,223 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["fetch", "fetch_single_file"]
8
+
9
+ from os.path import isdir, isfile, join, getsize
10
+ import os
11
+ import glob
12
+ import io
13
+ import requests
14
+ from .check import check_for_errors
15
+ from .dbnames import sanitize_database_name
16
+ from .key import get_api_key
17
+ from ..error import RequestError
18
+
19
+
20
_fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"


def fetch(uids, target_path, suffix, db_name, ret_type,
          ret_mode="text", overwrite=False, verbose=False):
    """
    Download files from the NCBI Entrez database in various formats.

    The data for each UID will be fetched into a separate file.

    A list of valid database, retrieval type and mode combinations can
    be found under
    `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_

    This function requires an internet connection.

    Parameters
    ----------
    uids : str or iterable object of str
        A single *unique identifier* (UID) or a list of UIDs of the
        file(s) to be downloaded.
    target_path : str or None
        The target directory of the downloaded files.
        It is created, if it does not exist yet.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`) instead.
    suffix : str
        The file suffix of the downloaded files. This value is
        independent of the retrieval type.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If true, existing files will be overwritten. Otherwise the
        respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is
        empty. (Default: False)
    verbose : bool, optional
        If true, the function will output the download progress.
        (Default: False)

    Returns
    -------
    files : str or StringIO or list of (str or StringIO)
        The file path(s) to the downloaded files.
        If a single string (a single UID) was given in `uids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If `target_path` is ``None``, the file contents are stored in
        `StringIO` objects.

    Raises
    ------
    RequestError
        If the response of the server contains an error message.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See also
    --------
    fetch_single_file

    Examples
    --------

    >>> import os.path
    >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
    ...               db_name="protein", ret_type="fasta")
    >>> print([os.path.basename(file) for file in files])
    ['1L2Y_A.fa', '3O5R_A.fa']
    """
    # If only a single UID is present,
    # put it into a single element list
    if isinstance(uids, str):
        uids = [uids]
        single_element = True
    else:
        # Materialize the iterable: the progress output needs the total
        # number of UIDs, which e.g. a generator cannot provide
        uids = list(uids)
        single_element = False
    # Create the target folder, if not existing
    if target_path is not None:
        os.makedirs(target_path, exist_ok=True)
    n_uids = len(uids)
    files = []
    for i, uid in enumerate(uids):
        # Verbose output
        if verbose:
            print(f"Fetching file {i+1:d} / {n_uids:d} ({uid})...", end="\r")
        # Fetch file from database
        if target_path is not None:
            file = join(target_path, uid + "." + suffix)
        else:
            file = None
        # Download, unless a non-empty file already exists
        # and overwriting is not requested
        if file is None \
           or not isfile(file) \
           or getsize(file) == 0 \
           or overwrite:
                param_dict = {
                    "db" : sanitize_database_name(db_name),
                    "id" : uid,
                    "rettype" : ret_type,
                    "retmode" : ret_mode,
                    "tool" : "Biotite",
                    # NOTE(review): the E-utilities documentation names
                    # this parameter 'email' - confirm that 'mail' is
                    # intended
                    "mail" : "padix.key@gmail.com"
                }
                api_key = get_api_key()
                if api_key is not None:
                    param_dict["api_key"] = api_key
                r = requests.get(_fetch_url, params=param_dict)
                content = r.text
                check_for_errors(content)
                if content.startswith(" Error"):
                    # Strip the leading ' Error' marker (8 characters)
                    raise RequestError(content[8:])
                if file is None:
                    file = io.StringIO(content)
                else:
                    with open(file, "w") as f:
                        f.write(content)
        files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    if single_element:
        return files[0]
    else:
        return files
147
+
148
+
149
def fetch_single_file(uids, file_name, db_name, ret_type, ret_mode="text",
                      overwrite=False):
    """
    Almost the same as :func:`fetch()`, but the data for the given UIDs
    will be stored in a single file.

    Parameters
    ----------
    uids : iterable object of str
        A list of UIDs of the
        file(s) to be downloaded.
    file_name : str or None
        The file path, including file name, to the target file.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`) instead.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If false, the file is only downloaded, if no non-empty file
        with the same name already exists.

    Returns
    -------
    file : str or StringIO
        The file name of the downloaded file.
        If `file_name` is ``None``, the file content is stored in
        a `StringIO` object.

    Raises
    ------
    RequestError
        If the response of the server contains an error message.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See also
    --------
    fetch
    """
    if file_name is not None \
       and isfile(file_name) \
       and getsize(file_name) > 0 \
       and not overwrite:
            # Do no redownload the already existing file
            return file_name
    # All UIDs are requested at once as comma-separated list
    uid_list_str = ",".join(uids)
    param_dict = {
        "db" : sanitize_database_name(db_name),
        "id" : uid_list_str,
        "rettype" : ret_type,
        "retmode" : ret_mode,
        "tool" : "Biotite",
        # NOTE(review): the E-utilities documentation names this
        # parameter 'email' - confirm that 'mail' is intended
        "mail" : "padix.key@gmail.com"
    }
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_fetch_url, params=param_dict)
    content = r.text
    check_for_errors(content)
    if content.startswith(" Error"):
        # Strip the leading ' Error' marker (8 characters)
        raise RequestError(content[8:])
    if file_name is None:
        return io.StringIO(content)
    else:
        with open(file_name, "w") as f:
            f.write(content)
        return file_name
@@ -0,0 +1,44 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["set_api_key", "get_api_key"]
8
+
9
+
10
# The NCBI API key of the current Python session, ``None`` if unset
_API_KEY = None


def get_api_key():
    """
    Get the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The API key, if it was already set before, ``None`` otherwise.
    """
    # Reading a module-level variable requires no 'global' statement
    return _API_KEY


def set_api_key(key):
    """
    Set the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Using an API key increases the request limit on the NCBI servers
    and is automatically used by functions in
    :mod:`biotite.database.entrez`.
    This key is kept only in memory and hence removed in the end of the
    Python session.

    Parameters
    ----------
    key : str
        The API key.
    """
    global _API_KEY
    _API_KEY = key
@@ -0,0 +1,223 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Query", "SimpleQuery", "CompositeQuery", "search"]
8
+
9
+ import requests
10
+ import abc
11
+ from xml.etree import ElementTree
12
+ from .check import check_for_errors
13
+ from .dbnames import sanitize_database_name
14
+ from ..error import RequestError
15
+ from .key import get_api_key
16
+
17
+
18
+ _search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
19
+
20
class Query(metaclass=abc.ABCMeta):
    """
    Abstract base class wrapping a search term for the NCBI Entrez
    search service.

    Subclasses must implement :meth:`__str__`.
    Two queries are combined into a :class:`CompositeQuery` via
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).
    A right operand that is not a :class:`Query` is wrapped into a
    :class:`SimpleQuery` before it is combined.
    """

    def __init__(self):
        pass

    @abc.abstractmethod
    def __str__(self):
        pass

    def __or__(self, operand):
        # ``query | other`` -> 'OR' combination
        other = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("OR", self, other)

    def __and__(self, operand):
        # ``query & other`` -> 'AND' combination
        other = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("AND", self, other)

    def __xor__(self, operand):
        # ``query ^ other`` -> 'NOT' combination
        other = operand if isinstance(operand, Query) else SimpleQuery(operand)
        return CompositeQuery("NOT", self, other)
46
+
47
+
48
class CompositeQuery(Query):
    """
    A composite query for the NCBI Entrez search service.

    A composite query joins two other queries with an
    'AND', 'OR' or 'NOT' operator.

    Instances are usually not created directly.
    Instead, :class:`Query` instances are combined with
    ``|`` (OR), ``&`` (AND) or ``^`` (NOT).

    Parameters
    ----------
    operator : str, {"AND", "OR", "NOT"}
        The combination operator.
    query1, query2 : Query
        The left and the right query to be combined.

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli", "Organism") & \\
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> print(type(query).__name__)
    CompositeQuery
    >>> print(query)
    ("Escherichia coli"[Organism]) AND (90:100[Sequence Length])
    """

    def __init__(self, operator, query1, query2):
        super().__init__()
        self._op = operator
        self._q1 = query1
        self._q2 = query2

    def __str__(self):
        # Both operands are parenthesized, so nested composite queries
        # keep their grouping in the resulting search term
        return f"({self._q1!s}) {self._op} ({self._q2})"
86
+
87
+
88
+
89
class SimpleQuery(Query):
    """
    A simple query for the NCBI Entrez search service without
    combination via 'AND', 'OR' or 'NOT'. A query consists of a search
    term and an optional field.

    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Parameters
    ----------
    term : str
        The search term.
        It is enclosed in double quotes automatically, if it contains
        a space character.
    field : str, optional
        The field to search the term in.
        The list of possible fields and the required search term
        formatting can be found
        `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.
        By default the field is omitted and all fields are searched in
        for the term, implicitly.

    Raises
    ------
    ValueError
        If `field` is not a known field identifier or if `term`
        contains one of the reserved strings
        (``"``, ``AND``, ``OR``, ``NOT``, brackets, parentheses,
        tab or newline).

    Examples
    --------

    >>> query = SimpleQuery("Escherichia coli")
    >>> print(query)
    "Escherichia coli"
    >>> query = SimpleQuery("Escherichia coli", "Organism")
    >>> print(query)
    "Escherichia coli"[Organism]
    """

    # Field identifiers are taken from
    # https://www.ncbi.nlm.nih.gov/books/NBK49540/
    _fields = [
        "Accession", "All Fields", "Author", "EC/RN Number", "Feature Key",
        "Filter", "Gene Name", "Genome Project", "Issue", "Journal", "Keyword",
        "Modification Date", "Molecular Weight", "Organism", "Page Number",
        "Primary Accession", "Properties", "Protein Name", "Publication Date",
        "SeqID String", "Sequence Length", "Substance Name", "Text Word",
        "Title", "Volume",
        # Abbreviations
        "ACCN", "ALL", "AU", "AUTH", "ECNO", "FKEY", "FILT", "SB", "GENE",
        "ISS", "JOUR", "KYWD", "MDAT", "MOLWT", "ORGN", "PAGE", "PACC",
        "PORGN", "PROP", "PROT", "PDAT", "SQID", "SLEN", "SUBS", "WORD", "TI",
        # The comma between the last two entries is essential:
        # without it Python concatenates the adjacent literals into the
        # single bogus identifier "TITLVOL"
        "TITL", "VOL",
    ]

    def __init__(self, term, field=None):
        super().__init__()
        if field is not None and field not in SimpleQuery._fields:
            raise ValueError(f"Unknown field identifier '{field}'")
        # Reject everything that has a syntactic meaning in an Entrez
        # search term.
        # NOTE: this is a plain substring test, i.e. a term that merely
        # contains e.g. 'OR' as part of an upper case word is rejected
        # as well
        for invalid_string in \
            ['"', "AND", "OR", "NOT", "[", "]", "(", ")", "\t", "\n"]:
                if invalid_string in term:
                    raise ValueError(
                        f"Query contains illegal term {invalid_string}"
                    )
        if " " in term:
            # Encapsulate in quotes if spaces are in search term
            term = f'"{term}"'
        self._term = term
        self._field = field

    def __str__(self):
        string = self._term
        if self._field is not None:
            # The field is appended in square brackets
            string += f"[{self._field}]"
        return string
159
+
160
+
161
def search(query, db_name, number=20):
    r"""
    Get all NCBI UIDs that meet the given query requirements,
    via the NCBI ESearch service.

    This function requires an internet connection.

    Parameters
    ----------
    query : Query
        The search query.
    db_name : str
        E-utility or common database name.
    number : int
        The maximum number of UIDs that are obtained.

    Returns
    -------
    ids : list of str
        A list of strings containing all NCBI UIDs (accession number)
        that meet the query requirements.

    Raises
    ------
    RequestError
        If the server response cannot be parsed as XML.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    Notes
    -----
    A list of available search fields with description can be found
    `here <https://www.ncbi.nlm.nih.gov/books/NBK49540/>`_.

    Examples
    --------
    >>> query = SimpleQuery("Escherichia coli", "Organism") & \
    ...         SimpleQuery("90:100", "Sequence Length")
    >>> ids = search(query, "nuccore", number=5)
    >>> print(ids)
    ['...', '...', '...', '...', '...']
    """
    request_params = {
        "db": sanitize_database_name(db_name),
        "term": str(query),
        "retmax": str(number),
    }
    # The API key is only sent, if the user has registered one
    key = get_api_key()
    if key is not None:
        request_params["api_key"] = key

    response_text = requests.get(_search_url, params=request_params).text
    check_for_errors(response_text)

    try:
        root = ElementTree.fromstring(response_text)
    except ElementTree.ParseError:
        # Keep the error message short: show at most the first
        # 100 characters of the unparsable response
        excerpt = response_text
        if len(excerpt) > 100:
            excerpt = excerpt[:100] + "..."
        raise RequestError(f"Invalid server response: {excerpt}")

    # Each UID is the text of an 'Id' element inside an 'IdList'
    return [element.text for element in root.findall(".//IdList/Id")]
@@ -0,0 +1,15 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["RequestError"]
8
+
9
+
10
class RequestError(Exception):
    """
    Raised when a database answers with an error message or with
    otherwise malformed content.
    """
@@ -0,0 +1,21 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for searching and downloading files from the *PubChem*
7
+ database.
8
+ Although *PubChem* is part of *NCBI Entrez*,
9
+ :mod:`biotite.database.entrez` is only capable of accessing
10
+ meta-information from *PubChem*.
11
+ This subpackage, on the other hand, supports searching *PubChem*
12
+ compounds based on chemical information and is able to download
13
+ structure records.
14
+ """
15
+
16
+ __name__ = "biotite.database.pubchem"
17
+ __author__ = "Patrick Kunzmann"
18
+
19
+ from .download import *
20
+ from .query import *
21
+ from .throttle import *