biotite 1.6.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +426 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +202 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +66 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +224 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +259 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +191 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +127 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +491 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +763 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +462 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1596 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +521 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +646 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +426 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2122 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +452 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.6.0.dist-info/METADATA +162 -0
  352. biotite-1.6.0.dist-info/RECORD +354 -0
  353. biotite-1.6.0.dist-info/WHEEL +4 -0
  354. biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for downloading predicted protein structures from the AlphaFold DB.
7
+ """
8
+
9
+ __name__ = "biotite.database.afdb"
10
+ __author__ = "Alex Carlin"
11
+
12
+ from .download import *
@@ -0,0 +1,202 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.afdb"
6
+ __author__ = "Patrick Kunzmann, Alex Carlin"
7
+ __all__ = ["fetch"]
8
+
9
+ import io
10
+ import re
11
+ from pathlib import Path
12
+ from xml.etree import ElementTree
13
+ import requests
14
+ from biotite.database.error import RequestError
15
+
16
# Base URL that is queried with a UniProt ID to obtain the prediction metadata
_METADATA_URL = "https://alphafold.com/api/prediction"
# Formats whose content is handled as bytes instead of text
_BINARY_FORMATS = ["bcif"]
# The accession part is adopted from
# https://www.uniprot.org/help/accession_numbers;
# around it an optional 'AF-' or 'AF_AF' prefix and an optional '-F1' or 'F1'
# suffix are accepted
_UNIPROT_PATTERN = "".join(
    [
        r"^(?P<prefix>(AF-)|(AF_AF))?",
        r"(?P<id>[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})",
        r"(?P<suffix>-?F1)?$",
    ]
)
25
+
26
+
27
def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
    """
    Download predicted protein structures from the AlphaFold DB.

    This function requires an internet connection.

    Parameters
    ----------
    ids : str or iterable object of str
        A single ID or a list of IDs of the file(s) to be downloaded.
        They can be either UniProt IDs (e.g. ``P12345``), AlphaFold DB IDs
        (e.g. ``AF-P12345-F1``) or computational RCSB IDs (e.g. ``AF_AFP12345F1``).
    format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
        The format of the files to be downloaded.
    target_path : str, optional
        The target directory of the downloaded files.
        By default, the file content is stored in a file-like object
        (`StringIO` or `BytesIO`, respectively).
    overwrite : bool, optional
        If true, existing files will be overwritten.
        Otherwise the respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
        The file path(s) to the downloaded files.
        If a single string (a single ID) was given in `ids`, a single string is
        returned.
        If a list (or other iterable object) was given, a list of strings is returned.
        If no `target_path` was given, the file contents are stored in either
        ``StringIO`` or ``BytesIO`` objects.

    Raises
    ------
    ValueError
        If `format` is not one of the supported formats.

    Examples
    --------

    >>> from pathlib import Path
    >>> file = fetch("P12345", "cif", path_to_directory)
    >>> print(Path(file).name)
    P12345.cif
    >>> files = fetch(["P12345", "Q8K9I1"], "cif", path_to_directory)
    >>> print([Path(file).name for file in files])
    ['P12345.cif', 'Q8K9I1.cif']
    """
    if format not in ["pdb", "pdbx", "cif", "mmcif", "bcif", "fasta"]:
        raise ValueError(f"Format '{format}' is not supported")
    if format in ["pdbx", "mmcif"]:
        format = "cif"
    is_binary = format in _BINARY_FORMATS

    # If only a single ID is present,
    # put it into a single element list
    if isinstance(ids, str):
        ids = [ids]
        single_element = True
    else:
        # Materialize arbitrary iterables (e.g. generators),
        # as the progress output needs the total number of IDs
        ids = list(ids)
        single_element = False
    if target_path is not None:
        target_path = Path(target_path)
        target_path.mkdir(parents=True, exist_ok=True)

    files = []
    # The context manager closes the session when all downloads are finished
    with requests.Session() as session:
        for i, entry_id in enumerate(ids):
            # Verbose output
            if verbose:
                print(
                    f"Fetching file {i + 1:d} / {len(ids):d} ({entry_id})...",
                    end="\r",
                )
            # Fetch file from database
            if target_path is not None:
                file = target_path / f"{entry_id}.{format}"
            else:
                # 'file = None' -> store content in a file-like object
                file = None
            if (
                file is None
                or not file.is_file()
                or file.stat().st_size == 0
                or overwrite
            ):
                file_response = session.get(_get_file_url(session, entry_id, format))
                _assert_valid_file(file_response, entry_id)
                content = file_response.content if is_binary else file_response.text

                if file is None:
                    file = io.BytesIO(content) if is_binary else io.StringIO(content)
                else:
                    mode = "wb+" if is_binary else "w+"
                    with open(file, mode) as f:
                        f.write(content)

            files.append(file)
    if verbose:
        print("\nDone")

    # Return paths as strings
    files = [file.as_posix() if isinstance(file, Path) else file for file in files]
    # If input was a single ID, return only a single element
    if single_element:
        return files[0]
    else:
        return files
130
+
131
+
132
def _get_file_url(session, id, format):
    """
    Look up the download URL of a file via the ``prediction`` API endpoint.

    Parameters
    ----------
    session : requests.Session
        The session the metadata request is sent with.
    id : str
        The ID of the file to be downloaded.
    format : str
        The format of the file to be downloaded.

    Returns
    -------
    file_url : str
        The value stored under ``<format>Url`` in the first metadata entry.

    Raises
    ------
    RequestError
        If the metadata response is not valid JSON or contains no entry.
    """
    accession = _extract_id(id)
    metadata_response = session.get(f"{_METADATA_URL}/{accession}")
    try:
        entries = metadata_response.json()
    except requests.exceptions.JSONDecodeError:
        raise RequestError("Received malformed JSON response")
    if len(entries) == 0:
        raise RequestError(f"ID {id} is invalid")
    # Only the first entry of the returned list is evaluated
    url_key = f"{format}Url"
    first_entry = entries[0]
    return first_entry[url_key]
159
+
160
+
161
def _extract_id(id):
    """
    Obtain the plain UniProt ID from a qualifier accepted by the AFDB functions.

    The qualifier is matched against ``_UNIPROT_PATTERN``, so it may be

    - the UniProt ID itself (e.g. ``P12345``),
    - the UniProt ID wrapped in a prefix and suffix (e.g. ``AF-P12345-F1``).

    Parameters
    ----------
    id : str
        The qualifier to extract the UniProt ID from.

    Returns
    -------
    uniprot_id : str
        The content of the ``id`` group of the pattern match.

    Raises
    ------
    ValueError
        If the qualifier does not match the pattern.
    """
    parsed = re.match(_UNIPROT_PATTERN, id)
    if parsed is not None:
        return parsed.group("id")
    raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
183
+
184
+
185
def _assert_valid_file(response, id):
    """
    Check whether the response is an actual structure file
    or an error response, e.g. a *404* error due to an invalid UniProt ID.

    Parameters
    ----------
    response : requests.Response
        The response of the file download request.
    id : str
        The requested ID, only used for the error messages.

    Raises
    ------
    RequestError
        If the response is empty or is an XML document with an ``Error`` root
        element.
    """
    if len(response.text) == 0:
        raise RequestError(f"Received no response for '{id}'")
    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError:
        # This is not XML -> the response is probably a valid file
        pass
    else:
        if root.tag == "Error":
            # The 'Message' element may be missing or empty:
            # still raise a 'RequestError' instead of an 'AttributeError'
            message = root.findtext("Message") or "unknown error"
            raise RequestError(f"Error while fetching '{id}': {message}")
    # Fallback for other errors
    response.raise_for_status()
@@ -0,0 +1,15 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for downloading files from the NCBI Entrez database.
7
+ """
8
+
9
+ __name__ = "biotite.database.entrez"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .dbnames import *
13
+ from .download import *
14
+ from .key import *
15
+ from .query import *
@@ -0,0 +1,66 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
7
+ __all__ = ["check_for_errors"]
8
+
9
+ import json
10
+ from biotite.database.error import RequestError
11
+
12
+ # Taken from https://github.com/kblin/ncbi-entrez-error-messages
13
_error_messages = [
    "Error reading from remote server",
    "Bad gateway",
    "Bad Gateway",
    "Cannot process ID list",
    "server is temporarily unable to service your request",
    "Service unavailable",
    "Server Error",
    "ID list is empty",
    "Supplied id parameter is empty",
    "Resource temporarily unavailable",
    "Failed to retrieve sequence",
    "Failed to understand id",
]


def check_for_errors(response):
    """
    Check for common error messages in NCBI Entrez database responses.

    Parameters
    ----------
    response : requests.Response
        The message received from NCBI Entrez.

    Raises
    ------
    RequestError
        If the message contains an error message.
    """
    text = response.text

    # Server can respond short JSON error messages
    if len(text) < 500:
        try:
            message_json = json.loads(text)
        except json.decoder.JSONDecodeError:
            # It is not a JSON message
            pass
        else:
            # Valid JSON may also be a list, number or string (e.g. '42'),
            # which cannot carry an 'error' entry
            if isinstance(message_json, dict) and "error" in message_json:
                raise RequestError(message_json["error"])

    # Sometimes the error message is at the beginning of the response...
    if text.startswith("Error"):
        raise RequestError(text[7:])
    # ...and sometimes at the end of the message
    # Seemingly arbitrary '+' characters are in NCBI error messages
    message_end = text[-200:].replace("+", "")
    for error_msg in _error_messages:
        # Often whitespace is also replaced by '+' in error message
        if error_msg.replace(" ", "") in message_end:
            raise RequestError(error_msg)

    # Fallback for other errors
    response.raise_for_status()
@@ -0,0 +1,101 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_database_name"]
8
+
9
+
10
+ # fmt: off
11
_db_names = {
    "BioProject"        : "bioproject",
    "BioSample"         : "biosample",
    "Biosystems"        : "biosystems",
    "Books"             : "books",
    "Conserved Domains" : "cdd",
    "dbGaP"             : "gap",
    "dbVar"             : "dbvar",
    "Epigenomics"       : "epigenomics",
    "EST"               : "nucest",
    "Gene"              : "gene",
    "Genome"            : "genome",
    "GEO Datasets"      : "gds",
    "GEO Profiles"      : "geoprofiles",
    "GSS"               : "nucgss",
    "HomoloGene"        : "homologene",
    "MeSH"              : "mesh",
    "NCBI C++ Toolkit"  : "toolkit",
    "NCBI Web Site"     : "ncbisearch",
    "NLM Catalog"       : "nlmcatalog",
    "Nucleotide"        : "nuccore",
    "OMIA"              : "omia",
    "PopSet"            : "popset",
    "Probe"             : "probe",
    "Protein"           : "protein",
    "Protein Clusters"  : "proteinclusters",
    "PubChem BioAssay"  : "pcassay",
    "PubChem Compound"  : "pccompound",
    "PubChem Substance" : "pcsubstance",
    "PubMed"            : "pubmed",
    "PubMed Central"    : "pmc",
    "SNP"               : "snp",
    "SRA"               : "sra",
    "Structure"         : "structure",
    "Taxonomy"          : "taxonomy",
    "UniGene"           : "unigene",
    "UniSTS"            : "unists"
}
# fmt: on


def get_database_name(database):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name.

    Parameters
    ----------
    database : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    KeyError
        If `database` is not a known common database name.

    Examples
    --------

    >>> print(get_database_name("Nucleotide"))
    nuccore
    """
    return _db_names[database]


def sanitize_database_name(db_name):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name, return E-utility database name, or raise an exception if the
    database name is not existing.

    Only for internal usage in ``download.py`` and ``query.py``.

    Parameters
    ----------
    db_name : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    ValueError
        If `db_name` is neither a common nor an E-utility database name.
    """
    if db_name in _db_names:
        # Convert into E-utility database name
        return _db_names[db_name]
    elif db_name in _db_names.values():
        # Is already E-utility database name
        return db_name
    else:
        # The f-string prefix is required to show the offending name
        raise ValueError(f"Database '{db_name}' is not existing")
@@ -0,0 +1,224 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["fetch", "fetch_single_file"]
8
+
9
+ import io
10
+ import os
11
+ from os.path import getsize, isdir, isfile, join
12
+ import requests
13
+ from biotite.database.entrez.check import check_for_errors
14
+ from biotite.database.entrez.dbnames import sanitize_database_name
15
+ from biotite.database.entrez.key import get_api_key
16
+
17
+ _fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
18
+
19
+
20
def fetch(
    uids,
    target_path,
    suffix,
    db_name,
    ret_type,
    ret_mode="text",
    overwrite=False,
    verbose=False,
):
    """
    Download files from the NCBI Entrez database in various formats.

    The data for each UID will be fetched into a separate file.

    A list of valid database, retrieval type and mode combinations can
    be found under
    `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_

    This function requires an internet connection.

    Parameters
    ----------
    uids : str or iterable object of str
        A single *unique identifier* (UID) or a list of UIDs of the
        file(s) to be downloaded.
        Any iterable (including a generator) is accepted.
    target_path : str or None
        The target directory of the downloaded files.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`).
    suffix : str
        The file suffix of the downloaded files. This value is
        independent of the retrieval type.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If true, existing files will be overwritten. Otherwise the
        respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is
        empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or list of (str or StringIO)
        The file path(s) to the downloaded files.
        If a single string (a single UID) was given in `uids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If `target_path` is ``None``, the file contents are stored in
        `StringIO` objects instead.

    Raises
    ------
    ValueError
        If `db_name` is not a known database name and at least one
        entry needs to be downloaded.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See Also
    --------
    fetch_single_file : Fetch multiple entries as a single file.

    Examples
    --------

    >>> import os.path
    >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
    ...               db_name="protein", ret_type="fasta")
    >>> print([os.path.basename(file) for file in files])
    ['1L2Y_A.fa', '3O5R_A.fa']
    """
    # If only a single UID is present,
    # put it into a single element list
    if isinstance(uids, str):
        uids = [uids]
        single_element = True
    else:
        # Materialize arbitrary iterables (e.g. generators),
        # as the progress output needs the total number of UIDs
        uids = list(uids)
        single_element = False
    # Create the target folder, if not existing
    if target_path is not None and not isdir(target_path):
        os.makedirs(target_path)
    files = []
    # The context manager releases the pooled connections of the
    # session, even if a request or the error check raises
    with requests.Session() as session:
        for i, uid in enumerate(uids):
            # Verbose output
            if verbose:
                print(
                    f"Fetching file {i + 1:d} / {len(uids):d} ({uid})...",
                    end="\r",
                )
            # Fetch file from database
            if target_path is not None:
                file = join(target_path, uid + "." + suffix)
            else:
                file = None
            # Skip the download only for an existing, non-empty file
            # that should not be overwritten
            if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
                # NOTE(review): the E-utilities documentation names the
                # contact parameter "email" - confirm "mail" is intended
                param_dict = {
                    "db": sanitize_database_name(db_name),
                    "id": uid,
                    "rettype": ret_type,
                    "retmode": ret_mode,
                    "tool": "Biotite",
                    "mail": "padix.key@gmail.com",
                }
                api_key = get_api_key()
                if api_key is not None:
                    param_dict["api_key"] = api_key
                r = session.get(_fetch_url, params=param_dict)
                # Presumably raises if the response reports an error
                check_for_errors(r)
                content = r.text
                if file is None:
                    file = io.StringIO(content)
                else:
                    with open(file, "w+") as f:
                        f.write(content)
            files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    if single_element:
        return files[0]
    return files
147
+
148
+
149
def fetch_single_file(
    uids, file_name, db_name, ret_type, ret_mode="text", overwrite=False
):
    """
    Almost the same as :func:`fetch()`, but the data for the given UIDs
    will be stored in a single file.

    Parameters
    ----------
    uids : str or iterable object of str
        A list of UIDs of the file(s) to be downloaded.
        A single string is interpreted as a single UID.
    file_name : str or None
        The file path, including file name, to the target file.
        If ``None``, the file content is returned as file-like object.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If false, the file is only downloaded, if no non-empty file
        with the same name already exists.

    Returns
    -------
    file : str or StringIO
        The file name of the downloaded file.
        If `file_name` is ``None``, the file content is stored in
        a `StringIO` object.

    Raises
    ------
    ValueError
        If `db_name` is not a known database name and the data needs
        to be downloaded.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See Also
    --------
    fetch : Fetch one or multiple entries as separate files.
    """
    if (
        file_name is not None
        and isfile(file_name)
        and getsize(file_name) > 0
        and not overwrite
    ):
        # Do not redownload the already existing file
        return file_name
    # A bare string is a single UID:
    # joining it directly would split it into its characters
    if isinstance(uids, str):
        uids = [uids]
    uid_list_str = ",".join(uids)
    # NOTE(review): the E-utilities documentation names the
    # contact parameter "email" - confirm "mail" is intended
    param_dict = {
        "db": sanitize_database_name(db_name),
        "id": uid_list_str,
        "rettype": ret_type,
        "retmode": ret_mode,
        "tool": "Biotite",
        "mail": "padix.key@gmail.com",
    }
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_fetch_url, params=param_dict)
    # Presumably raises if the response reports an error
    check_for_errors(r)
    content = r.text
    if file_name is None:
        return io.StringIO(content)
    with open(file_name, "w+") as f:
        f.write(content)
    return file_name
@@ -0,0 +1,44 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["set_api_key", "get_api_key"]
8
+
9
+
10
+ _API_KEY = None
11
+
12
+
13
def get_api_key():
    """
    Return the currently registered
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The key previously passed to :func:`set_api_key()`,
        or ``None`` if no key was set so far.
    """
    # Only reading the module-level variable:
    # no ``global`` declaration is required for that
    return _API_KEY
25
+
26
+
27
def set_api_key(key):
    """
    Register the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_
    to be used for subsequent requests.

    An API key raises the request limit on the NCBI servers.
    Once set, it is picked up automatically by the functions in
    :mod:`biotite.database.entrez`.
    The key lives only in memory, i.e. it is gone as soon as the
    Python session ends.

    Parameters
    ----------
    key : str
        The API key.
    """
    # Rebind the module-level variable instead of creating a local one
    global _API_KEY
    _API_KEY = key