biotite 1.5.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +4 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for downloading predicted protein structures from the AlphaFold DB.
7
+ """
8
+
9
+ __name__ = "biotite.database.afdb"
10
+ __author__ = "Alex Carlin"
11
+
12
+ from .download import *
@@ -0,0 +1,197 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.afdb"
6
+ __author__ = "Patrick Kunzmann, Alex Carlin"
7
+ __all__ = ["fetch"]
8
+
9
+ import io
10
+ import re
11
+ from pathlib import Path
12
+ from xml.etree import ElementTree
13
+ import requests
14
+ from biotite.database.error import RequestError
15
+
16
+ _METADATA_URL = "https://alphafold.com/api/prediction"
17
+ _BINARY_FORMATS = ["bcif"]
18
+ # Adopted from https://www.uniprot.org/help/accession_numbers
19
+ # adding the optional 'AF-' prefix and '-F1' suffix used by RCSB
20
+ _UNIPROT_PATTERN = (
21
+ r"^(?P<prefix>(AF-)|(AF_AF))?"
22
+ r"(?P<id>[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"
23
+ r"(?P<suffix>-?F1)?$"
24
+ )
25
+
26
+
27
def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
    """
    Download predicted protein structures from the AlphaFold DB.

    This function requires an internet connection.

    Parameters
    ----------
    ids : str or iterable object of str
        A single ID or a list of IDs of the file(s) to be downloaded.
        They can be either UniProt IDs (e.g. ``P12345``), AlphaFold DB IDs
        (e.g. ``AF-P12345-F1``) or computational RCSB IDs (e.g. ``AF_AFP12345F1``).
    format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
        The format of the files to be downloaded.
        ``'pdbx'`` and ``'mmcif'`` are treated as aliases for ``'cif'``.
    target_path : str, optional
        The target directory of the downloaded files.
        The directory is created, if it does not exist yet.
        By default, the file content is stored in a file-like object
        (`StringIO` or `BytesIO`, respectively).
    overwrite : bool, optional
        If true, existing files will be overwritten.
        Otherwise the respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or BytesIO or list of (str or StringIO or BytesIO)
        The file path(s) to the downloaded files.
        If a single string (a single ID) was given in `ids`, a single string is
        returned.
        If a list (or other iterable object) was given, a list of strings is returned.
        If no `target_path` was given, the file contents are stored in either
        ``StringIO`` or ``BytesIO`` objects.

    Raises
    ------
    ValueError
        If `format` is not one of the supported formats.

    Examples
    --------

    >>> from pathlib import Path
    >>> file = fetch("P12345", "cif", path_to_directory)
    >>> print(Path(file).name)
    P12345.cif
    >>> files = fetch(["P12345", "Q8K9I1"], "cif", path_to_directory)
    >>> print([Path(file).name for file in files])
    ['P12345.cif', 'Q8K9I1.cif']
    """
    if format not in ["pdb", "pdbx", "cif", "mmcif", "bcif", "fasta"]:
        raise ValueError(f"Format '{format}' is not supported")
    if format in ["pdbx", "mmcif"]:
        format = "cif"

    # If only a single ID is present,
    # put it into a single element list
    if isinstance(ids, str):
        ids = [ids]
        single_element = True
    else:
        # Materialize the iterable: the progress output needs 'len()',
        # which is not available for generators and other one-shot iterables
        ids = list(ids)
        single_element = False
    if target_path is not None:
        target_path = Path(target_path)
        target_path.mkdir(parents=True, exist_ok=True)

    files = []
    for i, entry_id in enumerate(ids):
        # Verbose output
        if verbose:
            print(
                f"Fetching file {i + 1:d} / {len(ids):d} ({entry_id})...",
                end="\r",
            )
        # Fetch file from database
        if target_path is not None:
            file = target_path / f"{entry_id}.{format}"
        else:
            # 'file = None' -> store content in a file-like object
            file = None
        if file is None or not file.is_file() or file.stat().st_size == 0 or overwrite:
            file_response = requests.get(_get_file_url(entry_id, format))
            _assert_valid_file(file_response, entry_id)
            is_binary = format in _BINARY_FORMATS
            content = file_response.content if is_binary else file_response.text

            if file is None:
                file = io.BytesIO(content) if is_binary else io.StringIO(content)
            else:
                mode = "wb+" if is_binary else "w+"
                with open(file, mode) as f:
                    f.write(content)

        files.append(file)
    if verbose:
        print("\nDone")

    # Return paths as strings
    files = [file.as_posix() if isinstance(file, Path) else file for file in files]
    # If input was a single ID, return only a single element
    if single_element:
        return files[0]
    else:
        return files
129
+
130
+
131
def _get_file_url(id, format):
    """
    Get the actual file URL for the given ID from the ``prediction`` API endpoint.

    Parameters
    ----------
    id : str
        The ID of the file to be downloaded.
    format : str
        The format of the file to be downloaded.

    Returns
    -------
    file_url : str
        The URL of the file to be downloaded.

    Raises
    ------
    RequestError
        If the endpoint does not answer with valid JSON or the answer is empty.
    ValueError
        If no UniProt ID can be extracted from `id`.
    """
    uniprot_id = _extract_id(id)
    try:
        metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
    except requests.exceptions.JSONDecodeError as err:
        # Keep the decoding error as explicit cause for easier debugging
        raise RequestError("Received malformed JSON response") from err
    if not metadata:
        raise RequestError(f"ID {id} is invalid")
    # A list of length 1 is always returned, if the response is valid
    return metadata[0][f"{format}Url"]
156
+
157
+
158
def _extract_id(id):
    """
    Extract an AFDB compatible UniProt ID from the given qualifier.

    The qualifier is matched against ``_UNIPROT_PATTERN``, which accepts

    - the plain UniProt ID (e.g. ``P12345``) (trivial case)
    - the entry ID, as also returned by the RCSB search API
      (e.g. ``AF-P12345-F1``)
    - the same ID with ``AF_AF`` prefix and ``F1`` suffix
      (e.g. ``AF_AFP12345F1``)

    Parameters
    ----------
    id : str
        The qualifier to extract the UniProt ID from.

    Returns
    -------
    uniprot_id : str
        The UniProt ID.

    Raises
    ------
    ValueError
        If the qualifier does not match the expected pattern.
    """
    found = re.match(_UNIPROT_PATTERN, id)
    if found is not None:
        # Only the bare accession is of interest, prefix and suffix are dropped
        return found.group("id")
    raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
180
+
181
+
182
def _assert_valid_file(response, id):
    """
    Check whether the response is an actual structure file
    or an XML error document, e.g. due to an invalid UniProt ID.

    Parameters
    ----------
    response : requests.Response
        The response of the file download request.
    id : str
        The ID the file was requested for.
        Only used for the error message.

    Raises
    ------
    RequestError
        If the response is empty or is an XML document with an ``Error``
        root element.
    """
    if len(response.text) == 0:
        raise RequestError(f"Received no response for '{id}'")
    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError:
        # This is not XML -> the response is probably a valid file
        return
    if root.tag == "Error":
        # 'findtext()' tolerates an error document without a 'Message' element
        error_message = root.findtext("Message", default="unknown error")
        raise RequestError(f"Error while fetching '{id}': {error_message}")
@@ -0,0 +1,15 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for downloading files from the NCBI Entrez database.
7
+ """
8
+
9
+ __name__ = "biotite.database.entrez"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .dbnames import *
13
+ from .download import *
14
+ from .key import *
15
+ from .query import *
@@ -0,0 +1,60 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann, Maximilian Dombrowsky"
7
+ __all__ = ["check_for_errors"]
8
+
9
+ import json
10
+ from biotite.database.error import RequestError
11
+
12
+ # Taken from https://github.com/kblin/ncbi-entrez-error-messages
13
_error_messages = [
    "Error reading from remote server",
    "Bad gateway",
    "Bad Gateway",
    "Cannot process ID list",
    "server is temporarily unable to service your request",
    "Service unavailable",
    "Server Error",
    "ID list is empty",
    "Supplied id parameter is empty",
    "Resource temporarily unavailable",
    "Failed to retrieve sequence",
    "Failed to understand id",
]


def check_for_errors(message):
    """
    Check for common error messages in NCBI Entrez database responses.

    Parameters
    ----------
    message : str
        The message received from NCBI Entrez.

    Raises
    ------
    RequestError
        If the message contains an error message.
    """
    # Server can respond with short JSON error messages
    if len(message) < 500:
        try:
            message_json = json.loads(message)
        except json.decoder.JSONDecodeError:
            # It is not a JSON message
            pass
        else:
            # A short response may be valid JSON without being a JSON object
            # (e.g. a bare number or string): only an object can carry
            # an 'error' entry
            if isinstance(message_json, dict) and "error" in message_json:
                raise RequestError(message_json["error"])

    # Error always appear at the end of message
    # Seemingly arbitrary '+' characters are in NCBI error messages
    message_end = message[-200:].replace("+", "")
    for error_msg in _error_messages:
        # Often whitespace is also replaced by '+' in error message
        if error_msg.replace(" ", "") in message_end:
            raise RequestError(error_msg)
@@ -0,0 +1,101 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_database_name"]
8
+
9
+
10
# fmt: off
_db_names = {
    "BioProject"        : "bioproject",
    "BioSample"         : "biosample",
    "Biosystems"        : "biosystems",
    "Books"             : "books",
    "Conserved Domains" : "cdd",
    "dbGaP"             : "gap",
    "dbVar"             : "dbvar",
    "Epigenomics"       : "epigenomics",
    "EST"               : "nucest",
    "Gene"              : "gene",
    "Genome"            : "genome",
    "GEO Datasets"      : "gds",
    "GEO Profiles"      : "geoprofiles",
    "GSS"               : "nucgss",
    "HomoloGene"        : "homologene",
    "MeSH"              : "mesh",
    "NCBI C++ Toolkit"  : "toolkit",
    "NCBI Web Site"     : "ncbisearch",
    "NLM Catalog"       : "nlmcatalog",
    "Nucleotide"        : "nuccore",
    "OMIA"              : "omia",
    "PopSet"            : "popset",
    "Probe"             : "probe",
    "Protein"           : "protein",
    "Protein Clusters"  : "proteinclusters",
    "PubChem BioAssay"  : "pcassay",
    "PubChem Compound"  : "pccompound",
    "PubChem Substance" : "pcsubstance",
    "PubMed"            : "pubmed",
    "PubMed Central"    : "pmc",
    "SNP"               : "snp",
    "SRA"               : "sra",
    "Structure"         : "structure",
    "Taxonomy"          : "taxonomy",
    "UniGene"           : "unigene",
    "UniSTS"            : "unists"
}
# fmt: on


def get_database_name(database):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name.

    Parameters
    ----------
    database : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    KeyError
        If `database` is not a known common database name.

    Examples
    --------

    >>> print(get_database_name("Nucleotide"))
    nuccore
    """
    return _db_names[database]


def sanitize_database_name(db_name):
    """
    Map a common NCBI Entrez database name to an E-utility database
    name, return E-utility database name, or raise an exception if the
    database name is not existing.

    Only for internal usage in ``download.py`` and ``query.py``.

    Parameters
    ----------
    db_name : str
        Entrez database name.

    Returns
    -------
    name : str
        E-utility database name.

    Raises
    ------
    ValueError
        If `db_name` is neither a common nor an E-utility database name.
    """
    if db_name in _db_names:
        # Convert into E-utility database name
        return _db_names[db_name]
    elif db_name in _db_names.values():
        # Is already E-utility database name
        return db_name
    else:
        # f-string is required, otherwise the placeholder is printed literally
        raise ValueError(f"Database '{db_name}' is not existing")
@@ -0,0 +1,228 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["fetch", "fetch_single_file"]
8
+
9
+ import io
10
+ import os
11
+ from os.path import getsize, isdir, isfile, join
12
+ import requests
13
+ from biotite.database.entrez.check import check_for_errors
14
+ from biotite.database.entrez.dbnames import sanitize_database_name
15
+ from biotite.database.entrez.key import get_api_key
16
+ from biotite.database.error import RequestError
17
+
18
+ _fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
19
+
20
+
21
def fetch(
    uids,
    target_path,
    suffix,
    db_name,
    ret_type,
    ret_mode="text",
    overwrite=False,
    verbose=False,
):
    """
    Download files from the NCBI Entrez database in various formats.

    The data for each UID will be fetched into a separate file.

    A list of valid database, retrieval type and mode combinations can
    be found under
    `<https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly>`_

    This function requires an internet connection.

    Parameters
    ----------
    uids : str or iterable object of str
        A single *unique identifier* (UID) or an iterable of UIDs of
        the file(s) to be downloaded.
        Any iterable (including generators) is accepted; it is consumed
        once and converted into a list.
    target_path : str or None
        The target directory of the downloaded files.
        The directory is created, if it does not exist yet.
        If ``None``, the file content is stored in a file-like object
        (`StringIO`) instead.
    suffix : str
        The file suffix of the downloaded files. This value is
        independent of the retrieval type.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If true, existing files will be overwritten. Otherwise the
        respective file will only be downloaded if the file does not
        exist yet in the specified target directory or if the file is
        empty.
    verbose : bool, optional
        If true, the function will output the download progress.

    Returns
    -------
    files : str or StringIO or list of (str or StringIO)
        The file path(s) to the downloaded files.
        If a single string (a single UID) was given in `uids`,
        a single string is returned. If a list (or other iterable
        object) was given, a list of strings is returned.
        If `target_path` is ``None``, the file contents are stored in
        `StringIO` objects, as the response is always read as text.

    Raises
    ------
    ValueError
        If `db_name` is not a known database name.
    RequestError
        If the response text starts with ``" Error"``.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See Also
    --------
    fetch_single_file : Fetch multiple entries as a single file.

    Examples
    --------

    >>> import os.path
    >>> files = fetch(["1L2Y_A","3O5R_A"], path_to_directory, suffix="fa",
    ...               db_name="protein", ret_type="fasta")
    >>> print([os.path.basename(file) for file in files])
    ['1L2Y_A.fa', '3O5R_A.fa']
    """
    # If only a single UID is present,
    # put it into a single element list
    single_element = isinstance(uids, str)
    if single_element:
        uids = [uids]
    else:
        # Materialize arbitrary iterables (e.g. generators), because the
        # progress output needs the total number of UIDs via 'len()'
        uids = list(uids)
    # Create the target folder, if not existing
    # ('exist_ok' avoids a race between the existence check and creation)
    if target_path is not None:
        os.makedirs(target_path, exist_ok=True)
    files = []
    for i, uid in enumerate(uids):
        # Verbose output
        if verbose:
            print(f"Fetching file {i + 1:d} / {len(uids):d} ({uid})...", end="\r")
        # Fetch file from database
        if target_path is not None:
            file = join(target_path, uid + "." + suffix)
        else:
            file = None
        # Download only if there is no usable (non-empty) local copy,
        # or if the caller explicitly requests it
        if file is None or not isfile(file) or getsize(file) == 0 or overwrite:
            param_dict = {
                "db": sanitize_database_name(db_name),
                "id": uid,
                "rettype": ret_type,
                "retmode": ret_mode,
                "tool": "Biotite",
                "mail": "padix.key@gmail.com",
            }
            api_key = get_api_key()
            if api_key is not None:
                param_dict["api_key"] = api_key
            r = requests.get(_fetch_url, params=param_dict)
            content = r.text
            # Presumably raises on error messages embedded in the response
            check_for_errors(content)
            if content.startswith(" Error"):
                # Drop the first 8 characters
                # (presumably the ' Error: ' prefix of the message)
                raise RequestError(content[8:])
            if file is None:
                file = io.StringIO(content)
            else:
                with open(file, "w+") as f:
                    f.write(content)
        files.append(file)
    if verbose:
        print("\nDone")
    # If input was a single ID, return only a single path
    if single_element:
        return files[0]
    else:
        return files
149
+
150
+
151
def fetch_single_file(
    uids, file_name, db_name, ret_type, ret_mode="text", overwrite=False
):
    """
    Almost the same as :func:`fetch()`, but the data for the given UIDs
    will be stored in a single file.

    Parameters
    ----------
    uids : str or iterable object of str
        The UIDs of the entries to be downloaded.
        A single string is treated as a single UID.
    file_name : str or None
        The file path, including file name, to the target file.
        If ``None``, the content is returned as file-like object
        instead.
    db_name : str
        E-utility or common database name.
    ret_type : str
        Retrieval type.
    ret_mode : str, optional
        Retrieval mode.
    overwrite : bool, optional
        If false, the file is only downloaded, if no non-empty file
        with the same name already exists.

    Returns
    -------
    file : str or StringIO
        The file name of the downloaded file.
        If `file_name` is ``None``, the file content is stored in
        a `StringIO` object, as the response is always read as text.

    Raises
    ------
    ValueError
        If `db_name` is not a known database name.
    RequestError
        If the response text starts with ``" Error"``.

    Warnings
    --------
    Even if you give valid input to this function, in rare cases the
    database might return no or malformed data to you.
    In these cases the request should be retried.
    When the issue occurs repeatedly, the error is probably in your
    input.

    See Also
    --------
    fetch : Fetch one or multiple entries as separate files.
    """
    if (
        file_name is not None
        and os.path.isfile(file_name)
        and getsize(file_name) > 0
        and not overwrite
    ):
        # Do not redownload the already existing file
        return file_name
    if isinstance(uids, str):
        # A bare string is a single UID and must not be split into
        # its characters by the join below
        uids = [uids]
    # The UIDs are sent as a single comma-separated list
    uid_list_str = ",".join(uids)
    param_dict = {
        "db": sanitize_database_name(db_name),
        "id": uid_list_str,
        "rettype": ret_type,
        "retmode": ret_mode,
        "tool": "Biotite",
        "mail": "padix.key@gmail.com",
    }
    api_key = get_api_key()
    if api_key is not None:
        param_dict["api_key"] = api_key
    r = requests.get(_fetch_url, params=param_dict)
    content = r.text
    # Presumably raises on error messages embedded in the response
    check_for_errors(content)
    if content.startswith(" Error"):
        # Drop the first 8 characters
        # (presumably the ' Error: ' prefix of the message)
        raise RequestError(content[8:])
    if file_name is None:
        return io.StringIO(content)
    else:
        with open(file_name, "w+") as f:
            f.write(content)
        return file_name
@@ -0,0 +1,44 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.database.entrez"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["set_api_key", "get_api_key"]
8
+
9
+
10
+ _API_KEY = None
11
+
12
+
13
def get_api_key():
    """
    Return the currently stored
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_.

    Returns
    -------
    api_key : str or None
        The value of the module-level key.
        This is ``None`` (the initial value), as long as no key has
        been stored.
    """
    # Merely reading a module-level variable needs no 'global' statement
    return _API_KEY
25
+
26
+
27
def set_api_key(key):
    """
    Store the
    `NCBI API key <https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/>`_
    for subsequent requests.

    Using an API key increases the request limit on the NCBI servers.
    Once set, the key is automatically used by functions in
    :mod:`biotite.database.entrez`.
    The key is held in a module-level variable only, i.e. it is kept
    in memory and is gone when the Python session ends.

    Parameters
    ----------
    key : str
        The API key.
    """
    # Rebind the module-level variable instead of creating a local one
    global _API_KEY
    _API_KEY = key