biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,336 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Functions for obtaining metadata fields of a GenBank file.
7
+ """
8
+
9
+ __name__ = "biotite.sequence.io.genbank"
10
+ __author__ = "Patrick Kunzmann, Natasha Jaffe"
11
+ __all__ = [
12
+ "get_locus",
13
+ "get_definition",
14
+ "get_accession",
15
+ "get_version",
16
+ "get_gi",
17
+ "get_db_link",
18
+ "get_source",
19
+ "set_locus",
20
+ ]
21
+
22
+ from biotite.file import InvalidFileError
23
+
24
+
25
def get_locus(gb_file):
    """
    Parse the *LOCUS* field of a GenBank or GenPept file.

    Only the locus name and the sequence length are mandatory.
    Every other token of the *LOCUS* line is treated as optional.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *LOCUS* field from.

    Returns
    -------
    name : str
        The locus name.
    length : int
        Sequence length.
    mol_type : str or None
        The molecule type.
        Usually one of ``'DNA'``, ``'RNA'`` or ``'Protein'``.
        ``None``, if the *LOCUS* line contains no molecule type.
    is_circular : bool
        True, if the sequence is circular, false otherwise.
        Also false, if the *LOCUS* line gives no topology.
    division : str or None
        The GenBank division to which the file belongs.
        ``None``, if the *LOCUS* line contains no known division.
    date : str or None
        The date of last modification.
        ``None``, if the *LOCUS* line ends before the date.

    Raises
    ------
    InvalidFileError
        If the file does not have exactly one *LOCUS* field or if the
        *LOCUS* line lacks the locus name or the sequence length.

    Examples
    --------

    >>> import os.path
    >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
    >>> name, length, mol_type, is_circular, division, date = get_locus(file)
    >>> print(name)
    CP001509
    >>> print(length)
    4558953
    >>> print(mol_type)
    DNA
    >>> print(is_circular)
    True
    >>> print(division)
    BCT
    >>> print(date)
    16-FEB-2017
    """
    lines, _ = _expect_single_field(gb_file, "LOCUS")
    # 'LOCUS' field has only one line
    fields = str(lines[0]).split()
    if len(fields) < 2:
        raise InvalidFileError(
            "'LOCUS' field does not contain a locus name and a sequence length"
        )

    # The first field will always be the ID
    name = fields[0]

    # The second field will always be the length followed
    # by units (eg 1224 aa)
    length = int(fields[1])

    # The third field *should* be the molecular type
    # but sometimes this is missing. This gets tricky
    # because sometimes the next field, circular/linear,
    # is missing, too. The field after that, division,
    # is a 3 letter all caps token. Unfortunately, mol_type
    # is also often a 3 letter all caps token (eg DNA)!
    # Fortunately, GenBank publishes the set list of divisions
    # here: https://www.ncbi.nlm.nih.gov/genbank/samplerecord ,
    # so we can check against that set when determining whether
    # the current token represents the molecular type.
    divisions = (
        "PRI",  # primate sequences
        "ROD",  # rodent sequences
        "MAM",  # other mammalian sequences
        "VRT",  # other vertebrate sequences
        "INV",  # invertebrate sequences
        "PLN",  # plant, fungal, and algal sequences
        "BCT",  # bacterial sequences
        "VRL",  # viral sequences
        "PHG",  # bacteriophage sequences
        "SYN",  # synthetic sequences
        "UNA",  # unannotated sequences
        "EST",  # EST sequences (expressed sequence tags)
        "PAT",  # patent sequences
        "STS",  # STS sequences (sequence tagged sites)
        "GSS",  # GSS sequences (genome survey sequences)
        "HTG",  # HTG sequences (high-throughput genomic sequences)
        "HTC",  # unfinished high-throughput cDNA sequencing
        "ENV",  # environmental sampling sequences
        "CON",
    )
    topologies = ("linear", "circular")

    # Every token from here on is optional, so each access is guarded
    # against running past the end of the line
    n_fields = len(fields)

    # NOTE: Remember that fields[2] is the unit for length,
    # eg bp or aa, so we move to fields[3] here.
    next_idx = 3
    mol_type = None
    if (
        next_idx < n_fields
        and fields[next_idx] not in topologies
        and fields[next_idx] not in divisions
    ):
        mol_type = fields[next_idx]
        next_idx += 1

    # The next field should be the token 'linear' or 'circular',
    # but sometimes this is missing
    is_circular = False
    if next_idx < n_fields and fields[next_idx] in topologies:
        is_circular = fields[next_idx] == "circular"
        next_idx += 1

    # The next field should be the division;
    # default to None so the name is always bound on return
    division = None
    if next_idx < n_fields and fields[next_idx] in divisions:
        division = fields[next_idx]
        next_idx += 1

    # The last field is a date in the format DD-MMM-YYYY
    date = fields[next_idx] if next_idx < n_fields else None

    return name, length, mol_type, is_circular, division, date
143
+
144
+
145
def get_definition(gb_file):
    """
    Parse the *DEFINITION* field of a GenBank or GenPept file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *DEFINITION* field from.

    Returns
    -------
    definition : str
        Content of the *DEFINITION* field, with all of its lines
        stripped and joined by single spaces.

    Examples
    --------

    >>> import os.path
    >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
    >>> print(get_definition(file))
    Escherichia coli BL21(DE3), complete genome.
    """
    definition_lines, _ = _expect_single_field(gb_file, "DEFINITION")
    # A definition may span several lines: merge them into one string
    stripped_parts = (part.strip() for part in definition_lines)
    return " ".join(stripped_parts)
169
+
170
+
171
def get_accession(gb_file):
    """
    Parse the *ACCESSION* field of a GenBank or GenPept file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *ACCESSION* field from.

    Returns
    -------
    accession : str
        The accession ID of the file.

    Examples
    --------

    >>> import os.path
    >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
    >>> print(get_accession(file))
    CP001509
    """
    accession_lines, _ = _expect_single_field(gb_file, "ACCESSION")
    # Only the first line of the 'ACCESSION' field is returned
    first_line = accession_lines[0]
    return first_line
196
+
197
+
198
def get_version(gb_file):
    """
    Parse the version from the *VERSION* field of a GenBank or GenPept
    file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *VERSION* field from.

    Returns
    -------
    version : str
        The first whitespace-separated token of the *VERSION* field.
        Does not include GI.
    """
    version_lines, _ = _expect_single_field(gb_file, "VERSION")
    # Only the first line of the 'VERSION' field is evaluated;
    # its leading token is the version, anything after it is dropped
    tokens = version_lines[0].split()
    return tokens[0]
216
+
217
+
218
def get_gi(gb_file):
    """
    Parse the GI from the *VERSION* field of a GenBank or GenPept
    file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *VERSION* field from.

    Returns
    -------
    gi : int
        The GI of the file.

    Raises
    ------
    InvalidFileError
        If the file does not have exactly one *VERSION* field or if
        the second token of that field is not prefixed with ``GI:``.
    """
    gi_prefix = "GI:"

    lines, _ = _expect_single_field(gb_file, "VERSION")
    # 'VERSION' field has only one line
    version_info = lines[0].split()
    # Require the exact prefix that is cut off below: a token that merely
    # contains 'GI' somewhere would otherwise be sliced at the wrong place
    if len(version_info) < 2 or not version_info[1].startswith(gi_prefix):
        raise InvalidFileError("File does not contain GI")
    # Truncate the 'GI:' prefix
    return int(version_info[1][len(gi_prefix) :])
240
+
241
+
242
def get_db_link(gb_file):
    """
    Parse the *DBLINK* field of a GenBank or GenPept file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *DBLINK* field from.

    Returns
    -------
    link_dict : dict
        A dictionary storing the database links, with the database
        name as key, and the corresponding ID as value.

    Raises
    ------
    InvalidFileError
        If the file does not have exactly one *DBLINK* field.
    ValueError
        If a line of the field contains no ``:`` separator.

    Examples
    --------

    >>> import os.path
    >>> file = GenBankFile.read(os.path.join(path_to_sequences, "ec_bl21.gb"))
    >>> for key, val in get_db_link(file).items():
    ...     print(key, ":", val)
    BioProject : PRJNA20713
    BioSample : SAMN02603478
    """
    lines, _ = _expect_single_field(gb_file, "DBLINK")
    link_dict = {}
    for line in lines:
        # Split only at the first colon: the ID part may contain
        # further colons, which must stay part of the value
        key, value = line.split(":", 1)
        link_dict[key.strip()] = value.strip()
    return link_dict
273
+
274
+
275
def get_source(gb_file):
    """
    Parse the *SOURCE* field of a GenBank or GenPept file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *SOURCE* field from.

    Returns
    -------
    source : str
        The name of the source organism.
    """
    source_lines, _ = _expect_single_field(gb_file, "SOURCE")
    # Only the first line of the 'SOURCE' field is returned
    organism = source_lines[0]
    return organism
292
+
293
+
294
def _expect_single_field(gb_file, name):
    """
    Get the one and only field with the given name from a GenBank file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to get the field from.
    name : str
        The name of the field.

    Returns
    -------
    field
        The single element returned by ``gb_file.get_fields(name)``.
        Callers in this module unpack it into two values.

    Raises
    ------
    InvalidFileError
        If the file contains no field or more than one field with the
        given name.
    """
    matches = gb_file.get_fields(name)
    n_matches = len(matches)
    # Happy path first: exactly one field carries this name
    if n_matches == 1:
        return matches[0]
    if n_matches == 0:
        raise InvalidFileError(f"File has no '{name}' field")
    raise InvalidFileError(f"File has multiple '{name}' fields")
301
+
302
+
303
def set_locus(
    gb_file, name, length, mol_type=None, is_circular=False, division=None, date=None
):
    """
    Set the *LOCUS* field of a GenBank file.

    The given values are formatted into a single line of space-separated
    columns, which is stored as the *LOCUS* field of the file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to be edited.
    name : str
        The locus name.
    length : int
        Sequence length.
    mol_type : str, optional
        The molecule type.
        Usually one of ``'DNA'``, ``'RNA'``, ``'Protein'`` or ``''``.
        If omitted, empty or ``'Protein'``, the length unit is ``aa``,
        otherwise it is ``bp``.
    is_circular : bool, optional
        True, if the sequence is circular, false otherwise.
    division : str, optional
        The GenBank division to which the file belongs.
    date : str, optional
        The date of last modification.
    """
    # Omitted optional values are written as empty columns
    if mol_type is None:
        mol_type = ""
    if division is None:
        division = ""
    if date is None:
        date = ""

    if mol_type in ("", "Protein"):
        length_unit = "aa"
    else:
        length_unit = "bp"

    if is_circular:
        topology = "circular"
    else:
        topology = "linear"

    # Each column is padded to its width, columns are separated by one space
    columns = [
        f"{name:18}",
        f"{length:>9}",
        f"{length_unit}",
        f"{mol_type:^10}",
        f"{topology:8}",
        f"{division:3}",
        f"{date:11}",
    ]
    gb_file.set_field("LOCUS", [" ".join(columns)])
@@ -0,0 +1,173 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Functions for converting a sequence from/to a GenBank file.
7
+ """
8
+
9
+ __name__ = "biotite.sequence.io.genbank"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = [
12
+ "get_raw_sequence",
13
+ "get_sequence",
14
+ "get_annotated_sequence",
15
+ "set_sequence",
16
+ "set_annotated_sequence",
17
+ ]
18
+
19
+ import re
20
+ from biotite.file import InvalidFileError
21
+ from biotite.sequence.annotation import AnnotatedSequence
22
+ from biotite.sequence.io.genbank.annotation import get_annotation, set_annotation
23
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
24
+
25
+ _SYMBOLS_PER_CHUNK = 10
26
+ _SEQ_CHUNKS_PER_LINE = 6
27
+ _SYMBOLS_PER_LINE = _SYMBOLS_PER_CHUNK * _SEQ_CHUNKS_PER_LINE
28
+
29
+
30
def get_raw_sequence(gb_file):
    """
    Extract the plain sequence string from the *ORIGIN* field
    of a GenBank file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *ORIGIN* field from.

    Returns
    -------
    seq_str: str
        The sequence as string, unaltered apart from the removal of
        sequence position numbers and space characters.

    Raises
    ------
    InvalidFileError
        If the file contains no or more than one *ORIGIN* field.
    """
    origin_fields = gb_file.get_fields("ORIGIN")
    n_fields = len(origin_fields)
    if n_fields != 1:
        raise InvalidFileError(
            "File has no 'ORIGIN' field"
            if n_fields == 0
            else "File has multiple 'ORIGIN' fields"
        )
    # A field is a pair whose first element is the list of content lines
    origin_lines, _ = origin_fields[0]
    return _field_to_seq_string(origin_lines)
53
+
54
+
55
def get_sequence(gb_file, format="gb"):
    """
    Read the sequence stored in the *ORIGIN* field of a GenBank file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the *ORIGIN* field from.
    format : {'gb', 'gp'}
        Indicates whether the file is a GenBank or a GenPept file.
        ``'gb'`` gives a :class:`NucleotideSequence`, ``'gp'`` gives
        a :class:`ProteinSequence`.

    Returns
    -------
    sequence : NucleotideSequence or ProteinSequence
        The reference sequence in the file.
    """
    raw_sequence = get_raw_sequence(gb_file)
    return _convert_seq_str(raw_sequence, format)
74
+
75
+
76
def get_annotated_sequence(gb_file, format="gb", include_only=None):
    """
    Build an annotated sequence from the feature annotation and the
    *ORIGIN* field of a GenBank file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to read the fields from.
    format : {'gb', 'gp'}
        Whether the file is a *GenBank* or *GenPept* file.
    include_only : iterable object of str, optional
        List of names of feature keys, which should be included
        in the annotation. By default all features are included.

    Returns
    -------
    annot_seq : AnnotatedSequence
        The annotated sequence.
        Its sequence start is the position number given at the
        beginning of the first *ORIGIN* line.

    Raises
    ------
    InvalidFileError
        If the file contains no or more than one *ORIGIN* field.
    """
    origin_fields = gb_file.get_fields("ORIGIN")
    n_fields = len(origin_fields)
    if n_fields != 1:
        raise InvalidFileError(
            "File has no 'ORIGIN' field"
            if n_fields == 0
            else "File has multiple 'ORIGIN' fields"
        )
    origin_lines, _ = origin_fields[0]

    # The sequence is converted before the start position is parsed,
    # so an empty 'ORIGIN' field is reported first
    raw_sequence = _field_to_seq_string(origin_lines)
    sequence = _convert_seq_str(raw_sequence, format)
    first_position = _get_seq_start(origin_lines)

    annotation = get_annotation(gb_file, include_only)
    return AnnotatedSequence(annotation, sequence, sequence_start=first_position)
106
+
107
+
108
+ def _field_to_seq_string(origin_content):
109
+ seq_str = "".join(origin_content)
110
+ # Remove numbers and emtpy spaces
111
+ regex = re.compile("[0-9]| ")
112
+ seq_str = regex.sub("", seq_str)
113
+ return seq_str
114
+
115
+
116
+ def _convert_seq_str(seq_str, format):
117
+ if len(seq_str) == 0:
118
+ raise InvalidFileError("The file's 'ORIGIN' field is empty")
119
+ if format == "gb":
120
+ return NucleotideSequence(seq_str.replace("U", "T").replace("X", "N"))
121
+ elif format == "gp":
122
+ return ProteinSequence(seq_str.replace("U", "C").replace("O", "K"))
123
+ else:
124
+ raise ValueError(f"Unknown format '{format}'")
125
+
126
+
127
+ def _get_seq_start(origin_content):
128
+ # Start of sequence is the sequence position indicator
129
+ # at the beginning of the first line
130
+ return int(origin_content[0].split()[0])
131
+
132
+
133
def set_sequence(gb_file, sequence, sequence_start=1):
    """
    Write a sequence into the *ORIGIN* field of a GenBank file.

    The sequence is written in lower case, split into
    space-separated chunks.
    Each line starts with the right-aligned position number of its
    first symbol.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to be edited.
    sequence : str or NucleotideSequence or ProteinSequence
        The sequence that is put into the GenBank file.
    sequence_start : int, optional
        The number of the first base of the sequence.
    """
    seq_str = str(sequence).lower()
    seq_length = len(sequence)

    lines = []
    for line_offset in range(0, seq_length, _SYMBOLS_PER_LINE):
        line_end = min(line_offset + _SYMBOLS_PER_LINE, seq_length)
        # A line holds at most '_SEQ_CHUNKS_PER_LINE' chunks
        chunks = [
            seq_str[chunk_offset : chunk_offset + _SYMBOLS_PER_CHUNK]
            for chunk_offset in range(line_offset, line_end, _SYMBOLS_PER_CHUNK)
        ]
        position = "{:>9d}".format(sequence_start + line_offset)
        lines.append(" ".join([position, *chunks]))

    if not lines:
        # An empty sequence still gets a line with the start position
        lines.append("{:>9d}".format(sequence_start))
    gb_file.set_field("ORIGIN", lines)
158
+
159
+
160
def set_annotated_sequence(gb_file, annot_sequence):
    """
    Write the annotation and the sequence of an annotated sequence
    into the *FEATURES* and *ORIGIN* fields of a GenBank file.

    Parameters
    ----------
    gb_file : GenBankFile
        The GenBank file to be edited.
    annot_sequence : AnnotatedSequence
        The annotated sequence that is put into the GenBank file.
    """
    # The annotation is written first, then the sequence
    annotation = annot_sequence.annotation
    set_annotation(gb_file, annotation)

    sequence = annot_sequence.sequence
    first_position = annot_sequence.sequence_start
    set_sequence(gb_file, sequence, first_position)
@@ -0,0 +1,201 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module contains a convenience function for loading sequences from
7
+ general sequence files.
8
+ """
9
+
10
+ __name__ = "biotite.sequence.io"
11
+ __author__ = "Patrick Kunzmann"
12
+ __all__ = ["load_sequence", "save_sequence", "load_sequences", "save_sequences"]
13
+
14
+ import os.path
15
+ from collections import OrderedDict
16
+ import numpy as np
17
+ from biotite.sequence.seqtypes import NucleotideSequence
18
+
19
+
20
def load_sequence(file_path):
    """
    Load a sequence from a sequence file without the need
    to manually instantiate a :class:`File` object.

    Internally this function uses a :class:`File` object, based on the
    file extension.

    Parameters
    ----------
    file_path : str
        The path to the sequence file.

    Returns
    -------
    sequence : Sequence
        The first sequence in the file.

    Raises
    ------
    ValueError
        If the file extension is not known or if a FASTQ file does not
        contain any sequence.
    """
    # Only the suffix is needed to select the file type
    _, suffix = os.path.splitext(file_path)
    if suffix in [".fasta", ".fa", ".mpfa", ".fna", ".fsa"]:
        from biotite.sequence.io.fasta import FastaFile, get_sequence

        file = FastaFile.read(file_path)
        return get_sequence(file)
    elif suffix in [".fastq", ".fq"]:
        from biotite.sequence.io.fastq import FastqFile

        # Quality scores are irrelevant for this function
        # -> Offset is irrelevant
        file = FastqFile.read(file_path, offset="Sanger")
        # Return the first sequence
        for seq_str, _ in file.values():
            return NucleotideSequence(seq_str)
        # The loop body was never entered -> there is no record at all
        raise ValueError("File does not contain any sequences")
    elif suffix in [".gb", ".gbk", ".gp"]:
        from biotite.sequence.io.genbank import GenBankFile, get_sequence

        file_format = "gp" if suffix == ".gp" else "gb"
        file = GenBankFile.read(file_path)
        return get_sequence(file, file_format)
    else:
        raise ValueError(f"Unknown file format '{suffix}'")
64
+
65
+
66
def save_sequence(file_path, sequence):
    """
    Save a sequence into a sequence file without the need
    to manually instantiate a :class:`File` object.

    Internally this function uses a :class:`File` object, based on the
    given file extension.

    Parameters
    ----------
    file_path : str
        The path to the sequence file.
    sequence : Sequence
        The sequence to be saved.

    Raises
    ------
    ValueError
        If the file extension is not known.
    """
    fasta_suffixes = (".fasta", ".fa", ".mpfa", ".fna", ".fsa")
    fastq_suffixes = (".fastq", ".fq")
    genbank_suffixes = (".gb", ".gbk", ".gp")

    # Only the suffix is needed to select the file type
    suffix = os.path.splitext(file_path)[1]

    if suffix in fasta_suffixes:
        from biotite.sequence.io.fasta import FastaFile, set_sequence

        fasta_file = FastaFile()
        set_sequence(fasta_file, sequence)
        fasta_file.write(file_path)
        return

    if suffix in fastq_suffixes:
        from biotite.sequence.io.fastq import FastqFile

        # Quality scores are irrelevant for this function
        # -> Offset is irrelevant
        fastq_file = FastqFile(offset="Sanger")
        # No score information is supplied -> all scores are 0
        zero_scores = np.zeros(len(sequence))
        fastq_file["sequence"] = str(sequence), zero_scores
        fastq_file.write(file_path)
        return

    if suffix in genbank_suffixes:
        from biotite.sequence.io.genbank import GenBankFile, set_locus, set_sequence

        gb_file = GenBankFile()
        set_locus(gb_file, "sequence", len(sequence))
        set_sequence(gb_file, sequence)
        gb_file.write(file_path)
        return

    raise ValueError(f"Unknown file format '{suffix}'")
108
+
109
+
110
def load_sequences(file_path):
    """
    Load multiple sequences from a sequence file without the need
    to manually instantiate a :class:`File` object.

    Internally this function uses a :class:`File` object, based on the
    file extension.

    Parameters
    ----------
    file_path : str
        The path to the sequence file.

    Returns
    -------
    sequences : dict of (str, Sequence)
        The sequences in the file.
        This dictionary maps each header name to
        the respective sequence.
        For GenBank files the record definition is used as key.

    Raises
    ------
    ValueError
        If the file extension is not known.
    """
    fasta_suffixes = (".fasta", ".fa", ".mpfa", ".fna", ".fsa")
    fastq_suffixes = (".fastq", ".fq")
    genbank_suffixes = (".gb", ".gbk", ".gp")

    # Only the suffix is needed to select the file type
    suffix = os.path.splitext(file_path)[1]

    if suffix in fasta_suffixes:
        from biotite.sequence.io.fasta import FastaFile, get_sequences

        return get_sequences(FastaFile.read(file_path))

    if suffix in fastq_suffixes:
        from biotite.sequence.io.fastq import FastqFile

        # Quality scores are irrelevant for this function
        # -> Offset is irrelevant
        fastq_file = FastqFile.read(file_path, offset="Sanger")
        sequences = {}
        for identifier, (seq_str, _) in fastq_file.items():
            sequences[identifier] = NucleotideSequence(seq_str)
        return sequences

    if suffix in genbank_suffixes:
        from biotite.sequence.io.genbank import MultiFile, get_definition, get_sequence

        multi_file = MultiFile.read(file_path)
        file_format = "gp" if suffix == ".gp" else "gb"
        sequences = OrderedDict()
        for record in multi_file:
            # The sequence is parsed before the definition is looked up
            record_sequence = get_sequence(record, file_format)
            sequences[get_definition(record)] = record_sequence
        return sequences

    raise ValueError(f"Unknown file format '{suffix}'")
158
+
159
+
160
def save_sequences(file_path, sequences):
    """
    Save multiple sequences into a sequence file without the need
    to manually instantiate a :class:`File` object.

    Internally this function uses a :class:`File` object, based on the
    given file extension.

    Parameters
    ----------
    file_path : str
        The path to the sequence file.
    sequences : dict of (str, Sequence)
        The sequences to be saved. The dictionary maps a header name
        to a sequence.

    Raises
    ------
    NotImplementedError
        If the file extension belongs to a GenBank file, as writing
        multiple GenBank records is not supported.
    ValueError
        If the file extension is not known.
    """
    # Only the suffix is needed to select the file type
    _, suffix = os.path.splitext(file_path)
    if suffix in [".fasta", ".fa", ".mpfa", ".fna", ".fsa"]:
        from biotite.sequence.io.fasta import FastaFile, set_sequences

        file = FastaFile()
        set_sequences(file, sequences)
        file.write(file_path)
    elif suffix in [".fastq", ".fq"]:
        from biotite.sequence.io.fastq import FastqFile

        # Quality scores are irrelevant for this function
        # -> Offset is irrelevant
        file = FastqFile(offset="Sanger")
        for identifier, sequence in sequences.items():
            # Scores are set to 0 since no score information is supplied
            scores = np.zeros(len(sequence))
            # Each record is stored under its own identifier;
            # a constant key would make the records overwrite each other
            file[identifier] = str(sequence), scores
        file.write(file_path)
    elif suffix in [".gb", ".gbk", ".gp"]:
        raise NotImplementedError(
            "Writing GenBank files containing multiple records is currently "
            "not supported"
        )
    else:
        raise ValueError(f"Unknown file format '{suffix}'")