biotite 1.6.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +426 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +202 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +66 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +224 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +259 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +191 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +127 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +491 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +763 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +462 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1596 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +521 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +646 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +426 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2122 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +452 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.6.0.dist-info/METADATA +162 -0
  352. biotite-1.6.0.dist-info/RECORD +354 -0
  353. biotite-1.6.0.dist-info/WHEEL +4 -0
  354. biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,265 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.fasta"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["FastaFile"]
8
+
9
+ from collections import OrderedDict
10
+ from collections.abc import MutableMapping
11
+ from biotite.file import InvalidFileError, TextFile, wrap_string
12
+
13
+
14
class FastaFile(TextFile, MutableMapping):
    """
    This class represents a file in FASTA format.

    A FASTA file contains so called *header* lines, beginning with
    ``>``, that describe the following sequence.
    The corresponding sequence starts at the line after the header line
    and ends at the next header line or at the end of file.
    The header along with its sequence forms an entry.

    This class is used in a dictionary like manner, implementing the
    :class:`MutableMapping` interface:
    Headers (without the leading ``>``) are used as keys,
    and strings containing the sequences are the corresponding values.
    Entries can be accessed using indexing,
    ``del`` deletes the entry at the given index.

    Parameters
    ----------
    chars_per_line : int, optional
        The number of characters in a line containing sequence data
        after which a line break is inserted.
        Only relevant, when adding sequences to a file.
        Default is 80.

    Notes
    -----
    When an entry is assigned, line breaks and surrounding whitespace
    are removed from the header.
    The cleaned header is both written to the file and used as key.

    Examples
    --------

    >>> import os.path
    >>> file = FastaFile()
    >>> file["seq1"] = "ATACT"
    >>> print(file["seq1"])
    ATACT
    >>> file["seq2"] = "AAAATT"
    >>> print(file)
    >seq1
    ATACT
    >seq2
    AAAATT
    >>> print(dict(file.items()))
    {'seq1': 'ATACT', 'seq2': 'AAAATT'}
    >>> for header, seq in file.items():
    ...     print(header, seq)
    seq1 ATACT
    seq2 AAAATT
    >>> del file["seq1"]
    >>> print(dict(file.items()))
    {'seq2': 'AAAATT'}
    >>> file.write(os.path.join(path_to_directory, "test.fasta"))
    """

    def __init__(self, chars_per_line=80):
        super().__init__()
        self._chars_per_line = chars_per_line
        # Maps each header to the '(start, stop)' line range of its entry,
        # where 'start' is the index of the header line
        self._entries = OrderedDict()

    @classmethod
    def read(cls, file, chars_per_line=80):
        """
        Read a FASTA file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.
        chars_per_line : int, optional
            The number of characters in a line containing sequence data
            after which a line break is inserted.
            Only relevant, when adding sequences to a file.
            Default is 80.

        Returns
        -------
        file_object : FastaFile
            The parsed file.

        Raises
        ------
        InvalidFileError
            If the file contains no lines besides empty and comment
            lines, or if its first remaining line is not a header line.
        """
        file = super().read(file, chars_per_line)
        # Filter out empty and comment lines
        file.lines = [
            line for line in file.lines if line.strip() and not line.startswith(";")
        ]
        if not file.lines:
            raise InvalidFileError("File is empty or contains only comments")
        file._find_entries()
        return file

    def __setitem__(self, header, seq_str):
        if not isinstance(header, str):
            raise IndexError("'FastaFile' only supports header strings as keys")
        if not isinstance(seq_str, str):
            raise TypeError("'FastaFile' only supports sequence strings as values")
        # The header line is written without line breaks and surrounding
        # whitespace. Use this cleaned form as key as well:
        # otherwise the index would disagree with the keys that
        # '_find_entries()' derives from the lines, and assigning the same
        # raw header again could append a duplicate entry
        header = header.replace("\n", "").strip()
        # Create lines for new header and sequence (with line breaks)
        new_lines = [">" + header] + wrap_string(
            seq_str, width=self._chars_per_line
        )
        if header in self:
            # Delete lines of the entry corresponding to the header;
            # '__delitem__()' already re-indexes the remaining entries
            del self[header]
        # Append the lines and add the entry in a more efficient way
        # than '_find_entries()' for this simple case
        start = len(self.lines)
        self._entries[header] = (start, start + len(new_lines))
        self.lines += new_lines

    def __getitem__(self, header):
        if not isinstance(header, str):
            raise IndexError("'FastaFile' only supports header strings as keys")
        start, stop = self._entries[header]
        # Concatenate sequence string from the lines following the header
        return "".join(line.strip() for line in self.lines[start + 1 : stop])

    def __delitem__(self, header):
        start, stop = self._entries[header]
        del self.lines[start:stop]
        del self._entries[header]
        # The line ranges of the subsequent entries have shifted
        self._find_entries()

    def __len__(self):
        return len(self._entries)

    def __iter__(self):
        return iter(self._entries)

    def __contains__(self, identifer):
        return identifer in self._entries

    def _find_entries(self):
        """
        Rebuild the index that maps each header to the ``(start, stop)``
        line range of its entry from the current lines.

        Raises
        ------
        InvalidFileError
            If there are lines, but the first one is not a header line.
        """
        # 'startswith()' and slicing are used instead of indexing,
        # so that an empty line does not raise an 'IndexError'
        if len(self.lines) > 0 and not self.lines[0].startswith(">"):
            raise InvalidFileError(
                f"File starts with '{self.lines[0][:1]}' instead of '>'"
            )

        header_i = [i for i, line in enumerate(self.lines) if line.startswith(">")]
        # An entry stops at the start of the next header,
        # the last entry stops at the end of the file
        stops = header_i[1:] + [len(self.lines)]

        self._entries = OrderedDict()
        for start, stop in zip(header_i, stops):
            # Remove leading '>' from header
            header = self.lines[start].strip()[1:]
            self._entries[header] = (start, stop)

    @staticmethod
    def read_iter(file):
        """
        Create an iterator over each sequence of the given FASTA file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Yields
        ------
        header : str
            The header of the current sequence.
        seq_str : str
            The current sequence as string.

        Notes
        -----
        This approach gives the same results as
        `FastaFile.read(file).items()`, but is slightly faster and much
        more memory efficient.
        """
        header = None
        seq_str_list = []
        for line in TextFile.read_iter(file):
            line = line.strip()
            # Ignore empty and comment lines
            if len(line) == 0 or line[0] == ";":
                continue
            if line[0] == ">":
                # New entry
                # -> yield previous entry
                if header is not None:
                    yield header, "".join(seq_str_list)
                # Track new header and reset sequence
                header = line[1:]
                seq_str_list = []
            else:
                seq_str_list.append(line)
        # Yield final entry
        if header is not None:
            yield header, "".join(seq_str_list)

    @staticmethod
    def write_iter(file, items, chars_per_line=80):
        """
        Iterate over the given `items` and write each item into
        the specified `file`.

        In contrast to :meth:`write()`, the lines of text are not stored
        in an intermediate :class:`TextFile`, but are directly written
        to the file.
        Hence, this static method may save a large amount of memory if
        a large file should be written, especially if the `items`
        are provided as generator.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively a file path can be supplied.
        items : generator or array-like of tuple(str, str)
            The entries to be written into the file.
            Each entry consists of a header string and a sequence
            string.
        chars_per_line : int, optional
            The number of characters in a line containing sequence data
            after which a line break is inserted.
            Default is 80.

        Raises
        ------
        IndexError
            If a header is not a string.
        TypeError
            If a sequence is not a string.

        Notes
        -----
        This method does not test, whether the given identifiers are
        unambiguous.
        """

        def line_generator():
            # The items are validated lazily, while the lines are consumed
            for header, seq_str in items:
                if not isinstance(header, str):
                    raise IndexError("'FastaFile' only supports header strings")
                if not isinstance(seq_str, str):
                    raise TypeError("'FastaFile' only supports sequence strings")

                # Yield header line
                yield ">" + header.replace("\n", "").strip()

                # Yield sequence line(s)
                yield from wrap_string(seq_str, width=chars_per_line)

        TextFile.write_iter(file, line_generator())
@@ -0,0 +1,19 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading and writing sequencing data
7
+ using the popular FASTQ format.
8
+
9
+ This package contains the :class:`FastqFile`, which provides a
10
+ dictionary like interface to FASTQ files, with the sequence identifier
11
+ strings being the keys and the sequences and quality scores being the
12
+ values.
13
+ """
14
+
15
+ __name__ = "biotite.sequence.io.fastq"
16
+ __author__ = "Patrick Kunzmann"
17
+
18
+ from .convert import *
19
+ from .file import *
@@ -0,0 +1,117 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.fastq"
6
+ __author__ = "Patrick Kunzmann"
7
+
8
+ from collections import OrderedDict
9
+ from biotite.sequence.seqtypes import NucleotideSequence
10
+
11
+ __all__ = ["get_sequence", "get_sequences", "set_sequence", "set_sequences"]
12
+
13
+
14
def get_sequence(fastq_file, header=None):
    """
    Obtain a single sequence and its quality scores from a `FastqFile`
    instance.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` to be accessed.
    header : str, optional
        The identifier to get the sequence and scores from.
        By default, the first sequence of the file is returned.

    Returns
    -------
    sequence : NucleotideSequence
        The requested sequence.
    scores : ndarray, dtype=int
        The requested scores.

    Raises
    ------
    ValueError
        If no `header` is given and the file contains no sequences.
    """
    if header is None:
        # Take the first (and probably only) entry of the file
        seq_str, scores = next(iter(fastq_file.values()), (None, None))
        if seq_str is None:
            raise ValueError("File does not contain any sequences")
    else:
        seq_str, scores = fastq_file[header]
    # 'U' is read as 'T' and 'X' as 'N'
    symbol_table = str.maketrans("UX", "TN")
    return NucleotideSequence(seq_str.translate(symbol_table)), scores
45
+
46
+
47
def get_sequences(fastq_file):
    """
    Convert all entries of a `FastqFile` instance into a dictionary
    that maps identifiers to sequence-score-tuples.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` to be accessed.

    Returns
    -------
    seq_dict : dict
        A dictionary containing identifiers as keys and
        (`NucleotideSequence`, `ndarray`) tuples as values.
        The entries are in the order given by the file.
    """

    def to_sequence(seq_str):
        # 'U' is read as 'T' and 'X' as 'N'
        dna_str = seq_str.replace("U", "T")
        return NucleotideSequence(dna_str.replace("X", "N"))

    return OrderedDict(
        (header, (to_sequence(seq_str), scores))
        for header, (seq_str, scores) in fastq_file.items()
    )
68
+
69
+
70
def set_sequence(fastq_file, sequence, scores, header=None, as_rna=False):
    """
    Store a sequence together with its quality score array in a
    `FastqFile` instance.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` to be accessed.
    sequence : NucleotideSequence
        The sequence to be set.
    scores : ndarray, dtype=int
        The quality scores to be set.
    header : str, optional
        The identifier for the sequence. Default is 'sequence'.
    as_rna : bool, optional
        If set to true, the sequence symbol ``'T'`` will be replaced
        by ``'U'``.
    """
    identifier = "sequence" if header is None else header
    seq_str = _convert_to_string(sequence, as_rna)
    fastq_file[identifier] = (seq_str, scores)
91
+
92
+
93
def set_sequences(fastq_file, sequence_dict, as_rna=False):
    """
    Store every entry of a dictionary in a `FastqFile` instance.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` to be accessed.
    sequence_dict : dict
        A dictionary containing the sequences and scores to be set.
        Identifiers are keys,
        (`NucleotideSequence`, `ndarray`) tuples are values.
    as_rna : bool, optional
        If set to true, the sequence symbol ``'T'`` will be replaced
        by ``'U'``.
    """
    for header, entry in sequence_dict.items():
        sequence, scores = entry
        seq_str = _convert_to_string(sequence, as_rna)
        fastq_file[header] = (seq_str, scores)
111
+
112
+
113
def _convert_to_string(sequence, as_rna):
    """
    Convert a sequence into its string representation.

    If `as_rna` is true, each ``'T'`` is replaced by ``'U'``.
    """
    seq_str = str(sequence)
    return seq_str.replace("T", "U") if as_rna else seq_str