biotite 1.5.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +4 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for reading and writing sequence related data.
7
+ """
8
+
9
+ __name__ = "biotite.sequence.io"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .general import *
@@ -0,0 +1,22 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading and writing sequence objects
7
+ using the popular FASTA format.
8
+
9
+ This package contains the :class:`FastaFile`, which provides a
10
+ dictionary like interface to FASTA files, where the header lines are
11
+ keys and the strings containing sequence data are the corresponding
12
+ values.
13
+
14
+ Furthermore, the package contains convenience functions for
15
+ getting/setting directly :class:`Sequence` objects, rather than strings.
16
+ """
17
+
18
+ __name__ = "biotite.sequence.io.fasta"
19
+ __author__ = "Patrick Kunzmann"
20
+
21
+ from .convert import *
22
+ from .file import *
@@ -0,0 +1,283 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.fasta"
6
+ __author__ = "Patrick Kunzmann"
7
+
8
+ import warnings
9
+ from collections import OrderedDict
10
+ from biotite.sequence.align.alignment import Alignment
11
+ from biotite.sequence.alphabet import AlphabetError, LetterAlphabet
12
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
13
+
14
+ __all__ = [
15
+ "get_sequence",
16
+ "get_sequences",
17
+ "set_sequence",
18
+ "set_sequences",
19
+ "get_alignment",
20
+ "set_alignment",
21
+ ]
22
+
23
+
24
def get_sequence(fasta_file, header=None, seq_type=None):
    """
    Read a single sequence from a :class:`FastaFile` instance.

    Unless `seq_type` is given, the sequence type is inferred from the
    sequence string: a conversion into a :class:`NucleotideSequence`
    is tried first, a conversion into a :class:`ProteinSequence`
    second.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    header : str, optional
        The header of the entry to read.
        By default, the first sequence of the file is returned.
    seq_type : Class, optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type is detected automatically as described
        above.

    Returns
    -------
    sequence : NucleotideSequence or ProteinSequence
        The requested sequence.
        Without `seq_type`, this is a :class:`NucleotideSequence` if
        the sequence string fits the corresponding alphabet and a
        :class:`ProteinSequence` otherwise.

    Raises
    ------
    ValueError
        If no `header` is given and the file contains no sequence, or
        if the sequence data can be converted neither into a
        :class:`NucleotideSequence` nor into a
        :class:`ProteinSequence`.
    """
    if header is None:
        # No specific entry requested:
        # fall back to the first (and probably only) sequence of the file
        seq_str = next(iter(fasta_file.values()), None)
        if seq_str is None:
            raise ValueError("File does not contain any sequences")
    else:
        seq_str = fasta_file[header]
    # The actual type detection/conversion is delegated to the helper
    return _convert_to_sequence(seq_str, seq_type)
70
+
71
+
72
def get_sequences(fasta_file, seq_type=None):
    """
    Read all sequences from a :class:`FastaFile` instance into a
    dictionary that maps each header to its sequence.

    Unless `seq_type` is given, the type of each sequence is inferred
    from its sequence string: a conversion into a
    :class:`NucleotideSequence` is tried first, a conversion into a
    :class:`ProteinSequence` second.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    seq_type : Class, optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type is detected automatically as described
        above.

    Returns
    -------
    seq_dict : dict
        An ordered dictionary that maps headers to the converted
        sequence objects, in the iteration order of `fasta_file`.

    Raises
    ------
    ValueError
        If at least one of the sequence strings can be converted
        neither into a :class:`NucleotideSequence` nor into a
        :class:`ProteinSequence`.
    """
    converted_entries = (
        (header, _convert_to_sequence(seq_str, seq_type))
        for header, seq_str in fasta_file.items()
    )
    return OrderedDict(converted_entries)
108
+
109
+
110
def set_sequence(fasta_file, sequence, header=None, as_rna=False):
    """
    Store a single sequence in a :class:`FastaFile` instance.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    sequence : Sequence
        The sequence to be stored.
    header : str, optional
        The header under which the sequence is stored.
        Default is ``'sequence'``.
    as_rna : bool, optional
        If set to true, ``'T'`` is replaced by ``'U'``,
        if a :class:`NucleotideSequence` was given.

    Raises
    ------
    ValueError
        If the alphabet of the sequence uses symbols other than single
        characters.
    """
    entry_header = "sequence" if header is None else header
    fasta_file[entry_header] = _convert_to_string(sequence, as_rna)
135
+
136
+
137
def set_sequences(fasta_file, sequence_dict, as_rna=False):
    """
    Store multiple sequences from a dictionary in a :class:`FastaFile`
    instance.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    sequence_dict : dict
        The sequences to be stored:
        headers are the keys, :class:`Sequence` instances are the
        values.
    as_rna : bool, optional
        If set to true, ``'T'`` is replaced by ``'U'``,
        if a :class:`NucleotideSequence` was given.

    Raises
    ------
    ValueError
        If the alphabet of a sequence uses symbols other than single
        characters.
    """
    for entry_header, entry_sequence in sequence_dict.items():
        # Entries are written one by one, in the order of the dictionary
        fasta_file[entry_header] = _convert_to_string(entry_sequence, as_rna)
160
+
161
+
162
def get_alignment(fasta_file, additional_gap_chars=("_",), seq_type=None):
    """
    Read an alignment from a :class:`FastaFile` instance.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    additional_gap_chars : str, optional
        The characters that are treated as gaps in addition to the
        default gap symbol ``'-'``.
    seq_type : Class, optional
        The :class:`Sequence` subclass contained in the file.
        If not set, the type of each sequence is detected
        automatically (nucleotide first, protein second).

    Returns
    -------
    alignment : Alignment
        The alignment from the :class:`FastaFile`.
    """
    raw_strings = list(fasta_file.values())
    gap_chars = tuple(additional_gap_chars)

    def _unify_gaps(seq_str):
        # Map every additional gap symbol to the default gap symbol ('-')
        for gap_char in gap_chars:
            seq_str = seq_str.replace(gap_char, "-")
        return seq_str

    gapped_strings = [_unify_gaps(seq_str) for seq_str in raw_strings]
    # The sequence objects themselves must not contain gaps
    sequences = []
    for gapped in gapped_strings:
        sequences.append(_convert_to_sequence(gapped.replace("-", ""), seq_type))
    # The trace is derived from the gapped strings
    trace = Alignment.trace_from_strings(gapped_strings)
    return Alignment(sequences, trace, score=None)
194
+
195
+
196
def set_alignment(fasta_file, alignment, seq_names):
    """
    Fill a :class:`FastaFile` with gapped sequence strings from an
    alignment.

    Parameters
    ----------
    fasta_file : FastaFile
        The :class:`FastaFile` to be accessed.
    alignment : Alignment
        The alignment to be set.
    seq_names : iterable object of str
        The names for the sequences in the alignment.
        Must have the same length as the sequence count in `alignment`.
        Any iterable is accepted, including one-shot iterators such as
        generators.

    Raises
    ------
    ValueError
        If the number of names differs from the number of sequences
        in `alignment`.
    """
    gapped_seq_strings = alignment.get_gapped_sequences()
    # The names are documented as an arbitrary iterable, which does not
    # necessarily support len() or indexing -> materialize them once
    seq_names = list(seq_names)
    if len(gapped_seq_strings) != len(seq_names):
        raise ValueError(
            f"Alignment has {len(gapped_seq_strings)} sequences, "
            f"but {len(seq_names)} names were given"
        )
    # Lengths are equal at this point, so zip() drops nothing
    for name, gapped_seq_str in zip(seq_names, gapped_seq_strings):
        fasta_file[name] = gapped_seq_str
219
+
220
+
221
def _convert_to_sequence(seq_str, seq_type=None):
    """
    Convert a sequence string from a FASTA file into a sequence object.

    Parameters
    ----------
    seq_str : str
        The sequence string.
    seq_type : Class, optional
        The class to instantiate with the (preprocessed) string.
        For :class:`NucleotideSequence` and :class:`ProteinSequence`
        the same preprocessing as in the automatic detection is
        applied; for any other type the string is passed unchanged.
        If not set, a :class:`NucleotideSequence` is tried first and
        a :class:`ProteinSequence` second.

    Returns
    -------
    sequence : object
        An instance of `seq_type`, if given, otherwise a
        :class:`NucleotideSequence` or :class:`ProteinSequence`.

    Raises
    ------
    ValueError
        If no `seq_type` is given and the string fits neither the
        nucleotide nor the protein alphabet.
    """
    selenocysteine_warning = (
        "ProteinSequence objects do not support selenocysteine (U), "
        "occurrences were substituted by cysteine (C)"
    )

    # Set manually selected sequence type
    if seq_type is not None:
        # Do preprocessing as done without manual selection
        if seq_type == NucleotideSequence:
            seq_str = _process_nucleotide_sequence(seq_str)
        elif seq_type == ProteinSequence:
            # The preprocessing upper-cases the string before the
            # substitution, so a lower-case 'u' is substituted as well
            # and hence must trigger the warning, too
            if "U" in seq_str.upper():
                warnings.warn(selenocysteine_warning)
            seq_str = _process_protein_sequence(seq_str)
        # Return the converted sequence
        return seq_type(seq_str)

    # Attempt to automatically determine sequence type:
    # if a NucleotideSequence can be created it is a nucleotide
    # sequence, otherwise a protein sequence is tried
    try:
        return NucleotideSequence(_process_nucleotide_sequence(seq_str))
    except AlphabetError:
        pass
    try:
        prot_seq = ProteinSequence(_process_protein_sequence(seq_str))
    except AlphabetError:
        raise ValueError(
            "FASTA data cannot be converted either to "
            "'NucleotideSequence' nor to 'ProteinSequence'"
        )
    # Raise the warning only after the conversion into
    # 'ProteinSequence' succeeded, i.e. no 'AlphabetError' occurred
    if "U" in seq_str.upper():
        warnings.warn(selenocysteine_warning)
    return prot_seq
258
+
259
+
260
+ def _process_protein_sequence(x):
261
+ """
262
+ Replace selenocysteine with cysteine and pyrrolysine with lysine.
263
+ """
264
+ return x.upper().replace("U", "C").replace("O", "K")
265
+
266
+
267
+ def _process_nucleotide_sequence(x):
268
+ """
269
+ For nucleotides uracil is represented by thymine and there is only
270
+ one letter for completely unknown nucleotides
271
+ """
272
+ return x.upper().replace("U", "T").replace("X", "N")
273
+
274
+
275
def _convert_to_string(sequence, as_rna):
    """
    Convert a sequence object into the string stored in a FASTA file.

    Parameters
    ----------
    sequence : Sequence
        The sequence to be converted.
        Its alphabet must be a :class:`LetterAlphabet`.
    as_rna : bool
        If true and `sequence` is a :class:`NucleotideSequence`,
        ``'T'`` is replaced by ``'U'`` in the resulting string.

    Returns
    -------
    seq_str : str
        The string representation of the sequence.

    Raises
    ------
    ValueError
        If the alphabet of the sequence is not a
        :class:`LetterAlphabet`.
    """
    alphabet = sequence.get_alphabet()
    if not isinstance(alphabet, LetterAlphabet):
        raise ValueError(
            "Only sequences using single letter alphabets "
            "can be stored in a FASTA file"
        )
    seq_str = str(sequence)
    if isinstance(sequence, NucleotideSequence) and as_rna:
        seq_str = seq_str.replace("T", "U")
    return seq_str
@@ -0,0 +1,265 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.fasta"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["FastaFile"]
8
+
9
+ from collections import OrderedDict
10
+ from collections.abc import MutableMapping
11
+ from biotite.file import InvalidFileError, TextFile, wrap_string
12
+
13
+
14
class FastaFile(TextFile, MutableMapping):
    """
    This class represents a file in FASTA format.

    A FASTA file contains so called *header* lines, beginning with
    ``>``, that describe the following sequence.
    The corresponding sequence starts at the line after the header line
    and ends at the next header line or at the end of file.
    The header along with its sequence forms an entry.

    This class is used in a dictionary like manner, implementing the
    :class:`MutableMapping` interface:
    Headers (without the leading ``>``) are used as keys,
    and strings containing the sequences are the corresponding values.
    Entries can be accessed using indexing,
    ``del`` deletes the entry at the given index.

    A header is always written on a single line without surrounding
    whitespace.
    Hence, when an entry is set, line breaks and leading/trailing
    whitespace are removed from the header and the entry is registered
    under this cleaned header.
    Lookups accept the header either as registered or in a form that is
    cleaned to the registered header.

    Parameters
    ----------
    chars_per_line : int, optional
        The number of characters in a line containing sequence data
        after which a line break is inserted.
        Only relevant, when adding sequences to a file.
        Default is 80.

    Examples
    --------

    >>> import os.path
    >>> file = FastaFile()
    >>> file["seq1"] = "ATACT"
    >>> print(file["seq1"])
    ATACT
    >>> file["seq2"] = "AAAATT"
    >>> print(file)
    >seq1
    ATACT
    >seq2
    AAAATT
    >>> print(dict(file.items()))
    {'seq1': 'ATACT', 'seq2': 'AAAATT'}
    >>> for header, seq in file.items():
    ...     print(header, seq)
    seq1 ATACT
    seq2 AAAATT
    >>> del file["seq1"]
    >>> print(dict(file.items()))
    {'seq2': 'AAAATT'}
    >>> file.write(os.path.join(path_to_directory, "test.fasta"))
    """

    def __init__(self, chars_per_line=80):
        super().__init__()
        self._chars_per_line = chars_per_line
        # Maps each header to the (start, stop) line range of its entry,
        # where 'start' is the index of the header line
        self._entries = OrderedDict()

    @classmethod
    def read(cls, file, chars_per_line=80):
        """
        Read a FASTA file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.
        chars_per_line : int, optional
            The number of characters in a line containing sequence data
            after which a line break is inserted.
            Only relevant, when adding sequences to a file.
            Default is 80.

        Returns
        -------
        file_object : FastaFile
            The parsed file.

        Raises
        ------
        InvalidFileError
            If the file is empty, contains only comments or does not
            start with a header line.
        """
        file = super().read(file, chars_per_line)
        # Filter out empty and comment lines
        file.lines = [
            line for line in file.lines if len(line.strip()) != 0 and line[0] != ";"
        ]
        if len(file.lines) == 0:
            raise InvalidFileError("File is empty or contains only comments")
        file._find_entries()
        return file

    @staticmethod
    def _normalize_header(header):
        """
        Get the form of a header as it is written into a header line,
        i.e. without line breaks and surrounding whitespace.
        """
        return header.replace("\n", "").strip()

    def _resolve_header(self, header):
        """
        Find the key under which the entry for `header` is registered.

        The header is tried as given first, as headers parsed from
        existing lines may contain whitespace directly after the ``>``.
        Otherwise the cleaned header is tried.
        Returns ``None``, if there is no such entry.
        """
        if header in self._entries:
            return header
        if isinstance(header, str):
            normalized = self._normalize_header(header)
            if normalized in self._entries:
                return normalized
        return None

    def __setitem__(self, header, seq_str):
        if not isinstance(header, str):
            raise IndexError("'FastaFile' only supports header strings as keys")
        if not isinstance(seq_str, str):
            raise TypeError("'FastaFile' only supports sequence strings as values")
        # The entry must be registered under the header text that is
        # actually written, otherwise the key would silently change at
        # the next rescan of the lines
        key = self._normalize_header(header)
        # Create lines for new header and sequence (with line breaks)
        new_lines = [">" + key] + wrap_string(seq_str, width=self._chars_per_line)

        # Delete lines of entries corresponding to the header, if existing:
        # An entry may be registered under the header as given
        # or under its cleaned form
        replaced = False
        for candidate in dict.fromkeys((header, key)):
            if candidate in self._entries:
                del self[candidate]
                replaced = True

        if replaced:
            self.lines += new_lines
            self._find_entries()
        else:
            # Simply append lines
            # Add entry in a more efficient way than '_find_entries()'
            # for this simple case
            start = len(self.lines)
            self._entries[key] = (start, start + len(new_lines))
            self.lines += new_lines

    def __getitem__(self, header):
        if not isinstance(header, str):
            raise IndexError("'FastaFile' only supports header strings as keys")
        key = self._resolve_header(header)
        if key is None:
            raise KeyError(header)
        start, stop = self._entries[key]
        # Concatenate sequence string from the lines following the header
        return "".join([line.strip() for line in self.lines[start + 1 : stop]])

    def __delitem__(self, header):
        key = self._resolve_header(header)
        if key is None:
            raise KeyError(header)
        start, stop = self._entries[key]
        del self.lines[start:stop]
        # The line ranges of all following entries have shifted
        # -> rebuild the entire index
        self._find_entries()

    def __len__(self):
        return len(self._entries)

    def __iter__(self):
        return iter(self._entries)

    def __contains__(self, identifer):
        return self._resolve_header(identifer) is not None

    def _find_entries(self):
        """
        Rebuild the mapping from headers to line ranges from the
        current lines.

        Raises
        ------
        InvalidFileError
            If the first line is not a header line.
        """
        if len(self.lines) > 0 and not self.lines[0].startswith(">"):
            raise InvalidFileError(
                f"File starts with '{self.lines[0][:1]}' instead of '>'"
            )

        header_i = [i for i, line in enumerate(self.lines) if line.startswith(">")]
        # An entry stops at the start of the next header,
        # the last entry stops at the end of the file
        stops = header_i[1:] + [len(self.lines)]

        self._entries = OrderedDict()
        for start, stop in zip(header_i, stops):
            # Remove leading '>' from header
            header = self.lines[start].strip()[1:]
            self._entries[header] = (start, stop)

    @staticmethod
    def read_iter(file):
        """
        Create an iterator over each sequence of the given FASTA file.

        Parameters
        ----------
        file : file-like object or str
            The file to be read.
            Alternatively a file path can be supplied.

        Yields
        ------
        header : str
            The header of the current sequence.
        seq_str : str
            The current sequence as string.

        Notes
        -----
        This approach gives the same results as
        `FastaFile.read(file).items()`, but is slightly faster and much
        more memory efficient.
        """
        header = None
        seq_str_list = []
        for line in TextFile.read_iter(file):
            line = line.strip()
            # Ignore empty and comment lines
            if len(line) == 0 or line[0] == ";":
                continue
            if line[0] == ">":
                # New entry
                # -> yield previous entry
                if header is not None:
                    yield header, "".join(seq_str_list)
                # Track new header and reset sequence
                header = line[1:]
                seq_str_list = []
            else:
                seq_str_list.append(line)
        # Yield final entry
        if header is not None:
            yield header, "".join(seq_str_list)

    @staticmethod
    def write_iter(file, items, chars_per_line=80):
        """
        Iterate over the given `items` and write each item into
        the specified `file`.

        In contrast to :meth:`write()`, the lines of text are not stored
        in an intermediate :class:`TextFile`, but are directly written
        to the file.
        Hence, this static method may save a large amount of memory if
        a large file should be written, especially if the `items`
        are provided as generator.

        Parameters
        ----------
        file : file-like object or str
            The file to be written to.
            Alternatively a file path can be supplied.
        items : generator or array-like of tuple(str, str)
            The entries to be written into the file.
            Each entry consists of a header string and a sequence
            string.
        chars_per_line : int, optional
            The number of characters in a line containing sequence data
            after which a line break is inserted.
            Only relevant, when adding sequences to a file.
            Default is 80.

        Notes
        -----
        This method does not test, whether the given identifiers are
        unambiguous.
        """

        def line_generator():
            for header, seq_str in items:
                if not isinstance(header, str):
                    raise IndexError("'FastaFile' only supports header strings")
                if not isinstance(seq_str, str):
                    raise TypeError("'FastaFile' only supports sequence strings")

                # Yield header line
                yield ">" + header.replace("\n", "").strip()

                # Yield sequence line(s)
                yield from wrap_string(seq_str, width=chars_per_line)

        TextFile.write_iter(file, line_generator())
@@ -0,0 +1,19 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading and writing sequencing data
7
+ using the popular FASTQ format.
8
+
9
+ This package contains the :class:`FastqFile`, which provides a
10
+ dictionary like interface to FASTQ files, with the sequence identifier
11
+ strings being the keys and the sequences and quality scores being the
12
+ values.
13
+ """
14
+
15
+ __name__ = "biotite.sequence.io.fastq"
16
+ __author__ = "Patrick Kunzmann"
17
+
18
+ from .convert import *
19
+ from .file import *