biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,117 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.fastq"
6
+ __author__ = "Patrick Kunzmann"
7
+
8
+ from collections import OrderedDict
9
+ from biotite.sequence.seqtypes import NucleotideSequence
10
+
11
+ __all__ = ["get_sequence", "get_sequences", "set_sequence", "set_sequences"]
12
+
13
+
14
def get_sequence(fastq_file, header=None):
    """
    Retrieve one sequence together with its quality scores from a
    `FastqFile` instance.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` to read the entry from.
    header : str, optional
        The identifier of the entry to return.
        If omitted, the first entry of the file is used.

    Returns
    -------
    sequence : NucleotideSequence
        The sequence of the selected entry.
    scores : ndarray, dtype=int
        The quality scores of the selected entry.

    Raises
    ------
    ValueError
        If no `header` is given and the file contains no entry.
    """
    if header is None:
        # No identifier given -> fall back to the first entry of the file
        seq_str, scores = next(iter(fastq_file.values()), (None, None))
        if seq_str is None:
            raise ValueError("File does not contain any sequences")
    else:
        seq_str, scores = fastq_file[header]
    # 'U' and 'X' are mapped to 'T' and 'N' before the sequence is built
    dna_str = seq_str.replace("U", "T").replace("X", "N")
    return NucleotideSequence(dna_str), scores
45
+
46
+
47
def get_sequences(fastq_file):
    """
    Convert all entries of a `FastqFile` instance into a dictionary
    that maps each identifier to a sequence-score-tuple.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` to be accessed.

    Returns
    -------
    seq_dict : dict
        An ordered dictionary with the identifiers as keys and
        (`NucleotideSequence`, `ndarray`) tuples as values.
        The entries appear in the iteration order of `fastq_file`.
    """
    # 'U' and 'X' are mapped to 'T' and 'N' before each sequence is built
    return OrderedDict(
        (
            header,
            (
                NucleotideSequence(seq_str.replace("U", "T").replace("X", "N")),
                scores,
            ),
        )
        for header, (seq_str, scores) in fastq_file.items()
    )
68
+
69
+
70
def set_sequence(fastq_file, sequence, scores, header=None, as_rna=False):
    """
    Store a single sequence and its quality scores in a `FastqFile`
    instance.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` that receives the entry.
    sequence : NucleotideSequence
        The sequence to be stored.
    scores : ndarray, dtype=int
        The quality scores to be stored.
    header : str, optional
        The identifier of the entry. Default is 'sequence'.
    as_rna : bool, optional
        If set to true, the sequence symbol ``'T'`` is written
        as ``'U'``.
    """
    key = "sequence" if header is None else header
    seq_str = _convert_to_string(sequence, as_rna)
    fastq_file[key] = seq_str, scores
91
+
92
+
93
def set_sequences(fastq_file, sequence_dict, as_rna=False):
    """
    Store every entry of a dictionary in a `FastqFile` instance.

    Parameters
    ----------
    fastq_file : FastqFile
        The `FastqFile` that receives the entries.
    sequence_dict : dict
        The sequences and scores to be stored:
        identifiers are the keys and
        (`NucleotideSequence`, `ndarray`) tuples are the values.
    as_rna : bool, optional
        If set to true, the sequence symbol ``'T'`` is written
        as ``'U'``.
    """
    for header, entry in sequence_dict.items():
        sequence, scores = entry
        fastq_file[header] = (_convert_to_string(sequence, as_rna), scores)
111
+
112
+
113
+ def _convert_to_string(sequence, as_rna):
114
+ if as_rna:
115
+ return str(sequence).replace("T", "U")
116
+ else:
117
+ return str(sequence)
@@ -0,0 +1,507 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.fastq"
6
+ __author__ = "Patrick Kunzmann"
7
+
8
+ from collections import OrderedDict
9
+ from collections.abc import MutableMapping
10
+ from numbers import Integral
11
+ import numpy as np
12
+ from biotite.file import InvalidFileError, TextFile, wrap_string
13
+
14
+ __all__ = ["FastqFile"]
15
+
16
+
17
# ASCII offsets of the named quality score formats that may be given as
# `offset` instead of a plain number.
# NOTE(review): presumably looked up by `_convert_offset()`, which is not
# visible in this chunk - confirm.
_OFFSETS = {
    "Sanger": 33,
    "Solexa": 64,
    "Illumina-1.3": 64,
    "Illumina-1.5": 64,
    "Illumina-1.8": 33,
}
24
+
25
+
26
+ class FastqFile(TextFile, MutableMapping):
27
+ """
28
+ This class represents a file in FASTQ format.
29
+
30
+ A FASTQ file stores one or multiple sequences (base calls) along
31
+ with sequencing quality scores.
32
+ Each sequence is associated with an identifier string,
33
+ beginning with an ``@``.
34
+
35
+ The quality scores are encoded as ASCII characters,
36
+ with each actual score being the ASCII code subtracted by an
37
+ `offset` value.
38
+ The offset is format dependent.
39
+ As the offset is not reliably deducible from the file contents, it
40
+ must be provided explicitly, either as number or format
41
+ (e.g. ``'Illumina-1.8'``).
42
+
43
+ Similar to the :class:`FastaFile` class, this class implements the
44
+ :class:`MutableMapping` interface:
45
+ An identifier string (without the leading ``@``) is used as index
46
+ to get and set the corresponding sequence and quality.
47
+ ``del`` removes an entry in the file.
48
+
49
+ Parameters
50
+ ----------
51
+ offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}
52
+ This value is added to the quality score to obtain the
53
+ ASCII code.
54
+ Can either be directly the value, or a string that indicates
55
+ the score format.
56
+ chars_per_line : int, optional
57
+ The number of characters in a line containing sequence data
58
+ after which a line break is inserted.
59
+ Only relevant, when adding sequences to a file.
60
+ By default each sequence (and score string)
61
+ is put into one line.
62
+
63
+ Examples
64
+ --------
65
+
66
+ >>> import os.path
67
+ >>> file = FastqFile(offset="Sanger")
68
+ >>> file["seq1"] = str(NucleotideSequence("ATACT")), [0,3,10,7,12]
69
+ >>> file["seq2"] = str(NucleotideSequence("TTGTAGG")), [15,13,24,21,28,38,35]
70
+ >>> print(file)
71
+ @seq1
72
+ ATACT
73
+ +
74
+ !$+(-
75
+ @seq2
76
+ TTGTAGG
77
+ +
78
+ 0.96=GD
79
+ >>> sequence, scores = file["seq1"]
80
+ >>> print(sequence)
81
+ ATACT
82
+ >>> print(scores)
83
+ [ 0 3 10 7 12]
84
+ >>> del file["seq1"]
85
+ >>> print(file)
86
+ @seq2
87
+ TTGTAGG
88
+ +
89
+ 0.96=GD
90
+ >>> file.write(os.path.join(path_to_directory, "test.fastq"))
91
+ """
92
+
93
def __init__(self, offset, chars_per_line=None):
    # Create an empty FASTQ file; entries are added via item assignment
    super().__init__()
    # Line width used when new entries are written; None means that
    # sequence and score strings are not wrapped
    self._chars_per_line = chars_per_line
    # Maps identifier -> (seq_start, seq_stop, score_start, score_stop),
    # which are index ranges into `self.lines`
    self._entries = OrderedDict()
    # `offset` may be a number or a format name; `_convert_offset()`
    # (defined outside this chunk) is expected to normalize it
    # - TODO confirm
    self._offset = _convert_offset(offset)
98
+
99
@classmethod
def read(cls, file, offset, chars_per_line=None):
    """
    Parse a FASTQ file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.
    offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}
        This value is added to the quality score to obtain the
        ASCII code.
        Can either be directly the value, or a string that indicates
        the score format.
    chars_per_line : int, optional
        The number of characters in a line containing sequence data
        after which a line break is inserted.
        Only relevant, when adding sequences to a file.
        By default each sequence (and score string)
        is put into one line.

    Returns
    -------
    file_object : FastqFile
        The parsed file.

    Raises
    ------
    InvalidFileError
        If the file contains no non-blank line.
    """
    parsed = super().read(file, offset, chars_per_line)
    # Strip surrounding whitespace, then drop the lines that became empty
    stripped = [line.strip() for line in parsed.lines]
    parsed.lines = [line for line in stripped if line]
    if not parsed.lines:
        raise InvalidFileError("File is empty")
    # Index the entries found in the cleaned lines
    parsed._find_entries()
    return parsed
135
+
136
def get_seq_string(self, identifier):
    """
    Get the sequence of the entry with the given identifier as
    string.

    Parameters
    ----------
    identifier : str
        The identifier of the sequence.

    Returns
    -------
    sequence : str
        The sequence corresponding to the identifier.

    Raises
    ------
    IndexError
        If `identifier` is not a string.
    """
    if not isinstance(identifier, str):
        raise IndexError("'FastqFile' only supports identifier strings as keys")
    # Only the range of the sequence lines is needed here
    seq_start, seq_stop, _, _ = self._entries[identifier]
    # A sequence may span multiple lines -> join them
    return "".join(self.lines[seq_start:seq_stop])
157
+
158
def get_quality(self, identifier):
    """
    Get the quality scores of the entry with the given identifier.

    Parameters
    ----------
    identifier : str
        The identifier of the quality scores.

    Returns
    -------
    scores : ndarray, dtype=int
        The quality scores corresponding to the identifier.

    Raises
    ------
    IndexError
        If `identifier` is not a string.
    """
    if not isinstance(identifier, str):
        raise IndexError("'FastqFile' only supports identifier strings as keys")
    # Only the range of the score lines is needed here
    _, _, score_start, score_stop = self._entries[identifier]
    # The score string may span multiple lines -> join them before decoding
    score_str = "".join(self.lines[score_start:score_stop])
    return _score_str_to_scores(score_str, self._offset)
179
+
180
+ def __setitem__(self, identifier, item):
181
+ sequence, scores = item
182
+ if len(sequence) != len(scores):
183
+ raise ValueError(
184
+ f"Sequence has length {len(sequence)}, "
185
+ f"but score length is {len(scores)}"
186
+ )
187
+ if not isinstance(identifier, str):
188
+ raise IndexError("'FastqFile' only supports strings as identifier")
189
+ # Delete lines of entry corresponding to the identifier,
190
+ # if already existing
191
+ if identifier in self:
192
+ del self[identifier]
193
+
194
+ # Create new lines
195
+ # Start with identifier line
196
+ new_lines = ["@" + identifier.replace("\n", "").strip()]
197
+ # Append new lines with sequence string (with line breaks)
198
+ seq_start_i = len(new_lines)
199
+ if self._chars_per_line is None:
200
+ new_lines.append(str(sequence))
201
+ else:
202
+ new_lines += wrap_string(sequence, width=self._chars_per_line)
203
+ seq_stop_i = len(new_lines)
204
+ # Append sequence-score separator
205
+ new_lines += ["+"]
206
+ # Append scores
207
+ score_chars = _scores_to_score_str(scores, self._offset)
208
+ score_start_i = len(new_lines)
209
+ if self._chars_per_line is None:
210
+ new_lines.append(score_chars)
211
+ else:
212
+ new_lines += wrap_string(score_chars, width=self._chars_per_line)
213
+ score_stop_i = len(new_lines)
214
+
215
+ if identifier in self:
216
+ # Delete lines of entry corresponding to the header,
217
+ # if existing
218
+ del self[identifier]
219
+ self.lines += new_lines
220
+ self._find_entries()
221
+ else:
222
+ # Simply append lines
223
+ # Add entry in a more efficient way than '_find_entries()'
224
+ # for this simple case
225
+ self._entries[identifier] = (
226
+ len(self.lines) + seq_start_i,
227
+ len(self.lines) + seq_stop_i,
228
+ len(self.lines) + score_start_i,
229
+ len(self.lines) + score_stop_i,
230
+ )
231
+ self.lines += new_lines
232
+
233
+ def __getitem__(self, identifier):
234
+ return self.get_seq_string(identifier), self.get_quality(identifier)
235
+
236
+ def __delitem__(self, identifier):
237
+ seq_start, seq_stop, score_start, score_stop = self._entries[identifier]
238
+ del self.lines[seq_start - 1 : score_stop]
239
+ del self._entries[identifier]
240
+ self._find_entries()
241
+
242
def __len__(self):
    # Number of entries that are currently indexed in `self._entries`
    return len(self._entries)
244
+
245
+ def __iter__(self):
246
+ return self._entries.__iter__()
247
+
248
def __contains__(self, identifer):
    # Membership is decided by the entry index alone, the lines are not
    # scanned
    return identifer in self._entries
250
+
251
+ def _find_entries(self):
252
+ self._entries = OrderedDict()
253
+ in_sequence = False
254
+ # Record if the parser is currently in a quality score section,
255
+ # as the '@' character at the start of a line may also be a
256
+ # score instead of the start of an identifier
257
+ in_scores = False
258
+ seq_len = 0
259
+ score_len = 0
260
+ seq_start_i = None
261
+ seq_stop_i = None
262
+ score_start_i = None
263
+ score_stop_i = None
264
+ identifier = None
265
+ for i, line in enumerate(self.lines):
266
+ if not in_scores and not in_sequence and line[0] == "@":
267
+ # Identifier line
268
+ identifier = line[1:]
269
+ seq_start_i = i + 1
270
+ # Next line is sequence
271
+ in_sequence = True
272
+ # Reset
273
+ seq_len = 0
274
+ score_len = 0
275
+ elif in_sequence:
276
+ if line[0] == "+":
277
+ # End of sequence start of scores
278
+ in_sequence = False
279
+ in_scores = True
280
+ seq_stop_i = i
281
+ score_start_i = i + 1
282
+ else:
283
+ # Still in sequence
284
+ seq_len += len(line)
285
+ elif in_scores:
286
+ score_len += len(line)
287
+ if score_len < seq_len:
288
+ # Scores have not ended yet
289
+ pass
290
+ elif score_len == seq_len:
291
+ # End of scores
292
+ # -> End of entry
293
+ score_stop_i = i + 1
294
+ in_scores = False
295
+ # Record this entry
296
+ self._entries[identifier] = (
297
+ seq_start_i,
298
+ seq_stop_i,
299
+ score_start_i,
300
+ score_stop_i,
301
+ )
302
+ else: # score_len > seq_len
303
+ raise InvalidFileError(
304
+ f"The amount of scores is not equal to the sequence "
305
+ f"length for the sequence in line {seq_start_i + 1} "
306
+ )
307
+ else:
308
+ raise InvalidFileError(f"Line {i + 1} in FASTQ file is invalid")
309
+ # At the end of the file, the last sequence or score block
310
+ # must have properly ended
311
+ if in_sequence or in_scores:
312
+ raise InvalidFileError("The last entry in the file is incomplete")
313
+
314
@staticmethod
def read_iter(file, offset):
    """
    Create an iterator over each sequence (and corresponding scores)
    of the given FASTQ file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.
    offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}
        The value that is added to the quality score to obtain the
        ASCII code.
        Can either be directly the value, or a string that indicates
        the score format.

    Yields
    ------
    identifier : str
        The identifier of the current sequence.
    sequence : tuple(str, ndarray)
        The current sequence as string and its corresponding quality
        scores as :class:`ndarray`.

    Raises
    ------
    InvalidFileError
        If a line outside of an entry does not start with ``'@'``,
        if an entry contains more score characters than sequence
        characters or if the file ends within an entry.
        As this function is a generator, the exception is raised
        while iterating.

    Notes
    -----
    This approach gives the same results as
    `FastqFile.read(file, offset).items()`, but is slightly faster
    and much more memory efficient.
    """
    offset = _convert_offset(offset)

    identifier = None
    seq_str_list = []
    score_str_list = []
    in_sequence = False
    in_scores = False
    seq_len = 0
    score_len = 0

    for line in TextFile.read_iter(file):
        line = line.strip()
        # Ignore empty lines
        if len(line) == 0:
            continue

        if not in_scores and not in_sequence and line[0] == "@":
            # Track new entry
            identifier = line[1:]
            in_sequence = True
            # Reset
            seq_len = 0
            score_len = 0
            seq_str_list = []
            score_str_list = []

        elif in_sequence:
            if line[0] == "+":
                # End of sequence, start of scores
                in_sequence = False
                in_scores = True
            else:
                # Still in sequence
                seq_len += len(line)
                seq_str_list.append(line)

        elif in_scores:
            score_len += len(line)
            score_str_list.append(line)
            if score_len < seq_len:
                # Scores have not ended yet
                pass
            elif score_len == seq_len:
                # End of scores
                # -> End of entry
                in_scores = False
                # yield this entry
                scores = _score_str_to_scores("".join(score_str_list), offset)
                yield identifier, ("".join(seq_str_list), scores)
            else:  # score_len > seq_len
                raise InvalidFileError(
                    "The amount of scores is not equal to the sequence length"
                )

        else:
            raise InvalidFileError("FASTQ file is invalid")

    # At the end of the file, the last sequence or score block
    # must have properly ended, otherwise a truncated entry would be
    # dropped silently
    if in_sequence or in_scores:
        raise InvalidFileError("The last entry in the file is incomplete")
400
+
401
@staticmethod
def write_iter(file, items, offset, chars_per_line=None):
    """
    Iterate over the given `items` and write each item into
    the specified `file`.

    In contrast to :meth:`write()`, the lines of text are not stored
    in an intermediate :class:`TextFile`, but are directly written
    to the file.
    Hence, this static method may save a large amount of memory if
    a large file should be written, especially if the `items`
    are provided as generator.

    Parameters
    ----------
    file : file-like object or str
        The file to be written to.
        Alternatively a file path can be supplied.
    items : generator or array-like of tuple(str, tuple(str, ndarray))
        The entries to be written into the file.
        Each entry consists of an identifier string and a tuple
        containing a sequence (as string) and a score array.
    offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}
        This value is added to the quality score to obtain the
        ASCII code.
        Can either be directly the value, or a string that indicates
        the score format.
    chars_per_line : int, optional
        The number of characters in a line containing sequence data
        after which a line break is inserted.
        By default each sequence (and score string)
        is put into one line.

    Notes
    -----
    This method does not test, whether the given identifiers are
    unambiguous.
    """
    offset = _convert_offset(offset)

    def text_lines(text):
        # A single line, unless line wrapping was requested
        if chars_per_line is None:
            return (str(text),)
        return wrap_string(text, width=chars_per_line)

    def line_generator():
        for identifier, (sequence, scores) in items:
            if len(sequence) != len(scores):
                raise ValueError(
                    f"Sequence has length {len(sequence)}, "
                    f"but score length is {len(scores)}"
                )
            if not isinstance(identifier, str):
                raise IndexError("'FastqFile' only supports strings as identifier")

            # Identifier line without line breaks and surrounding whitespace
            header = identifier.replace("\n", "").strip()
            yield "@" + header

            # Sequence line(s)
            yield from text_lines(sequence)

            # Sequence-score separator
            yield "+"

            # Score line(s)
            score_chars = _scores_to_score_str(scores, offset)
            yield from text_lines(score_chars)

    TextFile.write_iter(file, line_generator())
475
+
476
+
477
+ def _score_str_to_scores(score_str, offset):
478
+ """
479
+ Convert an ASCII string into actual score values.
480
+ """
481
+ scores = np.frombuffer(bytearray(score_str, encoding="ascii"), dtype=np.int8)
482
+ scores -= offset
483
+ return scores
484
+
485
+
486
+ def _scores_to_score_str(scores, offset):
487
+ """
488
+ Convert score values into an ASCII string.
489
+ """
490
+ scores = np.asarray(scores) + offset
491
+ return scores.astype(np.int8, copy=False).tobytes().decode("ascii")
492
+
493
+
494
+ def _convert_offset(offset_val_or_string):
495
+ """
496
+ If the given offset is a string return the corresponding numerical
497
+ value.
498
+ """
499
+ if isinstance(offset_val_or_string, Integral):
500
+ return offset_val_or_string
501
+ elif isinstance(offset_val_or_string, str):
502
+ return _OFFSETS[offset_val_or_string]
503
+ else:
504
+ raise TypeError(
505
+ f"The offset must be either an integer or a string "
506
+ f"indicating the format, not {type(offset_val_or_string).__name__}"
507
+ )
@@ -0,0 +1,17 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading/writing information
7
+ (especially sequence features) from/to files in the *GenBank*
8
+ and *GenPept* format.
9
+ """
10
+
11
+ __name__ = "biotite.sequence.io.genbank"
12
+ __author__ = "Patrick Kunzmann"
13
+
14
+ from .annotation import *
15
+ from .file import *
16
+ from .metadata import *
17
+ from .sequence import *