biotite 1.6.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +426 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +202 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +66 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +224 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +259 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +191 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +127 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +491 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +763 -0
  73. biotite/sequence/align/banded.cp314-win_amd64.pyd +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cp314-win_amd64.pyd +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cp314-win_amd64.pyd +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cp314-win_amd64.pyd +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cp314-win_amd64.pyd +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cp314-win_amd64.pyd +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cp314-win_amd64.pyd +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cp314-win_amd64.pyd +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cp314-win_amd64.pyd +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cp314-win_amd64.pyd +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cp314-win_amd64.pyd +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cp314-win_amd64.pyd +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +462 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cp314-win_amd64.pyd +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cp314-win_amd64.pyd +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cp314-win_amd64.pyd +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1596 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cp314-win_amd64.pyd +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cp314-win_amd64.pyd +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cp314-win_amd64.pyd +0 -0
  272. biotite/structure/charges.pyx +521 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +646 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +426 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cp314-win_amd64.pyd +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2122 -0
  323. biotite/structure/io/pdbx/encoding.cp314-win_amd64.pyd +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +452 -0
  338. biotite/structure/sasa.cp314-win_amd64.pyd +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.6.0.dist-info/METADATA +162 -0
  352. biotite-1.6.0.dist-info/RECORD +354 -0
  353. biotite-1.6.0.dist-info/WHEEL +4 -0
  354. biotite-1.6.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,26 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading and writing sequence features in the
7
+ *Generic Feature Format 3* (GFF3).
8
+
9
+ It provides the :class:`GFFFile` class, a low-level line-based
10
+ interface to this format, and high-level functions for extracting
11
+ :class:`Annotation` objects.
12
+
13
+ .. note: This package cannot create hierarchical data structures from
14
+ GFF 3 files. This means that you cannot directly access the
15
+ parent or child of a feature.
16
+ However, the ``Id`` and ``Name`` attributes are stored in the
17
+ qualifiers of the created :class:`Feature` objects.
18
+ Hence, it is possible to implement such a data structure from this
19
+ information.
20
+ """
21
+
22
+ __name__ = "biotite.sequence.io.gff"
23
+ __author__ = "Patrick Kunzmann"
24
+
25
+ from .convert import *
26
+ from .file import *
@@ -0,0 +1,128 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.gff"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_annotation", "set_annotation"]
8
+
9
+ from biotite.sequence.annotation import Annotation, Feature, Location
10
+
11
+
12
def get_annotation(gff_file):
    """
    Parse a GFF3 file into an :class:`Annotation`.

    The *type* column becomes the :attr:`Feature.key` attribute,
    the locations (``loc``) are built from the *start*, *end* and
    *strand* columns and the *attributes* column becomes the
    :attr:`Feature.qual` attribute.
    Consecutive entries that share the same ``ID`` attribute are
    interpreted as one feature:
    the *type* and *attributes* are taken from the first of these
    entries, while the locations of all of them are collected.
    Entries without an ``ID`` attribute always form their own feature.

    Parameters
    ----------
    gff_file : GFFFile
        The file to extract the :class:`Annotation` object from.

    Returns
    -------
    annotation : Annotation
        The extracted annotation.
    """
    annotation = Annotation()
    # State of the feature that is currently being assembled;
    # 'pending_key' stays None until the first entry has been read
    pending_key = pending_locs = pending_qual = None
    previous_id = None

    for entry in gff_file:
        _, _, feature_type, first, last, _, strand, _, attributes = entry
        feature_id = attributes.get("ID")
        location = Location(first, last, strand)
        if feature_id is not None and feature_id == previous_id:
            # Same ID as the preceding entry -> same feature,
            # only an additional location
            pending_locs.append(location)
        else:
            if pending_key is not None:
                # A new feature begins -> store the finished one
                annotation.add_feature(
                    Feature(pending_key, pending_locs, pending_qual)
                )
            pending_key = feature_type
            pending_locs = [location]
            pending_qual = attributes
        previous_id = feature_id

    # The last feature has no successor that would trigger its storage
    if pending_key is not None:
        annotation.add_feature(Feature(pending_key, pending_locs, pending_qual))
    return annotation
60
+
61
+
62
def set_annotation(gff_file, annotation, seqid=None, source=None, is_stranded=True):
    """
    Write an :class:`Annotation` object into a GFF3 file.

    Each feature will get one entry for each location it has.
    :class:`Feature` objects with multiple locations require the ``ID``
    qualifier in its :attr:`Feature.qual` attribute.
    The entries of a feature are written in ascending order of their
    first position, or in descending order if the feature is on the
    reverse strand.
    For ``CDS`` features the *phase* column is derived from the summed
    length of the locations written before; for all other feature types
    it is left empty.

    Parameters
    ----------
    gff_file : GFFFile
        The GFF3 file to write into.
    annotation : Annotation
        The annotation which is written to the GFF3 file.
    seqid : str, optional
        The content for the *seqid* column.
    source : str, optional
        The content for the *source* column.
    is_stranded : bool, optional
        If true, the strand of each feature is taken into account.
        Otherwise the *strand* column is filled with '``.``'.

    Raises
    ------
    ValueError
        If a feature with multiple locations has no ``ID`` qualifier
        or if `seqid` contains a space character.
    """
    for feature in sorted(annotation):
        if len(feature.locs) > 1 and "ID" not in feature.qual:
            raise ValueError(
                "The 'ID' qualifier is required for features with multiple locations"
            )
        ## seqid ##
        if seqid is not None and " " in seqid:
            raise ValueError("The 'seqid' must not contain whitespaces")
        ## source ##
        # Nothing to be done
        ## type ##
        feature_type = feature.key
        ## score ##
        # An annotation carries no score information
        score = None
        ## attributes ##
        attributes = feature.qual
        # The previous properties are shared by all entries
        # for this feature
        # The following loop handles properties that change with each
        # location

        # Expect same strandedness for all locations:
        # The strand of an arbitrary location decides the entry order
        feature_strand = list(feature.locs)[0].strand if is_stranded else None
        reverse_order = feature_strand == Location.Strand.REVERSE
        next_phase = 0
        for loc in sorted(
            feature.locs, key=lambda loc: loc.first, reverse=reverse_order
        ):
            ## start ##
            start = loc.first
            ## end ##
            end = loc.last
            ## strand ##
            strand = loc.strand if is_stranded else None
            ## phase ##
            if feature_type == "CDS":
                phase = next_phase
                # Subtract the length of the location:
                # The remainder is the number of bases the next entry
                # needs to skip to reach the start of a codon
                next_phase -= loc.last - loc.first + 1
                next_phase %= 3
            else:
                phase = None
            gff_file.append(
                seqid, source, feature_type, start, end, score, strand, phase,
                attributes,
            )
@@ -0,0 +1,449 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.gff"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["GFFFile"]
8
+
9
+ import string
10
+ import warnings
11
+ from urllib.parse import quote, unquote
12
+ from biotite.file import InvalidFileError, TextFile
13
+ from biotite.sequence.annotation import Location
14
+
15
# All ASCII punctuation characters except
# percent, semicolon, equals, ampersand and comma,
# followed by the space character
# NOTE(review): presumably used as the 'safe' characters for
# urllib.parse.quote() - the usage is outside of this view, confirm
_NOT_QUOTED = string.punctuation.translate(str.maketrans("", "", "%;=&,")) + " "
20
+
21
+
22
+ class GFFFile(TextFile):
23
+ """
24
+ This class represents a file in *Generic Feature Format 3*
25
+ (`GFF3 <https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md>`_)
26
+ format.
27
+
28
+ Similar to GenBank files, GFF3 files contain information about
29
+ features of a reference sequence, but in a more concise and better
30
+ parsable way.
31
+ However, it does not provide additional meta information.
32
+
33
+ This class serves as low-level API for accessing GFF3 files.
34
+ It is used as a sequence of entries, where each entry is defined as
35
+ a non-comment and non-directive line.
36
+ Each entry consists of values corresponding to the 9 columns of
37
+ GFF3:
38
+
39
+ ============== =============================== ==========================================================
40
+ **seqid** ``str`` The ID of the reference sequence
41
+ **source** ``str`` Source of the data (e.g. ``Genbank``)
42
+ **type** ``str`` Type of the feature (e.g. ``CDS``)
43
+ **start** ``int`` Start coordinate of feature on the reference sequence
44
+ **end** ``int`` End coordinate of feature on the reference sequence
45
+ **score** ``float`` or ``None`` Optional score (e.g. an E-value)
46
+ **strand** ``Location.Strand`` or ``None`` Strand of the feature, ``None`` if feature is not stranded
47
+ **phase** ``int`` or ``None`` Reading frame shift, ``None`` for non-CDS features
48
+ **attributes** ``dict`` Additional properties of the feature
49
+ ============== =============================== ==========================================================
50
+
51
+ Note that the entry index may not be equal to the line index,
52
+ because GFF3 files can contain comment and directive lines.
53
+
54
+ Notes
55
+ -----
56
+ Although the GFF3 specification allows mixing in reference sequence
57
+ data in FASTA format via the ``##FASTA`` directive, this class does
58
+ not support extracting the sequence information.
59
+ The content after the ``##FASTA`` directive is simply ignored.
60
+ Please provide the sequence via a separate file or read the FASTA
61
+ data directly via the :attr:`lines` attribute:
62
+
63
+ >>> import os.path
64
+ >>> from io import StringIO
65
+ >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "indexing_test.gff3"))
66
+ >>> fasta_start_index = None
67
+ >>> for directive, line_index in gff_file.directives():
68
+ ... if directive == "FASTA":
69
+ ... fasta_start_index = line_index + 1
70
+ >>> fasta_data = StringIO("\\n".join(gff_file.lines[fasta_start_index:]))
71
+ >>> fasta_file = FastaFile.read(fasta_data)
72
+ >>> for seq_string in fasta_file.values():
73
+ ... print(seq_string[:60] + "...")
74
+ TACGTAGCTAGCTGATCGATGTTGTGTGTATCGATCTAGCTAGCTAGCTGACTACACAAT...
75
+
76
+ Examples
77
+ --------
78
+ Reading and editing of an existing GFF3 file:
79
+
80
+ >>> import os.path
81
+ >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "gg_avidin.gff3"))
82
+ >>> # Get content of first entry
83
+ >>> seqid, source, type, start, end, score, strand, phase, attrib = gff_file[0]
84
+ >>> print(seqid)
85
+ AJ311647.1
86
+ >>> print(source)
87
+ EMBL
88
+ >>> print(type)
89
+ region
90
+ >>> print(start)
91
+ 1
92
+ >>> print(end)
93
+ 1224
94
+ >>> print(score)
95
+ None
96
+ >>> print(strand)
97
+ Strand.FORWARD
98
+ >>> print(phase)
99
+ None
100
+ >>> print(attrib)
101
+ {'ID': 'AJ311647.1:1..1224', 'Dbxref': 'taxon:9031', 'Name': 'Z', 'chromosome': 'Z', 'gbkey': 'Src', 'mol_type': 'genomic DNA'}
102
+ >>> # Edit the first entry: Simply add a score
103
+ >>> score = 1.0
104
+ >>> gff_file[0] = seqid, source, type, start, end, score, strand, phase, attrib
105
+ >>> # Delete first entry
106
+ >>> del gff_file[0]
107
+
108
+ Writing a new GFF3 file:
109
+
110
+ >>> gff_file = GFFFile()
111
+ >>> gff_file.append_directive("Example directive", "param1", "param2")
112
+ >>> gff_file.append(
113
+ ... "SomeSeqID", "Biotite", "CDS", 1, 99,
114
+ ... None, Location.Strand.FORWARD, 0,
115
+ ... {"ID": "FeatureID", "product":"A protein"}
116
+ ... )
117
+ >>> print(gff_file) #doctest: +NORMALIZE_WHITESPACE
118
+ ##gff-version 3
119
+ ##Example directive param1 param2
120
+ SomeSeqID Biotite CDS 1 99 . + 0 ID=FeatureID;product=A protein
121
+ """
122
+
123
def __init__(self):
    """
    Create a new file object and append the ``gff-version 3``
    directive to it.
    """
    super().__init__()
    # _entries: Maps entry indices to line indices
    # _directives: Stores the directives as
    #              (directive text, line index)-tuple
    # _has_fasta: Stores whether the file has FASTA data
    # NOTE(review): presumably all three are populated by
    # _index_entries() - its implementation is outside of this view
    self._entries = self._directives = self._has_fasta = None
    self._index_entries()
    self.append_directive("gff-version", "3")
133
+
134
@classmethod
def read(cls, file):
    """
    Read a GFF3 file.

    The file is read by the parent class, afterwards its entries
    are indexed.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : GFFFile
        The parsed file.
    """
    gff_file = super().read(file)
    # The index must be rebuilt for the lines that were just read
    gff_file._index_entries()
    return gff_file
153
+
154
def insert(
    self,
    index,
    seqid,
    source,
    type,
    start,
    end,
    score,
    strand,
    phase,
    attributes=None,
):
    """
    Insert an entry at the given index.

    Parameters
    ----------
    index : int
        Index where the entry is inserted.
        If the index is equal to the length of the file, the entry
        is appended at the end of the file.
    seqid : str
        The ID of the reference sequence.
    source : str
        Source of the data (e.g. ``Genbank``).
    type : str
        Type of the feature (e.g. ``CDS``).
    start : int
        Start coordinate of feature on the reference sequence.
    end : int
        End coordinate of feature on the reference sequence.
    score : float or None
        Optional score (e.g. an E-value).
    strand : Location.Strand or None
        Strand of the feature, ``None`` if feature is not stranded.
    phase : int or None
        Reading frame shift, ``None`` for non-CDS features.
    attributes : dict, optional
        Additional properties of the feature.
    """
    columns = (seqid, source, type, start, end, score, strand, phase, attributes)

    if index == len(self):
        # One past the last entry -> equivalent to appending
        self.append(*columns)
        return

    # The new line takes the place of the line that holds the entry
    # currently located at 'index'
    target_line_index = self._entries[index]
    new_line = GFFFile._create_line(*columns)
    self.lines.insert(target_line_index, new_line)
    # All subsequent lines were shifted -> the index must be rebuilt
    self._index_entries()
206
+
207
+ def append(
208
+ self, seqid, source, type, start, end, score, strand, phase, attributes=None
209
+ ):
210
+ """
211
+ Append an entry to the end of the file.
212
+
213
+ Parameters
214
+ ----------
215
+ seqid : str
216
+ The ID of the reference sequence.
217
+ source : str
218
+ Source of the data (e.g. ``Genbank``).
219
+ type : str
220
+ Type of the feature (e.g. ``CDS``).
221
+ start : int
222
+ Start coordinate of feature on the reference sequence.
223
+ end : int
224
+ End coordinate of feature on the reference sequence.
225
+ score : float or None
226
+ Optional score (e.g. an E-value).
227
+ strand : Location.Strand or None
228
+ Strand of the feature, ``None`` if feature is not stranded.
229
+ phase : int or None
230
+ Reading frame shift, ``None`` for non-CDS features.
231
+ attributes : dict, optional
232
+ Additional properties of the feature.
233
+ """
234
+ if self._has_fasta:
235
+ raise NotImplementedError(
236
+ "Cannot append feature entries, "
237
+ "as this file contains additional FASTA data"
238
+ )
239
+ line = GFFFile._create_line(
240
+ seqid, source, type, start, end, score, strand, phase, attributes
241
+ )
242
+ self.lines.append(line)
243
+ # Fast update of entry index by adding last line
244
+ self._entries.append(len(self.lines) - 1)
245
+
246
+ def append_directive(self, directive, *args):
247
+ """
248
+ Append a directive line to the end of the file.
249
+
250
+ Parameters
251
+ ----------
252
+ directive : str
253
+ Name of the directive.
254
+ *args : str
255
+ Optional parameters for the directive.
256
+ Each argument is simply appended to the directive, separated
257
+ by a single space character.
258
+
259
+ Raises
260
+ ------
261
+ NotImplementedError
262
+ If the ``##FASTA`` directive is used, which is not
263
+ supported.
264
+
265
+ Examples
266
+ --------
267
+
268
+ >>> gff_file = GFFFile()
269
+ >>> gff_file.append_directive("Example directive", "param1", "param2")
270
+ >>> print(gff_file)
271
+ ##gff-version 3
272
+ ##Example directive param1 param2
273
+ """
274
+ if directive.startswith("FASTA"):
275
+ raise NotImplementedError("Adding FASTA information is not supported")
276
+ directive_line = "##" + directive + " " + " ".join(args)
277
+ self._directives.append((directive_line[2:], len(self.lines)))
278
+ self.lines.append(directive_line)
279
+
280
+ def directives(self):
281
+ """
282
+ Get the directives in the file.
283
+
284
+ Returns
285
+ -------
286
+ directives : list of tuple(str, int)
287
+ A list of directives, sorted by their line order.
288
+ The first element of each tuple is the name of the
289
+ directive (without ``##``), the second element is the index
290
+ of the corresponding line.
291
+ """
292
+ # Sort in line order
293
+ return sorted(self._directives, key=lambda directive: directive[1])
294
+
295
+ def __setitem__(self, index, item):
296
+ seqid, source, type, start, end, score, strand, phase, attrib = item
297
+ line = GFFFile._create_line(
298
+ seqid, source, type, start, end, score, strand, phase, attrib
299
+ )
300
+ line_index = self._entries[index]
301
+ self.lines[line_index] = line
302
+
303
+ def __getitem__(self, index):
304
+ if (index >= 0 and index >= len(self)) or (index < 0 and -index > len(self)):
305
+ raise IndexError(
306
+ f"Index {index} is out of range for GFFFile with {len(self)} entries"
307
+ )
308
+
309
+ line_index = self._entries[index]
310
+ # Columns are tab separated
311
+ s = self.lines[line_index].strip().split("\t")
312
+ if len(s) != 9:
313
+ raise InvalidFileError(f"Expected 9 columns, but got {len(s)}")
314
+ seqid, source, type, start, end, score, strand, phase, attrib = s
315
+
316
+ seqid = unquote(seqid)
317
+ source = unquote(source)
318
+ type = unquote(type)
319
+ start = int(start)
320
+ end = int(end)
321
+ score = None if score == "." else float(score)
322
+ if strand == "+":
323
+ strand = Location.Strand.FORWARD
324
+ elif strand == "-":
325
+ strand = Location.Strand.REVERSE
326
+ else:
327
+ strand = None
328
+ phase = None if phase == "." else int(phase)
329
+ attrib = GFFFile._parse_attributes(attrib)
330
+
331
+ return seqid, source, type, start, end, score, strand, phase, attrib
332
+
333
+ def __delitem__(self, index):
334
+ line_index = self._entries[index]
335
+ del self.lines[line_index]
336
+ self._index_entries()
337
+
338
+ def __len__(self):
339
+ return len(self._entries)
340
+
341
+ def _index_entries(self):
342
+ """
343
+ Parse the file for comment and directive lines.
344
+ Count these lines cumulatively, so that entry indices can be
345
+ mapped onto line indices.
346
+ Additionally track the line index of directive lines.
347
+ """
348
+ self._directives = []
349
+ # Worst case allocation -> all lines contain actual entries
350
+ self._entries = [None] * len(self.lines)
351
+ self._has_fasta = False
352
+ entry_counter = 0
353
+ for line_i, line in enumerate(self.lines):
354
+ if len(line) == 0 or line[0] == " ":
355
+ # Empty line -> do nothing
356
+ pass
357
+ elif line.startswith("#"):
358
+ # Comment or directive
359
+ if line.startswith("##"):
360
+ # Directive
361
+ # Omit the leading '##'
362
+ self._directives.append((line[2:], line_i))
363
+ if line[2:] == "FASTA":
364
+ self._has_fasta = True
365
+ # This parser does not support bundled FASTA
366
+ # data
367
+ warnings.warn(
368
+ "Biotite does not support FASTA data mixed into "
369
+ "GFF files, the FASTA data will be ignored"
370
+ )
371
+ # To ignore the following FASTA data, stop
372
+ # parsing at this point
373
+ break
374
+ else:
375
+ # Actual entry
376
+ self._entries[entry_counter] = line_i
377
+ entry_counter += 1
378
+ # Trim to correct size
379
+ self._entries = self._entries[:entry_counter]
380
+
381
@staticmethod
def _create_line(seqid, source, type, start, end, score, strand, phase, attributes):
    """
    Build the tab-separated text line for a single entry.

    *seqid* and *source* are stripped and percent-encoded, ``None``
    values are written as ``.``.

    Raises
    ------
    ValueError
        If *seqid*, *source* or *type* is empty or if *seqid* starts
        with ``>``.
    """

    def _escape_or_dot(text):
        # A missing value is represented by the placeholder '.'
        return "." if text is None else quote(text.strip(), safe=_NOT_QUOTED)

    seqid = _escape_or_dot(seqid)
    source = _escape_or_dot(source)
    feature_type = type.strip()

    # Perform checks
    if not seqid:
        raise ValueError("'seqid' must not be empty")
    if not source:
        raise ValueError("'source' must not be empty")
    if not feature_type:
        raise ValueError("'type' must not be empty")
    if seqid.startswith(">"):
        raise ValueError("'seqid' must not start with '>'")

    score_column = "." if score is None else str(score)

    if strand == Location.Strand.FORWARD:
        strand_column = "+"
    elif strand == Location.Strand.REVERSE:
        strand_column = "-"
    else:
        strand_column = "."

    phase_column = "." if phase is None else str(phase)

    if attributes is None or len(attributes) == 0:
        attrib_column = "."
    else:
        # 'key=value' pairs, separated by semicolons
        attrib_column = ";".join(
            f"{quote(key, safe=_NOT_QUOTED)}={quote(val, safe=_NOT_QUOTED)}"
            for key, val in attributes.items()
        )

    columns = [
        seqid,
        source,
        feature_type,
        str(start),
        str(end),
        score_column,
        strand_column,
        phase_column,
        attrib_column,
    ]
    return "\t".join(columns)
432
+
433
+ @staticmethod
434
+ def _parse_attributes(attributes):
435
+ """
436
+ Parse the *attributes* string into a dictionary.
437
+ """
438
+ if attributes == ".":
439
+ return {}
440
+
441
+ attrib_dict = {}
442
+ attrib_entries = attributes.split(";")
443
+ for entry in attrib_entries:
444
+ compounds = entry.split("=")
445
+ if len(compounds) != 2:
446
+ raise InvalidFileError(f"Attribute entry '{entry}' is invalid")
447
+ key, val = compounds
448
+ attrib_dict[unquote(key)] = unquote(val)
449
+ return attrib_dict
@@ -0,0 +1,36 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage provides functions and data structures for creating
7
+ (phylogenetic) trees.
8
+
9
+ The :class:`Tree` is the central class in this subpackage.
10
+ It wraps a *root* :class:`TreeNode` object.
11
+ A :class:`TreeNode` is either an intermediate node, if it has child
12
+ :class:`TreeNode` objects, or otherwise a leaf node.
13
+
14
+ A :class:`Tree` is not a container itself:
15
+ Objects, e.g. species names or sequences, that are represented by the
16
+ nodes, cannot be stored directly in a :class:`Tree` or
17
+ :class:`TreeNode`.
18
+ Instead, each leaf node has a reference index:
19
+ These indices refer to a separate list or array, containing the actual
20
+ reference objects.
21
+
22
+ A :class:`Tree` can be created from or exported to a *Newick* notation,
23
+ using the :func:`Tree.from_newick()` or :func:`Tree.to_newick()` method,
24
+ respectively.
25
+
26
+ A :class:`Tree` can be built from a pairwise distance matrix using the
27
+ popular *UPGMA* (:func:`upgma()`) and *Neighbor-Joining*
28
+ (:func:`neighbor_joining()`) algorithms.
29
+ """
30
+
31
+ __name__ = "biotite.sequence.phylo"
32
+ __author__ = "Patrick Kunzmann"
33
+
34
+ from .nj import *
35
+ from .tree import *
36
+ from .upgma import *