biotite 0.41.1__cp310-cp310-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,26 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage is used for reading and writing sequence features in the
7
+ *Generic Feature Format 3* (GFF3).
8
+
9
+ It provides the :class:`GFFFile` class, a low-level line-based
10
+ interface to this format, and high-level functions for extracting
11
+ :class:`Annotation` objects.
12
+
13
+ .. note:: This package cannot create hierarchical data structures from
14
+ GFF 3 files. This means that you cannot directly access the
15
+ parent or child of a feature.
16
+ However, the ``ID`` and ``Name`` attributes are stored in the
17
+ qualifiers of the created :class:`Feature` objects.
18
+ Hence, it is possible to implement such a data structure from this
19
+ information.
20
+ """
21
+
22
+ __name__ = "biotite.sequence.io.gff"
23
+ __author__ = "Patrick Kunzmann"
24
+
25
+ from .file import *
26
+ from .convert import *
@@ -0,0 +1,133 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.gff"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_annotation", "set_annotation"]
8
+
9
+ from ...annotation import Location, Feature, Annotation
10
+
11
+
12
def get_annotation(gff_file):
    """
    Build an :class:`Annotation` from the entries of a GFF3 file.

    For each feature the *type* column becomes the :attr:`Feature.key`,
    the *start*, *end* and *strand* columns are combined into a
    :class:`Location` and the *attributes* column becomes the
    :attr:`Feature.qual`.
    Consecutive entries that share the same ``ID`` attribute are merged
    into a single feature:
    The *type* and *attributes* are taken from the first of these
    entries, while each entry contributes one location.
    Entries without an ``ID`` attribute always form a feature of their
    own.

    Parameters
    ----------
    gff_file : GFFFile
        The file to extract the :class:`Annotation` object from.

    Returns
    -------
    annotation : Annotation
        The extracted annotation.
    """
    annotation = Annotation()
    # State of the feature that is currently being assembled;
    # 'pending_key' stays None until the first entry has been read
    pending_key = None
    pending_locs = None
    pending_qual = None
    previous_id = None

    for _, _, feature_type, start, end, _, strand, _, attrib in gff_file:
        entry_id = attrib.get("ID")
        if entry_id is not None and entry_id == previous_id:
            # Same 'ID' as the preceding entry
            # -> just another location of the pending feature
            pending_locs.append(Location(start, end, strand))
        else:
            # A new feature begins -> flush the pending one, if any
            if pending_key is not None:
                annotation.add_feature(
                    Feature(pending_key, pending_locs, pending_qual)
                )
            pending_key = feature_type
            pending_locs = [Location(start, end, strand)]
            pending_qual = attrib
        previous_id = entry_id

    # The feature assembled last has not been flushed by the loop
    if pending_key is not None:
        annotation.add_feature(
            Feature(pending_key, pending_locs, pending_qual)
        )
    return annotation
62
+
63
+
64
def set_annotation(gff_file, annotation,
                   seqid=None, source=None, is_stranded=True):
    """
    Write an :class:`Annotation` object into a GFF3 file.

    Each feature gets one entry for each location it has.
    :class:`Feature` objects with multiple locations require the ``ID``
    qualifier in their :attr:`Feature.qual` attribute.
    The features are written in sorted order.
    The locations of a feature are written in ascending order of their
    first position, or in descending order if the feature lies on the
    reverse strand and `is_stranded` is true.
    For ``CDS`` features the *phase* column starts at ``0`` for the
    first written location and is derived from the accumulated length
    of the preceding locations for each following one.
    For all other features the *phase* is ``None``.
    The *score* column is always ``None``.

    Parameters
    ----------
    gff_file : GFFFile
        The GFF3 file to write into.
    annotation : Annotation
        The annotation which is written to the GFF3 file.
    seqid : str, optional
        The content for the *seqid* column.
    source : str, optional
        The content for the *source* column.
    is_stranded : bool, optional
        If true, the strand of each feature is taken into account.
        Otherwise the *strand* column is filled with '``.``'.

    Raises
    ------
    ValueError
        If `seqid` contains a space character or if a feature with
        multiple locations has no ``ID`` qualifier.
    """
    # The 'seqid' is shared by all entries -> validate it only once and
    # before the first entry is written into the file
    if seqid is not None and " " in seqid:
        raise ValueError("The 'seqid' must not contain whitespaces")

    for feature in sorted(annotation):
        if len(feature.locs) > 1 and "ID" not in feature.qual:
            raise ValueError(
                "The 'ID' qualifier is required "
                "for features with multiple locations"
            )

        # The following properties are shared by all entries
        # of this feature
        feature_type = feature.key
        is_cds = feature_type == "CDS"
        score = None
        attributes = feature.qual
        # Expect same strandedness for all locations
        first_strand = list(feature.locs)[0].strand if is_stranded else None
        # Without strand information the order is never reversed
        reverse_order = (
            first_strand is not None
            and first_strand == Location.Strand.REVERSE
        )

        # The following properties change with each location
        next_phase = 0
        for loc in sorted(
            feature.locs, key=lambda loc: loc.first, reverse=reverse_order
        ):
            strand = loc.strand if is_stranded else None
            if is_cds:
                phase = next_phase
                # Subtract the length of the location
                next_phase = (next_phase - (loc.last - loc.first + 1)) % 3
            else:
                phase = None
            gff_file.append(
                seqid, source, feature_type, loc.first, loc.last,
                score, strand, phase, attributes
            )
@@ -0,0 +1,434 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.sequence.io.gff"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["GFFFile"]
8
+
9
+ import copy
10
+ import string
11
+ from urllib.parse import quote, unquote
12
+ import warnings
13
+ from ....file import TextFile, InvalidFileError
14
+ from ...annotation import Location
15
+
16
+
17
# The space character and all punctuation characters except
# percent, semicolon, equals, ampersand and comma
# NOTE(review): presumably the characters that are exempt from
# percent-encoding via 'quote()' - the usage is outside this view
_NOT_QUOTED = string.punctuation.translate(
    str.maketrans("", "", "%;=&,")
) + " "
22
+
23
+
24
+ class GFFFile(TextFile):
25
+ """
26
+ This class represents a file in *Generic Feature Format 3*
27
+ (`GFF3 <https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md>`_)
28
+ format.
29
+
30
+ Similar to GenBank files, GFF3 files contain information about
31
+ features of a reference sequence, but in a more concise and better
32
+ parsable way.
33
+ However, it does not provide additional meta information.
34
+
35
+ This class serves as low-level API for accessing GFF3 files.
36
+ It is used as a sequence of entries, where each entry is defined as
37
+ a non-comment and non-directive line.
38
+ Each entry consists of values corresponding to the 9 columns of
39
+ GFF3:
40
+
41
+ ============== =============================== ==========================================================
42
+ **seqid** ``str`` The ID of the reference sequence
43
+ **source** ``str`` Source of the data (e.g. ``Genbank``)
44
+ **type** ``str`` Type of the feature (e.g. ``CDS``)
45
+ **start** ``int`` Start coordinate of feature on the reference sequence
46
+ **end** ``int`` End coordinate of feature on the reference sequence
47
+ **score** ``float`` or ``None`` Optional score (e.g. an E-value)
48
+ **strand** ``Location.Strand`` or ``None`` Strand of the feature, ``None`` if feature is not stranded
49
+ **phase** ``int`` or ``None`` Reading frame shift, ``None`` for non-CDS features
50
+ **attributes** ``dict`` Additional properties of the feature
51
+ ============== =============================== ==========================================================
52
+
53
+ Note that the entry index may not be equal to the line index,
54
+ because GFF3 files can contain comment and directive lines.
55
+
56
+ Notes
57
+ -----
58
+ Although the GFF3 specification allows mixing in reference sequence
59
+ data in FASTA format via the ``##FASTA`` directive, this class does
60
+ not support extracting the sequence information.
61
+ The content after the ``##FASTA`` directive is simply ignored.
62
+ Please provide the sequence via a separate file or read the FASTA
63
+ data directly via the :attr:`lines` attribute:
64
+
65
+ >>> import os.path
66
+ >>> from io import StringIO
67
+ >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "indexing_test.gff3"))
68
+ >>> fasta_start_index = None
69
+ >>> for directive, line_index in gff_file.directives():
70
+ ... if directive == "FASTA":
71
+ ... fasta_start_index = line_index + 1
72
+ >>> fasta_data = StringIO("\\n".join(gff_file.lines[fasta_start_index:]))
73
+ >>> fasta_file = FastaFile.read(fasta_data)
74
+ >>> for seq_string in fasta_file.values():
75
+ ... print(seq_string[:60] + "...")
76
+ TACGTAGCTAGCTGATCGATGTTGTGTGTATCGATCTAGCTAGCTAGCTGACTACACAAT...
77
+
78
+ Examples
79
+ --------
80
+ Reading and editing of an existing GFF3 file:
81
+
82
+ >>> import os.path
83
+ >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "gg_avidin.gff3"))
84
+ >>> # Get content of first entry
85
+ >>> seqid, source, type, start, end, score, strand, phase, attrib = gff_file[0]
86
+ >>> print(seqid)
87
+ AJ311647.1
88
+ >>> print(source)
89
+ EMBL
90
+ >>> print(type)
91
+ region
92
+ >>> print(start)
93
+ 1
94
+ >>> print(end)
95
+ 1224
96
+ >>> print(score)
97
+ None
98
+ >>> print(strand)
99
+ Strand.FORWARD
100
+ >>> print(phase)
101
+ None
102
+ >>> print(attrib)
103
+ {'ID': 'AJ311647.1:1..1224', 'Dbxref': 'taxon:9031', 'Name': 'Z', 'chromosome': 'Z', 'gbkey': 'Src', 'mol_type': 'genomic DNA'}
104
+ >>> # Edit the first entry: Simply add a score
105
+ >>> score = 1.0
106
+ >>> gff_file[0] = seqid, source, type, start, end, score, strand, phase, attrib
107
+ >>> # Delete first entry
108
+ >>> del gff_file[0]
109
+
110
+ Writing a new GFF3 file:
111
+
112
+ >>> gff_file = GFFFile()
113
+ >>> gff_file.append_directive("Example directive", "param1", "param2")
114
+ >>> gff_file.append(
115
+ ... "SomeSeqID", "Biotite", "CDS", 1, 99,
116
+ ... None, Location.Strand.FORWARD, 0,
117
+ ... {"ID": "FeatureID", "product":"A protein"}
118
+ ... )
119
+ >>> print(gff_file) #doctest: +NORMALIZE_WHITESPACE
120
+ ##gff-version 3
121
+ ##Example directive param1 param2
122
+ SomeSeqID Biotite CDS 1 99 . + 0 ID=FeatureID;product=A protein
123
+ """
124
+
125
def __init__(self):
    super().__init__()
    # Whether the file contains FASTA data
    self._has_fasta = None
    # The directives as (directive text, line index) tuples
    self._directives = None
    # Mapping from entry indices to line indices
    self._entries = None
    # Build the index for the freshly created file
    # NOTE(review): presumably this replaces the 'None' placeholders
    # above - '_index_entries()' is outside this view
    self._index_entries()
    # A new file starts with the GFF version directive
    self.append_directive("gff-version", "3")
135
+
136
@classmethod
def read(cls, file):
    """
    Read a GFF3 file and index its entries.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : GFFFile
        The parsed file.
    """
    gff_file = super().read(file)
    # The lines were replaced by the read
    # -> the entry index must be rebuilt
    gff_file._index_entries()
    return gff_file
155
+
156
def insert(self, index, seqid, source, type, start, end,
           score, strand, phase, attributes=None):
    """
    Insert an entry in front of the entry at the given index.

    Parameters
    ----------
    index : int
        Index where the entry is inserted.
        If the index is equal to the length of the file, the entry
        is appended at the end of the file.
    seqid : str
        The ID of the reference sequence.
    source : str
        Source of the data (e.g. ``Genbank``).
    type : str
        Type of the feature (e.g. ``CDS``).
    start : int
        Start coordinate of feature on the reference sequence.
    end : int
        End coordinate of feature on the reference sequence.
    score : float or None
        Optional score (e.g. an E-value).
    strand : Location.Strand or None
        Strand of the feature, ``None`` if feature is not stranded.
    phase : int or None
        Reading frame shift, ``None`` for non-CDS features.
    attributes : dict, optional
        Additional properties of the feature.
    """
    if index == len(self):
        # There is no entry at this index to insert in front of
        # -> delegate to 'append()'
        self.append(
            seqid, source, type, start, end,
            score, strand, phase, attributes
        )
        return

    # Entry indices differ from line indices
    # -> look up the line of the entry currently at 'index'
    target_line_index = self._entries[index]
    new_line = GFFFile._create_line(
        seqid, source, type, start, end, score, strand, phase, attributes
    )
    self.lines.insert(target_line_index, new_line)
    # The inserted line changed the line indices -> rebuild the index
    self._index_entries()
197
+
198
def append(self, seqid, source, type, start, end,
           score, strand, phase, attributes=None):
    """
    Add an entry as new last line of the file.

    Parameters
    ----------
    seqid : str
        The ID of the reference sequence.
    source : str
        Source of the data (e.g. ``Genbank``).
    type : str
        Type of the feature (e.g. ``CDS``).
    start : int
        Start coordinate of feature on the reference sequence.
    end : int
        End coordinate of feature on the reference sequence.
    score : float or None
        Optional score (e.g. an E-value).
    strand : Location.Strand or None
        Strand of the feature, ``None`` if feature is not stranded.
    phase : int or None
        Reading frame shift, ``None`` for non-CDS features.
    attributes : dict, optional
        Additional properties of the feature.

    Raises
    ------
    NotImplementedError
        If the file contains FASTA data.
    """
    if self._has_fasta:
        raise NotImplementedError(
            "Cannot append feature entries, "
            "as this file contains additional FASTA data"
        )
    new_line = GFFFile._create_line(
        seqid, source, type, start, end, score, strand, phase, attributes
    )
    # The new line goes to the end of the file
    # -> its line index is the current number of lines
    new_line_index = len(self.lines)
    self.lines.append(new_line)
    # Fast update of the entry index: no need to rebuild it completely
    self._entries.append(new_line_index)
235
+
236
def append_directive(self, directive, *args):
    """
    Append a directive line to the end of the file.

    Parameters
    ----------
    directive : str
        Name of the directive.
    *args : str
        Optional parameters for the directive.
        Each argument is simply appended to the directive, separated
        by a single space character.
        If no parameters are given, the line consists of the
        directive name only, without a trailing space.

    Raises
    ------
    NotImplementedError
        If the ``##FASTA`` directive is used, which is not
        supported.

    Examples
    --------

    >>> gff_file = GFFFile()
    >>> gff_file.append_directive("Example directive", "param1", "param2")
    >>> print(gff_file)
    ##gff-version 3
    ##Example directive param1 param2
    """
    if directive.startswith("FASTA"):
        raise NotImplementedError(
            "Adding FASTA information is not supported"
        )
    # Join name and parameters in a single step, so that a directive
    # without parameters does not end with a dangling space
    directive_text = " ".join((directive, *args))
    # The directive is stored without the leading '##', together with
    # the index of the line that is appended below
    self._directives.append((directive_text, len(self.lines)))
    self.lines.append("##" + directive_text)
271
+
272
def directives(self):
    """
    List the directives of this file in the order of their lines.

    Returns
    -------
    directives : list of tuple(str, int)
        The directives, sorted by line index.
        Each tuple holds the directive text (without the leading
        ``##``) and the index of the line it is located in.
    """
    # Work on a copy, so the internal list keeps its order
    ordered = list(self._directives)
    ordered.sort(key=lambda item: item[1])
    return ordered
286
+
287
def __setitem__(self, index, item):
    """
    Overwrite the entry at the given entry index.

    `item` must unpack into exactly nine values:
    ``(seqid, source, type, start, end, score, strand, phase,
    attributes)``.
    """
    (seqid, source, type, start, end,
     score, strand, phase, attributes) = item
    replacement = GFFFile._create_line(
        seqid, source, type, start, end,
        score, strand, phase, attributes
    )
    # Map the entry index onto the line that stores this entry
    self.lines[self._entries[index]] = replacement
294
+
295
+
296
def __getitem__(self, index):
    """
    Parse the entry at the given entry index.

    Returns
    -------
    entry : tuple
        ``(seqid, source, type, start, end, score, strand, phase,
        attrib)``.
        `score` and `phase` are ``None``, if the respective column
        contains ``.``.
        `strand` is ``None``, if the column is neither ``+`` nor
        ``-``.

    Raises
    ------
    IndexError
        If `index` is outside of the range of entries.
    InvalidFileError
        If the line does not consist of 9 tab separated columns.
    """
    entry_count = len(self)
    if not -entry_count <= index < entry_count:
        raise IndexError(
            f"Index {index} is out of range for GFFFile with "
            f"{entry_count} entries"
        )

    # Columns are tab separated
    columns = self.lines[self._entries[index]].strip().split("\t")
    if len(columns) != 9:
        raise InvalidFileError(
            f"Expected 9 columns, but got {len(columns)}"
        )
    (seqid, source, type, start, end,
     score, strand, phase, attrib) = columns

    # Any symbol other than '+' and '-' means 'not stranded'
    strand_by_symbol = {
        "+": Location.Strand.FORWARD,
        "-": Location.Strand.REVERSE,
    }
    return (
        unquote(seqid),
        unquote(source),
        unquote(type),
        int(start),
        int(end),
        None if score == "." else float(score),
        strand_by_symbol.get(strand),
        None if phase == "." else int(phase),
        GFFFile._parse_attributes(attrib),
    )
327
+
328
+ def __delitem__(self, index):
329
+ line_index = self._entries[index]
330
+ del self.lines[line_index]
331
+ self._index_entries()
332
+
333
+ def __len__(self):
334
+ return len(self._entries)
335
+
336
+ def _index_entries(self):
337
+ """
338
+ Parse the file for comment and directive lines.
339
+ Count these lines cumulatively, so that entry indices can be
340
+ mapped onto line indices.
341
+ Additionally track the line index of directive lines.
342
+ """
343
+ self._directives = []
344
+ # Worst case allocation -> all lines contain actual entries
345
+ self._entries = [None] * len(self.lines)
346
+ self._has_fasta = False
347
+ entry_counter = 0
348
+ for line_i, line in enumerate(self.lines):
349
+ if len(line) == 0 or line[0] == " ":
350
+ # Empty line -> do nothing
351
+ pass
352
+ elif line.startswith("#"):
353
+ # Comment or directive
354
+ if line.startswith("##"):
355
+ # Directive
356
+ # Omit the leading '##'
357
+ self._directives.append((line[2:], line_i))
358
+ if line[2:] == "FASTA":
359
+ self._has_fasta = True
360
+ # This parser does not support bundled FASTA
361
+ # data
362
+ warnings.warn(
363
+ "Biotite does not support FASTA data mixed into "
364
+ "GFF files, the FASTA data will be ignored"
365
+ )
366
+ # To ignore the following FASTA data, stop
367
+ # parsing at this point
368
+ break
369
+ else:
370
+ # Actual entry
371
+ self._entries[entry_counter] = line_i
372
+ entry_counter += 1
373
+ # Trim to correct size
374
+ self._entries = self._entries[:entry_counter]
375
+
376
@staticmethod
def _create_line(seqid, source, type, start, end,
                 score, strand, phase, attributes):
    """
    Build the tab separated line for a new entry.

    `seqid`, `source` and the attribute keys and values are
    percent-encoded.
    ``None`` values for `seqid`, `source`, `score`, `strand`, `phase`
    and `attributes` are written as ``.``.

    Raises
    ------
    ValueError
        If `seqid`, `source` or `type` is empty after stripping
        whitespace, or if `seqid` starts with ``>``.
    """
    seqid_col = (
        "." if seqid is None else quote(seqid.strip(), safe=_NOT_QUOTED)
    )
    source_col = (
        "." if source is None else quote(source.strip(), safe=_NOT_QUOTED)
    )
    type_col = type.strip()

    # Perform checks
    if not seqid_col:
        raise ValueError("'seqid' must not be empty")
    if not source_col:
        raise ValueError("'source' must not be empty")
    if not type_col:
        raise ValueError("'type' must not be empty")
    if seqid_col.startswith(">"):
        raise ValueError("'seqid' must not start with '>'")

    score_col = "." if score is None else str(score)
    strand_col = (
        "+" if strand == Location.Strand.FORWARD
        else "-" if strand == Location.Strand.REVERSE
        else "."
    )
    phase_col = "." if phase is None else str(phase)
    if attributes is None or len(attributes) == 0:
        attributes_col = "."
    else:
        attributes_col = ";".join(
            f"{quote(key, safe=_NOT_QUOTED)}={quote(val, safe=_NOT_QUOTED)}"
            for key, val in attributes.items()
        )

    columns = [
        seqid_col, source_col, type_col, str(start), str(end),
        score_col, strand_col, phase_col, attributes_col,
    ]
    return "\t".join(columns)
415
+
416
+ @staticmethod
417
+ def _parse_attributes(attributes):
418
+ """
419
+ Parse the *attributes* string into a dictionary.
420
+ """
421
+ if attributes == ".":
422
+ return {}
423
+
424
+ attrib_dict = {}
425
+ attrib_entries = attributes.split(";")
426
+ for entry in attrib_entries:
427
+ compounds = entry.split("=")
428
+ if len(compounds) != 2:
429
+ raise InvalidFileError(
430
+ f"Attribute entry '{entry}' is invalid"
431
+ )
432
+ key, val = compounds
433
+ attrib_dict[unquote(key)] = unquote(val)
434
+ return attrib_dict
@@ -0,0 +1,36 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This subpackage provides functions and data structures for creating
7
+ (phylogenetic) trees.
8
+
9
+ The :class:`Tree` is the central class in this subpackage.
10
+ It wraps a *root* :class:`TreeNode` object.
11
+ A :class:`TreeNode` is either an intermediate node, if it has child
12
+ :class:`TreeNode` objects, or otherwise a leaf node.
13
+
14
+ A :class:`Tree` is not a container itself:
15
+ Objects, e.g. species names or sequences, that are represented by the
16
+ nodes, cannot be stored directly in a :class:`Tree` or
17
+ :class:`TreeNode`.
18
+ Instead, each leaf node has a reference index:
19
+ These indices refer to a separate list or array, containing the actual
20
+ reference objects.
21
+
22
+ A :class:`Tree` can be created from or exported to a *Newick* notation,
23
+ using the :func:`Tree.from_newick()` or :func:`Tree.to_newick()` method,
24
+ respectively.
25
+
26
+ A :class:`Tree` can be built from a pairwise distance matrix using the
27
+ popular *UPGMA* (:func:`upgma()`) and *Neighbor-Joining*
28
+ (:func:`neighbor_joining()`) algorithms.
29
+ """
30
+
31
+ __name__ = "biotite.sequence.phylo"
32
+ __author__ = "Patrick Kunzmann"
33
+
34
+ from .tree import *
35
+ from .upgma import *
36
+ from .nj import *