bioconvert 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. bioconvert/__init__.py +74 -0
  2. bioconvert/abi2fasta.py +61 -0
  3. bioconvert/abi2fastq.py +60 -0
  4. bioconvert/abi2qual.py +69 -0
  5. bioconvert/bam2bedgraph.py +109 -0
  6. bioconvert/bam2bigwig.py +98 -0
  7. bioconvert/bam2cov.py +81 -0
  8. bioconvert/bam2cram.py +90 -0
  9. bioconvert/bam2fasta.py +140 -0
  10. bioconvert/bam2fastq.py +189 -0
  11. bioconvert/bam2json.py +61 -0
  12. bioconvert/bam2sam.py +94 -0
  13. bioconvert/bam2tsv.py +85 -0
  14. bioconvert/bam2wiggle.py +57 -0
  15. bioconvert/bcf2vcf.py +61 -0
  16. bioconvert/bcf2wiggle.py +58 -0
  17. bioconvert/bed2wiggle.py +57 -0
  18. bioconvert/bedgraph2bigwig.py +79 -0
  19. bioconvert/bedgraph2cov.py +79 -0
  20. bioconvert/bedgraph2wiggle.py +61 -0
  21. bioconvert/bigbed2bed.py +78 -0
  22. bioconvert/bigbed2wiggle.py +75 -0
  23. bioconvert/bigwig2bedgraph.py +82 -0
  24. bioconvert/bigwig2wiggle.py +58 -0
  25. bioconvert/bplink2plink.py +71 -0
  26. bioconvert/bplink2vcf.py +76 -0
  27. bioconvert/bz22gz.py +73 -0
  28. bioconvert/clustal2fasta.py +84 -0
  29. bioconvert/clustal2nexus.py +66 -0
  30. bioconvert/clustal2phylip.py +72 -0
  31. bioconvert/clustal2stockholm.py +74 -0
  32. bioconvert/core/__init__.py +2 -0
  33. bioconvert/core/base.py +736 -0
  34. bioconvert/core/benchmark.py +319 -0
  35. bioconvert/core/converter.py +195 -0
  36. bioconvert/core/decorators.py +256 -0
  37. bioconvert/core/downloader.py +60 -0
  38. bioconvert/core/extensions.py +131 -0
  39. bioconvert/core/graph.py +309 -0
  40. bioconvert/core/init.py +111 -0
  41. bioconvert/core/levenshtein.py +102 -0
  42. bioconvert/core/registry.py +405 -0
  43. bioconvert/core/shell.py +102 -0
  44. bioconvert/core/utils.py +173 -0
  45. bioconvert/cram2bam.py +89 -0
  46. bioconvert/cram2fasta.py +129 -0
  47. bioconvert/cram2fastq.py +156 -0
  48. bioconvert/cram2sam.py +89 -0
  49. bioconvert/csv2tsv.py +130 -0
  50. bioconvert/csv2xls.py +113 -0
  51. bioconvert/data/README.rst +85 -0
  52. bioconvert/data/__init__.py +0 -0
  53. bioconvert/data/measles.chrom.sizes +1 -0
  54. bioconvert/data/test.bigwig +0 -0
  55. bioconvert/data/test_SP1.fq.gz +0 -0
  56. bioconvert/data/test_gfa2fasta_v1.gfa +11 -0
  57. bioconvert/data/test_measles.fa.fai +1 -0
  58. bioconvert/data/test_measles.sorted.bam +0 -0
  59. bioconvert/data/testing/__init__.py +0 -0
  60. bioconvert/dsrc2gz.py +68 -0
  61. bioconvert/embl2fasta.py +66 -0
  62. bioconvert/embl2genbank.py +66 -0
  63. bioconvert/fast52pod5.py +56 -0
  64. bioconvert/fasta2clustal.py +79 -0
  65. bioconvert/fasta2faa.py +134 -0
  66. bioconvert/fasta2fasta_agp.py +264 -0
  67. bioconvert/fasta2fastq.py +74 -0
  68. bioconvert/fasta2genbank.py +123 -0
  69. bioconvert/fasta2nexus.py +66 -0
  70. bioconvert/fasta2phylip.py +91 -0
  71. bioconvert/fasta2twobit.py +61 -0
  72. bioconvert/fasta_qual2fastq.py +91 -0
  73. bioconvert/fastq2fasta.py +270 -0
  74. bioconvert/fastq2fasta_qual.py +113 -0
  75. bioconvert/fastq2qual.py +83 -0
  76. bioconvert/genbank2embl.py +64 -0
  77. bioconvert/genbank2faa.py +81 -0
  78. bioconvert/genbank2fasta.py +94 -0
  79. bioconvert/genbank2gff3.py +57 -0
  80. bioconvert/gfa2fasta.py +101 -0
  81. bioconvert/gff22gff3.py +87 -0
  82. bioconvert/gff32gff2.py +86 -0
  83. bioconvert/gff32gtf.py +52 -0
  84. bioconvert/gml2graphml.py +60 -0
  85. bioconvert/gml2pajek.py +60 -0
  86. bioconvert/graphml2gml.py +60 -0
  87. bioconvert/graphml2pajek.py +60 -0
  88. bioconvert/gtf22gff3.py +79 -0
  89. bioconvert/gz2bz2.py +90 -0
  90. bioconvert/gz2dsrc.py +77 -0
  91. bioconvert/io/__init__.py +0 -0
  92. bioconvert/io/fasta.py +62 -0
  93. bioconvert/io/genbank.py +572 -0
  94. bioconvert/io/gff2.py +115 -0
  95. bioconvert/io/gff3.py +142 -0
  96. bioconvert/io/maf.py +284 -0
  97. bioconvert/io/scf.py +248 -0
  98. bioconvert/jaspar2transfac.py +60 -0
  99. bioconvert/json2yaml.py +70 -0
  100. bioconvert/maf2sam.py +67 -0
  101. bioconvert/misc/README.rst +3 -0
  102. bioconvert/misc/__init__.py +1 -0
  103. bioconvert/misc/cython_fastq2fasta.pyx +44 -0
  104. bioconvert/misc/fastq2fasta.c +121 -0
  105. bioconvert/misc/fastq2fasta.pl +26 -0
  106. bioconvert/misc/install_goalign.sh +33 -0
  107. bioconvert/misc/install_gotree.sh +33 -0
  108. bioconvert/mol22smiles.py +70 -0
  109. bioconvert/newick2nexus.py +64 -0
  110. bioconvert/newick2phyloxml.py +66 -0
  111. bioconvert/nexus2clustal.py +96 -0
  112. bioconvert/nexus2fasta.py +150 -0
  113. bioconvert/nexus2newick.py +78 -0
  114. bioconvert/nexus2phylip.py +64 -0
  115. bioconvert/nexus2phyloxml.py +67 -0
  116. bioconvert/ods2csv.py +96 -0
  117. bioconvert/pajek2gml.py +60 -0
  118. bioconvert/pajek2graphml.py +60 -0
  119. bioconvert/pdb2faa.py +96 -0
  120. bioconvert/pdb2smiles.py +72 -0
  121. bioconvert/phylip2clustal.py +71 -0
  122. bioconvert/phylip2fasta.py +84 -0
  123. bioconvert/phylip2nexus.py +63 -0
  124. bioconvert/phylip2stockholm.py +72 -0
  125. bioconvert/phylip2xmfa.py +64 -0
  126. bioconvert/phyloxml2newick.py +65 -0
  127. bioconvert/phyloxml2nexus.py +64 -0
  128. bioconvert/plink2bplink.py +61 -0
  129. bioconvert/plink2vcf.py +66 -0
  130. bioconvert/sam2bam.py +61 -0
  131. bioconvert/sam2cram.py +98 -0
  132. bioconvert/sam2paf.py +354 -0
  133. bioconvert/scf2fasta.py +114 -0
  134. bioconvert/scf2fastq.py +120 -0
  135. bioconvert/scripts/__init__.py +0 -0
  136. bioconvert/scripts/converter.py +705 -0
  137. bioconvert/scripts/init_convert.py +59 -0
  138. bioconvert/scripts/stats.py +103 -0
  139. bioconvert/sdf2smiles.py +74 -0
  140. bioconvert/simulator/__init__.py +0 -0
  141. bioconvert/simulator/fasta.py +48 -0
  142. bioconvert/simulator/fastq.py +41 -0
  143. bioconvert/simulator/gfa.py +39 -0
  144. bioconvert/sra2fastq.py +167 -0
  145. bioconvert/stockholm2clustal.py +71 -0
  146. bioconvert/stockholm2phylip.py +72 -0
  147. bioconvert/transfac2jaspar.py +67 -0
  148. bioconvert/tsv2csv.py +130 -0
  149. bioconvert/twobit2fasta.py +76 -0
  150. bioconvert/utils/__init__.py +0 -0
  151. bioconvert/utils/biocode/__init__.py +0 -0
  152. bioconvert/utils/biocode/annotation.py +427 -0
  153. bioconvert/utils/biocode/convert_genbank_to_gff3.py +241 -0
  154. bioconvert/utils/biocode/gff.py +885 -0
  155. bioconvert/utils/biocode/tbl.py +259 -0
  156. bioconvert/utils/biocode/things.py +1494 -0
  157. bioconvert/utils/biocode/utils.py +239 -0
  158. bioconvert/vcf2bcf.py +75 -0
  159. bioconvert/vcf2bed.py +60 -0
  160. bioconvert/vcf2bplink.py +63 -0
  161. bioconvert/vcf2plink.py +67 -0
  162. bioconvert/vcf2wiggle.py +55 -0
  163. bioconvert/wig2bed.py +57 -0
  164. bioconvert/xls2csv.py +127 -0
  165. bioconvert/xlsx2csv.py +128 -0
  166. bioconvert/xmfa2phylip.py +63 -0
  167. bioconvert/yaml2json.py +75 -0
  168. bioconvert-1.2.0.dist-info/METADATA +673 -0
  169. bioconvert-1.2.0.dist-info/RECORD +186 -0
  170. bioconvert-1.2.0.dist-info/WHEEL +5 -0
  171. bioconvert-1.2.0.dist-info/entry_points.txt +4 -0
  172. bioconvert-1.2.0.dist-info/licenses/COPYING +674 -0
  173. bioconvert-1.2.0.dist-info/top_level.txt +3 -0
  174. doc/conf.py +84 -0
  175. doc/create_automodules_allconverters.py +34 -0
  176. doc/create_graph.py +7 -0
  177. doc/script.sh +8 -0
  178. doc/script2.sh +8 -0
  179. examples/README.txt +2 -0
  180. examples/conversion.dot +884 -0
  181. examples/plot_benchmark.py +60 -0
  182. examples/plot_graph.py +67 -0
  183. examples/plot_graph_clustered.py +64 -0
  184. examples/plot_graph_colored.py +64 -0
  185. examples/plot_methods.py +60 -0
  186. examples/test.cov +15894 -0
bioconvert/__init__.py ADDED
@@ -0,0 +1,74 @@
1
+ __version__ = "1.2.0"
2
+ try:
3
+ from importlib.metadata import version as _get_version, PackageNotFoundError
4
+ version = _get_version("bioconvert")
5
+ except PackageNotFoundError:
6
+ version = __version__
7
+
8
+ import os
9
+
10
+ import colorlog
11
+
12
+ # This will create a HOME/.config/bioconvert where files (e.g., executables)
13
+ # can be downloaded
14
+ from easydev import CustomConfig
15
+
16
+ configuration = CustomConfig("bioconvert", verbose=True)
17
+
18
# Expose binaries located under <home>/go/bin by prepending that directory to
# PATH. expanduser("~") is used instead of os.environ["HOME"] so that importing
# the package does not raise KeyError when HOME is not defined; on POSIX it
# resolves to $HOME whenever that variable is set, so the value is unchanged
# in the common case.
os.environ["GOPATH"] = os.path.expanduser("~") + "/go"
# os.pathsep is ":" on POSIX (same as before) and the right separator elsewhere;
# .get() tolerates an environment in which PATH itself is missing.
os.environ["PATH"] = os.environ["GOPATH"] + "/bin/" + os.pathsep + os.environ.get("PATH", "")
20
+
21
+ from easydev.logging_tools import Logging
22
+
23
+ logger = Logging("bioconvert", "INFO")
24
+
25
+
26
def bioconvert_script(filename, where=None):
    """Return the path of a file stored in the ``misc`` directory of the package.

    :param str filename: name of the file to look up
    :param str where: optional sub-directory of ``misc`` that holds the file
    :return: the path built from the package location, ``misc``, the optional
        sub-directory and *filename*
    :raises FileNotFoundError: if that path does not exist
    """
    misc_dir = os.path.join(bioconvert.__path__[0], "misc")
    # A falsy ``where`` (None or "") means the file sits directly in misc/.
    components = [misc_dir, where, filename] if where else [misc_dir, filename]
    path = os.path.join(*components)
    if os.path.exists(path):
        return path
    raise FileNotFoundError("unknown file {}".format(path))
36
+
37
+
38
def bioconvert_data(filename, where=None):
    """Return the path of a data set stored in the ``data`` directory of the package.

    :param str filename: name of the data file to look up
    :param str where: optional sub-directory of ``data`` that holds the file
    :return: the path built from the package location, ``data``, the optional
        sub-directory and *filename*
    :raises FileNotFoundError: if that path does not exist
    """
    data_dir = os.path.join(bioconvert.__path__[0], "data")
    # os.path.join is used so callers do not have to care about / versus \
    # A falsy ``where`` (None or "") means the file sits directly in data/.
    components = [data_dir, where, filename] if where else [data_dir, filename]
    path = os.path.join(*components)
    if os.path.exists(path):
        return path
    raise FileNotFoundError("unknown file {}".format(path))
54
+
55
+
56
def info():
    """Return a one-line summary of how many converters and methods exist.

    The figures are derived from ``Registry().get_info()``: the number of
    entries gives the converter count and the sum of the values gives the
    method count (presumably a mapping converter -> number of methods;
    confirm against ``Registry.get_info``).

    :return: the formatted summary string
    """
    # Imported lazily, as in the rest of this module, to avoid a circular
    # import while the package is being initialised.
    from bioconvert.core.registry import Registry

    methods_per_converter = Registry().get_info()
    n_converters = len(methods_per_converter)
    n_methods = sum(methods_per_converter.values())
    template = "Bioconvert contains {} converters including {} methods"
    return template.format(n_converters, n_methods)
65
+
66
+
67
+ import bioconvert
68
+ from bioconvert.core.base import ConvBase
69
+ from bioconvert.core.benchmark import Benchmark
70
+ from bioconvert.core.converter import Bioconvert
71
+ from bioconvert.core.decorators import compressor, requires
72
+ from bioconvert.core.registry import Registry
73
+ from bioconvert.core.shell import shell
74
+ from bioconvert.core.utils import TempFile, md5
@@ -0,0 +1,61 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`ABI` format to :term:`FASTA` format"""
25
+ from bioconvert import ConvBase, requires
26
+
27
+ __all__ = ["ABI2FASTA"]
28
+
29
+
30
class ABI2FASTA(ConvBase):
    """Convert :term:`ABI` file to :term:`FASTA` file

    :term:`ABI` files are created by ABI sequencing machines and include
    PHRED quality scores for base calls. The records read from the input are
    written out in :term:`FASTA` format.

    Method implemented is based on BioPython [BIOPYTHON]_.

    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input ABI file
        :param str outfile: output FASTA filename

        Extra positional and keyword arguments are forwarded unchanged to
        the parent constructor.
        """
        super().__init__(infile, outfile, *args, **kargs)

    @requires(python_library="biopython")
    def _method_biopython(self, *args, **kwargs):
        """For this method we use the biopython package Bio.SeqIO.

        The input is parsed with the ``abi`` format and every record is
        written to the output file with the ``fasta`` format.

        :reference: `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        # Local import: biopython is only needed when this method is used.
        from Bio import SeqIO

        records = SeqIO.parse(self.infile, "abi")
        SeqIO.write(records, self.outfile, "fasta")
@@ -0,0 +1,60 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`ABI` format to :term:`FASTQ` format"""
25
+ from bioconvert import ConvBase, requires
26
+
27
+ __all__ = ["ABI2FASTQ"]
28
+
29
+
30
class ABI2FASTQ(ConvBase):
    """Convert :term:`ABI` file to :term:`FASTQ` file

    :term:`ABI` files are produced by ABI sequencing machines and carry
    PHRED quality scores for the base calls, which is what makes the
    creation of :term:`FASTQ` files possible.

    Method implemented is based on BioPython [BIOPYTHON]_.
    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input ABI file
        :param str outfile: output FASTQ filename

        Extra positional and keyword arguments are forwarded unchanged to
        the parent constructor.
        """
        super().__init__(infile, outfile, *args, **kargs)

    @requires(python_library="biopython")
    def _method_biopython(self, *args, **kwargs):
        """Conversion relying on the biopython package (Bio.SeqIO).

        The input is parsed with the ``abi`` format and every record is
        written to the output file with the ``fastq`` format.

        `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        # Local import: biopython is only needed when this method is used.
        from Bio import SeqIO

        SeqIO.write(SeqIO.parse(self.infile, "abi"), self.outfile, "fastq")
bioconvert/abi2qual.py ADDED
@@ -0,0 +1,69 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`ABI` format to :term:`QUAL` format"""
25
+ from bioconvert import ConvBase, requires
26
+
27
+ __all__ = ["ABI2QUAL"]
28
+
29
+
30
class ABI2QUAL(ConvBase):
    """Convert :term:`ABI` file to :term:`QUAL` file

    :term:`ABI` files are created by ABI sequencing machines and include
    PHRED quality scores for base calls. This allows the creation of
    :term:`QUAL` files.

    Method implemented is based on BioPython [BIOPYTHON]_.

    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input ABI file
        :param str outfile: output QUAL filename

        Extra positional and keyword arguments are forwarded unchanged to
        the parent constructor.
        """
        super().__init__(infile, outfile, *args, **kargs)

    @requires(python_library="biopython")
    def _method_biopython(self, *args, **kwargs):
        """For this method we use the biopython package Bio.SeqIO.

        Each record is written as a ``>name`` header line followed by one
        line holding its PHRED scores separated by single spaces.

        `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        # Local import: biopython is only needed when this method is used.
        from Bio import SeqIO

        records = SeqIO.parse(self.infile, "abi")
        # output using SeqIO.write(records, self.outfile, "qual") is not
        # standard so we write our own conversion here below
        with open(self.outfile, "w") as fout:
            for rec in records:
                scores = rec.letter_annotations["phred_quality"]
                fout.write(">{}\n".format(rec.name))
                # Scores must be whitespace-separated: joining them with an
                # empty string makes multi-digit values impossible to tell
                # apart (e.g. [10, 2] and [1, 0, 2] would both give "102").
                fout.write("{}\n".format(" ".join(str(score) for score in scores)))
@@ -0,0 +1,109 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`BAM` format to :term:`BEDGRAPH` format"""
25
+ import colorlog
26
+
27
+ from bioconvert import ConvBase
28
+ from bioconvert.core.decorators import requires
29
+
30
+ _log = colorlog.getLogger(__name__)
31
+
32
+
33
+ __all__ = ["BAM2BEDGRAPH"]
34
+
35
+
36
class BAM2BEDGRAPH(ConvBase):
    """Convert sorted :term:`BAM` file into :term:`BEDGRAPH` file

    Compute the coverage (depth) in BEDGRAPH.
    Regions with zero coverage are also reported.


    Note that this BEDGRAPH format is of the form::

        chrom chromStart chromEnd dataValue

    Note that consecutive positions with same values are compressed.

    ::

        chr1 0 75 0
        chr1 75 176 1
        chr1 176 177 2


    .. warning:: the BAM file must be sorted. This can be achieved with
        bamtools.


    Methods available are based on bedtools [BEDTOOLS]_ and mosdepth
    [MOSDEPTH]_.
    """

    # 4 minutes with bedtools and 20s with mosdepth
    #: Default value
    _default_method = "bedtools"
    _threading = True

    def __init__(self, infile, outfile):
        """.. rubric:: Constructor

        :param str infile: The path to the input BAM file.
            **It must be sorted**.
        :param str outfile: The path to the output file
        """
        super().__init__(infile, outfile)

    @requires("bedtools")
    def _method_bedtools(self, *args, **kwargs):
        """Do the conversion using bedtools.

        Runs ``bedtools genomecov -bga -ibam`` on the input file and
        redirects its standard output to the output file.

        `bedtools documentation <https://bedtools.readthedocs.io/en/latest/>`_"""
        cmd = "bedtools genomecov -bga -ibam {} > {}".format(self.infile, self.outfile)
        self.execute(cmd)

    @requires("mosdepth")
    def _method_mosdepth(self, *args, **kwargs):
        """Do the conversion using mosdepth.

        mosdepth is run with the prefix ``.bioconvert`` inside a temporary
        directory; its ``.bioconvert.per-base.bed.gz`` result is then either
        copied as is (output name ending with ``.gz``) or decompressed with
        ``gunzip -c`` into the output file.

        `mosdepth documentation <https://github.com/brentp/mosdepth>`_"""
        import shutil

        # For testing, we need to save into a specific temporary directory
        import tempfile

        # Every intermediate file lives in tmpdir, which the context manager
        # removes on exit (also when a command fails), so no explicit
        # "rm -f" clean-up step is required.
        with tempfile.TemporaryDirectory() as tmpdir:
            cmd = "mosdepth {}/.bioconvert -t {} {}".format(tmpdir, self.threads, self.infile)
            self.execute(cmd)

            per_base = "{}/.bioconvert.per-base.bed.gz".format(tmpdir)
            if self.outfile.endswith(".gz"):
                # The result is already compressed: deliver it directly.
                # (Previously this branch did nothing, so no output file was
                # produced for *.gz targets.)
                shutil.copyfile(per_base, self.outfile)
            else:
                cmd = "gunzip -c {} > {}".format(per_base, self.outfile)
                self.execute(cmd)
@@ -0,0 +1,98 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`BAM` file to :term:`BIGWIG` format"""
25
+
26
+ import colorlog
27
+ from tempfile import NamedTemporaryFile
28
+
29
+ from bioconvert import ConvBase
30
+ from bioconvert.core.base import ConvArg
31
+ from bioconvert.core.decorators import requires
32
+
33
+ _log = colorlog.getLogger(__name__)
34
+
35
+ __all__ = ["BAM2BIGWIG"]
36
+
37
+
38
class BAM2BIGWIG(ConvBase):
    """Convert :term:`BAM` file to :term:`BIGWIG` file

    Convert BAM into a binary version of :term:`WIG` format.

    Methods are based on bamCoverage [DEEPTOOLS]_ and bedGraphToBigWig from
    wiggletools [WIGGLETOOLS]_. The latter needs an extra argument
    (--chrom-sizes), which is why bamCoverage is the default for now.

    Moreover, the two methods do not return exactly the same info!

    This can be checked by using bioconvert to convert the results into a
    human readable format such as wiggle.

    """

    #: Default value
    _default_method = "bamCoverage"

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input BAM file
        :param str outfile: output BIGWIG filename

        Extra positional and keyword arguments are forwarded unchanged to
        the parent constructor.
        """
        super().__init__(infile, outfile, *args, **kargs)

    @requires("bamCoverage")
    def _method_bamCoverage(self, *args, **kwargs):
        """Run the bamCoverage tool with ``bigwig`` as output format.

        `bamCoverage documentation <https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html>`_"""
        template = "bamCoverage --bam {} --outFileFormat bigwig --outFileName {}"
        self.execute(template.format(self.infile, self.outfile))

    @requires(external_binaries=["bedGraphToBigWig", "bedtools"])
    def _method_ucsc(self, *args, **kwargs):
        """Run ucsc tool bedGraphToBigWig.

        The conversion is chained: BAM -> BEDGRAPH (temporary file) ->
        BIGWIG. The ``chrom_sizes`` keyword argument, if any, is handed over
        to the second converter; it is the extra argument exposed by the
        bioconvert standalone.
        """
        # Local imports of the two converters chained below.
        from bioconvert.bam2bedgraph import BAM2BEDGRAPH
        from bioconvert.bedgraph2bigwig import BEDGRAPH2BIGWIG

        sizes = kwargs.get("chrom_sizes")

        # The intermediate bedgraph only needs to exist for the duration of
        # the second conversion.
        with NamedTemporaryFile(suffix=".bedgraph") as tmp:
            to_bedgraph = BAM2BEDGRAPH(self.infile, tmp.name)
            to_bedgraph()
            to_bigwig = BEDGRAPH2BIGWIG(tmp.name, self.outfile)
            to_bigwig(chrom_sizes=sizes)

    @classmethod
    def get_additional_arguments(cls):
        """Yield the extra command-line argument of this converter (--chrom-sizes)."""
        yield ConvArg(
            names="--chrom-sizes",
            default=None,
            help="a two-column file/URL: <chromosome name> <size in bases>. " "Used by the ucsc method only",
        )
bioconvert/bam2cov.py ADDED
@@ -0,0 +1,81 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`BAM` format to :term:`COV` format"""
25
+ import colorlog
26
+
27
+ from bioconvert import ConvBase
28
+ from bioconvert.core.decorators import requires
29
+
30
+ _log = colorlog.getLogger(__name__)
31
+
32
+ __all__ = ["BAM2COV"]
33
+
34
+
35
class BAM2COV(ConvBase):
    """Convert sorted :term:`BAM` file into :term:`COV` file


    The COV format has three columns, contig name, position and coverage::

        chr1 1 0
        chr1 2 0
        chr1 3 0
        chr1 4 0
        chr1 5 0

    .. warning:: the BAM file must be sorted. This can be achieved with
        bamtools using *bamtools sort -in INPUT.bam*

    Methods available are based on samtools [SAMTOOLS]_ or bedtools [BEDTOOLS]_.
    """

    #: Default value (the samtools method is the available alternative)
    _default_method = "bedtools"

    def __init__(self, infile, outfile):
        """.. rubric:: Constructor

        :param str infile: The path to the input BAM file. **It must be sorted**.
        :param str outfile: The path to the output file
        """
        super().__init__(infile, outfile)

    @requires("samtools")
    def _method_samtools(self, *args, **kwargs):
        """Do the conversion of a sorted :term:`BAM` using ``samtools depth -aa``

        The standard output of the command is redirected to the output file.

        `SAMtools documentation <http://www.htslib.org/doc/samtools.html>`_"""
        template = "samtools depth -aa {} > {}"
        self.execute(template.format(self.infile, self.outfile))

    @requires("bedtools")
    def _method_bedtools(self, *args, **kwargs):
        """Do the conversion of a sorted :term:`BAM` using ``bedtools genomecov -d``

        The standard output of the command is redirected to the output file.

        `bedtools documentation <https://bedtools.readthedocs.io/en/latest/>`_"""
        template = "bedtools genomecov -d -ibam {} > {}"
        self.execute(template.format(self.infile, self.outfile))
bioconvert/bam2cram.py ADDED
@@ -0,0 +1,90 @@
1
+ ###########################################################################
2
+ # Bioconvert is a project to facilitate the interconversion #
3
+ # of life science data from one format to another. #
4
+ # #
5
+ # Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
6
+ # #
7
+ # bioconvert is free software: you can redistribute it and/or modify #
8
+ # it under the terms of the GNU General Public License as published by #
9
+ # the Free Software Foundation, either version 3 of the License, or #
10
+ # (at your option) any later version. #
11
+ # #
12
+ # bioconvert is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program (COPYING file). #
19
+ # If not, see <http://www.gnu.org/licenses/>. #
20
+ # #
21
+ # Repository: https://github.com/bioconvert/bioconvert #
22
+ # Documentation: http://bioconvert.readthedocs.io #
23
+ ###########################################################################
24
+ """Convert :term:`BAM` file to :term:`CRAM` format"""
25
+ import os
26
+
27
+ import colorlog
28
+
29
+ from bioconvert import ConvBase
30
+ from bioconvert.core.base import ConvArg
31
+ from bioconvert.core.decorators import requires
32
+
33
+ logger = colorlog.getLogger(__name__)
34
+
35
+
36
class BAM2CRAM(ConvBase):
    """Convert :term:`BAM` file to :term:`CRAM` file

    The conversion needs the reference that corresponds to the input file.
    With the standalone it can be given as an argument (*-\\-reference*);
    when it is missing, the user is prompted for it.

    Methods available are based on samtools [SAMTOOLS]_.
    """

    #: Default value
    _default_method = "samtools"
    _threading = True

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input BAM file
        :param str outfile: output CRAM filename

        Extra positional and keyword arguments are forwarded unchanged to
        the parent constructor.
        """
        super().__init__(infile, outfile, *args, **kargs)

    def _get_reference(self):
        """Prompt for the reference file and return the path that was typed.

        :raises IOError: if the path entered does not exist
        """
        # Fallback used when --reference was not provided
        prompt = "Please enter the reference corresponding " "to the input BAM file:"
        reference = input(prompt)
        if not os.path.exists(reference):
            raise IOError("Reference required")
        logger.debug("Reference exists ({}).".format(reference))
        return reference

    @requires("samtools")
    def _method_samtools(self, *args, **kwargs):
        """Here we use the SAMtools tool (``samtools view -C``).

        The reference is read from the ``reference`` keyword argument; when
        it is absent or None, it is requested interactively.

        `SAMtools documentation <http://www.htslib.org/doc/samtools.html>`_"""
        reference = kwargs.get("reference")
        if reference is None:
            reference = self._get_reference()

        # -C means output is CRAM
        template = "samtools view -@ {} -C {} -T {} -o {}"
        self.execute(template.format(self.threads, self.infile, reference, self.outfile))

    @classmethod
    def get_additional_arguments(cls):
        """Yield the extra command-line argument of this converter (--reference)."""
        yield ConvArg(
            names="--reference",
            default=None,
            help="the reference used (FASTA format). If not provided, prompt will appear",
        )