bioconvert 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioconvert/__init__.py +74 -0
- bioconvert/abi2fasta.py +61 -0
- bioconvert/abi2fastq.py +60 -0
- bioconvert/abi2qual.py +69 -0
- bioconvert/bam2bedgraph.py +109 -0
- bioconvert/bam2bigwig.py +98 -0
- bioconvert/bam2cov.py +81 -0
- bioconvert/bam2cram.py +90 -0
- bioconvert/bam2fasta.py +140 -0
- bioconvert/bam2fastq.py +189 -0
- bioconvert/bam2json.py +61 -0
- bioconvert/bam2sam.py +94 -0
- bioconvert/bam2tsv.py +85 -0
- bioconvert/bam2wiggle.py +57 -0
- bioconvert/bcf2vcf.py +61 -0
- bioconvert/bcf2wiggle.py +58 -0
- bioconvert/bed2wiggle.py +57 -0
- bioconvert/bedgraph2bigwig.py +79 -0
- bioconvert/bedgraph2cov.py +79 -0
- bioconvert/bedgraph2wiggle.py +61 -0
- bioconvert/bigbed2bed.py +78 -0
- bioconvert/bigbed2wiggle.py +75 -0
- bioconvert/bigwig2bedgraph.py +82 -0
- bioconvert/bigwig2wiggle.py +58 -0
- bioconvert/bplink2plink.py +71 -0
- bioconvert/bplink2vcf.py +76 -0
- bioconvert/bz22gz.py +73 -0
- bioconvert/clustal2fasta.py +84 -0
- bioconvert/clustal2nexus.py +66 -0
- bioconvert/clustal2phylip.py +72 -0
- bioconvert/clustal2stockholm.py +74 -0
- bioconvert/core/__init__.py +2 -0
- bioconvert/core/base.py +736 -0
- bioconvert/core/benchmark.py +319 -0
- bioconvert/core/converter.py +195 -0
- bioconvert/core/decorators.py +256 -0
- bioconvert/core/downloader.py +60 -0
- bioconvert/core/extensions.py +131 -0
- bioconvert/core/graph.py +309 -0
- bioconvert/core/init.py +111 -0
- bioconvert/core/levenshtein.py +102 -0
- bioconvert/core/registry.py +405 -0
- bioconvert/core/shell.py +102 -0
- bioconvert/core/utils.py +173 -0
- bioconvert/cram2bam.py +89 -0
- bioconvert/cram2fasta.py +129 -0
- bioconvert/cram2fastq.py +156 -0
- bioconvert/cram2sam.py +89 -0
- bioconvert/csv2tsv.py +130 -0
- bioconvert/csv2xls.py +113 -0
- bioconvert/data/README.rst +85 -0
- bioconvert/data/__init__.py +0 -0
- bioconvert/data/measles.chrom.sizes +1 -0
- bioconvert/data/test.bigwig +0 -0
- bioconvert/data/test_SP1.fq.gz +0 -0
- bioconvert/data/test_gfa2fasta_v1.gfa +11 -0
- bioconvert/data/test_measles.fa.fai +1 -0
- bioconvert/data/test_measles.sorted.bam +0 -0
- bioconvert/data/testing/__init__.py +0 -0
- bioconvert/dsrc2gz.py +68 -0
- bioconvert/embl2fasta.py +66 -0
- bioconvert/embl2genbank.py +66 -0
- bioconvert/fast52pod5.py +56 -0
- bioconvert/fasta2clustal.py +79 -0
- bioconvert/fasta2faa.py +134 -0
- bioconvert/fasta2fasta_agp.py +264 -0
- bioconvert/fasta2fastq.py +74 -0
- bioconvert/fasta2genbank.py +123 -0
- bioconvert/fasta2nexus.py +66 -0
- bioconvert/fasta2phylip.py +91 -0
- bioconvert/fasta2twobit.py +61 -0
- bioconvert/fasta_qual2fastq.py +91 -0
- bioconvert/fastq2fasta.py +270 -0
- bioconvert/fastq2fasta_qual.py +113 -0
- bioconvert/fastq2qual.py +83 -0
- bioconvert/genbank2embl.py +64 -0
- bioconvert/genbank2faa.py +81 -0
- bioconvert/genbank2fasta.py +94 -0
- bioconvert/genbank2gff3.py +57 -0
- bioconvert/gfa2fasta.py +101 -0
- bioconvert/gff22gff3.py +87 -0
- bioconvert/gff32gff2.py +86 -0
- bioconvert/gff32gtf.py +52 -0
- bioconvert/gml2graphml.py +60 -0
- bioconvert/gml2pajek.py +60 -0
- bioconvert/graphml2gml.py +60 -0
- bioconvert/graphml2pajek.py +60 -0
- bioconvert/gtf22gff3.py +79 -0
- bioconvert/gz2bz2.py +90 -0
- bioconvert/gz2dsrc.py +77 -0
- bioconvert/io/__init__.py +0 -0
- bioconvert/io/fasta.py +62 -0
- bioconvert/io/genbank.py +572 -0
- bioconvert/io/gff2.py +115 -0
- bioconvert/io/gff3.py +142 -0
- bioconvert/io/maf.py +284 -0
- bioconvert/io/scf.py +248 -0
- bioconvert/jaspar2transfac.py +60 -0
- bioconvert/json2yaml.py +70 -0
- bioconvert/maf2sam.py +67 -0
- bioconvert/misc/README.rst +3 -0
- bioconvert/misc/__init__.py +1 -0
- bioconvert/misc/cython_fastq2fasta.pyx +44 -0
- bioconvert/misc/fastq2fasta.c +121 -0
- bioconvert/misc/fastq2fasta.pl +26 -0
- bioconvert/misc/install_goalign.sh +33 -0
- bioconvert/misc/install_gotree.sh +33 -0
- bioconvert/mol22smiles.py +70 -0
- bioconvert/newick2nexus.py +64 -0
- bioconvert/newick2phyloxml.py +66 -0
- bioconvert/nexus2clustal.py +96 -0
- bioconvert/nexus2fasta.py +150 -0
- bioconvert/nexus2newick.py +78 -0
- bioconvert/nexus2phylip.py +64 -0
- bioconvert/nexus2phyloxml.py +67 -0
- bioconvert/ods2csv.py +96 -0
- bioconvert/pajek2gml.py +60 -0
- bioconvert/pajek2graphml.py +60 -0
- bioconvert/pdb2faa.py +96 -0
- bioconvert/pdb2smiles.py +72 -0
- bioconvert/phylip2clustal.py +71 -0
- bioconvert/phylip2fasta.py +84 -0
- bioconvert/phylip2nexus.py +63 -0
- bioconvert/phylip2stockholm.py +72 -0
- bioconvert/phylip2xmfa.py +64 -0
- bioconvert/phyloxml2newick.py +65 -0
- bioconvert/phyloxml2nexus.py +64 -0
- bioconvert/plink2bplink.py +61 -0
- bioconvert/plink2vcf.py +66 -0
- bioconvert/sam2bam.py +61 -0
- bioconvert/sam2cram.py +98 -0
- bioconvert/sam2paf.py +354 -0
- bioconvert/scf2fasta.py +114 -0
- bioconvert/scf2fastq.py +120 -0
- bioconvert/scripts/__init__.py +0 -0
- bioconvert/scripts/converter.py +705 -0
- bioconvert/scripts/init_convert.py +59 -0
- bioconvert/scripts/stats.py +103 -0
- bioconvert/sdf2smiles.py +74 -0
- bioconvert/simulator/__init__.py +0 -0
- bioconvert/simulator/fasta.py +48 -0
- bioconvert/simulator/fastq.py +41 -0
- bioconvert/simulator/gfa.py +39 -0
- bioconvert/sra2fastq.py +167 -0
- bioconvert/stockholm2clustal.py +71 -0
- bioconvert/stockholm2phylip.py +72 -0
- bioconvert/transfac2jaspar.py +67 -0
- bioconvert/tsv2csv.py +130 -0
- bioconvert/twobit2fasta.py +76 -0
- bioconvert/utils/__init__.py +0 -0
- bioconvert/utils/biocode/__init__.py +0 -0
- bioconvert/utils/biocode/annotation.py +427 -0
- bioconvert/utils/biocode/convert_genbank_to_gff3.py +241 -0
- bioconvert/utils/biocode/gff.py +885 -0
- bioconvert/utils/biocode/tbl.py +259 -0
- bioconvert/utils/biocode/things.py +1494 -0
- bioconvert/utils/biocode/utils.py +239 -0
- bioconvert/vcf2bcf.py +75 -0
- bioconvert/vcf2bed.py +60 -0
- bioconvert/vcf2bplink.py +63 -0
- bioconvert/vcf2plink.py +67 -0
- bioconvert/vcf2wiggle.py +55 -0
- bioconvert/wig2bed.py +57 -0
- bioconvert/xls2csv.py +127 -0
- bioconvert/xlsx2csv.py +128 -0
- bioconvert/xmfa2phylip.py +63 -0
- bioconvert/yaml2json.py +75 -0
- bioconvert-1.2.0.dist-info/METADATA +673 -0
- bioconvert-1.2.0.dist-info/RECORD +186 -0
- bioconvert-1.2.0.dist-info/WHEEL +5 -0
- bioconvert-1.2.0.dist-info/entry_points.txt +4 -0
- bioconvert-1.2.0.dist-info/licenses/COPYING +674 -0
- bioconvert-1.2.0.dist-info/top_level.txt +3 -0
- doc/conf.py +84 -0
- doc/create_automodules_allconverters.py +34 -0
- doc/create_graph.py +7 -0
- doc/script.sh +8 -0
- doc/script2.sh +8 -0
- examples/README.txt +2 -0
- examples/conversion.dot +884 -0
- examples/plot_benchmark.py +60 -0
- examples/plot_graph.py +67 -0
- examples/plot_graph_clustered.py +64 -0
- examples/plot_graph_colored.py +64 -0
- examples/plot_methods.py +60 -0
- examples/test.cov +15894 -0
bioconvert/__init__.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _get_version

#: Version used when the installed distribution metadata is unavailable.
__version__ = "1.2.0"

# Prefer the version recorded in the installed distribution metadata; only
# the metadata lookup itself is guarded.
try:
    version = _get_version("bioconvert")
except PackageNotFoundError:
    # No "bioconvert" distribution is installed: use the static value.
    version = __version__
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
import colorlog
|
|
11
|
+
|
|
12
|
+
# This will create a HOME/.config/bioconvert where files (e.g., executables)
|
|
13
|
+
# can be downloaded
|
|
14
|
+
from easydev import CustomConfig
|
|
15
|
+
|
|
16
|
+
configuration = CustomConfig("bioconvert", verbose=True)
|
|
17
|
+
|
|
18
|
+
# Point GOPATH at <home>/go and put its bin/ directory first on PATH.
# NOTE(review): presumably this makes tools installed by
# misc/install_goalign.sh and misc/install_gotree.sh discoverable -- confirm.
#
# os.path.expanduser("~") is used instead of os.environ["HOME"] because HOME
# may be undefined (e.g. on Windows), which previously made the bare
# ``import bioconvert`` fail with a KeyError. When HOME is set on POSIX the
# resulting value is unchanged.
os.environ["GOPATH"] = os.path.expanduser("~") + "/go"
# os.pathsep is ":" on POSIX (same as before); a missing PATH no longer raises.
os.environ["PATH"] = os.environ["GOPATH"] + "/bin/" + os.pathsep + os.environ.get("PATH", "")
|
|
20
|
+
|
|
21
|
+
from easydev.logging_tools import Logging
|
|
22
|
+
|
|
23
|
+
logger = Logging("bioconvert", "INFO")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def bioconvert_script(filename, where=None):
    """Return the path of a file located in the package ``misc`` directory.

    :param str filename: name of the file to look for
    :param str where: optional sub-directory of ``misc`` containing the file
    :return: the path to the existing file
    :raises FileNotFoundError: if the resulting path does not exist
    """
    # Resolve the package directory from this module's own location so the
    # function does not rely on the ``import bioconvert`` self-import that
    # is only executed further down in this file.
    share = os.path.join(os.path.dirname(__file__), "misc")
    if where:
        path = os.path.join(share, where, filename)
    else:
        path = os.path.join(share, filename)
    if not os.path.exists(path):
        raise FileNotFoundError("unknown file {}".format(path))
    return path
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def bioconvert_data(filename, where=None):
    """Return the path of a data set located in the package ``data`` directory.

    :param str filename: the name of the data file to get the path
    :param str where: optional sub-directory of ``data`` containing the file
    :return: the path to the existing file
    :raises FileNotFoundError: if the resulting path does not exist
    """
    # Resolve the package directory from this module's own location so the
    # function does not rely on the ``import bioconvert`` self-import that
    # is only executed further down in this file.
    share = os.path.join(os.path.dirname(__file__), "data")
    if where:
        path = os.path.join(share, where, filename)
    else:
        path = os.path.join(share, filename)
    if not os.path.exists(path):
        raise FileNotFoundError("unknown file {}".format(path))
    return path
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def info():
    """Return a one-line summary of what the converter registry contains.

    The registry is imported lazily, inside the function.

    NOTE(review): assumes ``Registry.get_info()`` returns a mapping whose
    values are per-converter method counts -- confirm against the registry.

    :return: a message giving the number of converters and of methods
    """
    from bioconvert.core.registry import Registry

    counts = Registry().get_info()
    template = "Bioconvert contains {} converters including {} methods"
    return template.format(len(counts), sum(counts.values()))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
import bioconvert
|
|
68
|
+
from bioconvert.core.base import ConvBase
|
|
69
|
+
from bioconvert.core.benchmark import Benchmark
|
|
70
|
+
from bioconvert.core.converter import Bioconvert
|
|
71
|
+
from bioconvert.core.decorators import compressor, requires
|
|
72
|
+
from bioconvert.core.registry import Registry
|
|
73
|
+
from bioconvert.core.shell import shell
|
|
74
|
+
from bioconvert.core.utils import TempFile, md5
|
bioconvert/abi2fasta.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`ABI` format to :term:`FASTA` format"""
|
|
25
|
+
from bioconvert import ConvBase, requires
|
|
26
|
+
|
|
27
|
+
__all__ = ["ABI2FASTA"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ABI2FASTA(ConvBase):
    """Convert :term:`ABI` file to :term:`FASTA` file

    :term:`ABI` files are created by ABI sequencing machines and include
    PHRED quality scores for base calls. The records they contain are
    written out as :term:`FASTA`.

    The method implemented is based on BioPython [BIOPYTHON]_.

    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kwargs):
        """.. rubric:: constructor

        :param str infile: input ABI file
        :param str outfile: output FASTA filename

        """
        super().__init__(infile, outfile, *args, **kwargs)

    @requires(python_library="biopython")
    def _method_biopython(self, *args, **kwargs):
        """Parse the input with Bio.SeqIO and write it back as FASTA.

        :reference: `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        from Bio import SeqIO

        SeqIO.write(SeqIO.parse(self.infile, "abi"), self.outfile, "fasta")
|
bioconvert/abi2fastq.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`ABI` format to :term:`FASTQ` format"""
|
|
25
|
+
from bioconvert import ConvBase, requires
|
|
26
|
+
|
|
27
|
+
__all__ = ["ABI2FASTQ"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ABI2FASTQ(ConvBase):
    """Convert :term:`ABI` file to :term:`FASTQ` file

    :term:`ABI` files are created by ABI sequencing machines and include
    PHRED quality scores for base calls, which makes it possible to create
    :term:`FASTQ` files from them.

    The method implemented is based on BioPython [BIOPYTHON]_.
    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kwargs):
        """.. rubric:: constructor

        :param str infile: input ABI file
        :param str outfile: output FASTQ filename

        """
        super().__init__(infile, outfile, *args, **kwargs)

    @requires(python_library="biopython")
    def _method_biopython(self, *args, **kwargs):
        """Parse the input with Bio.SeqIO and write it back as FASTQ.

        `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        from Bio import SeqIO

        SeqIO.write(SeqIO.parse(self.infile, "abi"), self.outfile, "fastq")
|
bioconvert/abi2qual.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`ABI` format to :term:`QUAL` format"""
|
|
25
|
+
from bioconvert import ConvBase, requires
|
|
26
|
+
|
|
27
|
+
__all__ = ["ABI2QUAL"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ABI2QUAL(ConvBase):
    """Convert :term:`ABI` file to :term:`QUAL` file

    :term:`ABI` files are created by ABI sequencing machines and
    include PHRED quality scores for base calls. This allows
    the creation of :term:`QUAL` files.

    The method implemented is based on BioPython [BIOPYTHON]_.

    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input ABI file
        :param str outfile: output QUAL filename

        """
        super(ABI2QUAL, self).__init__(infile, outfile, *args, **kargs)

    @requires(python_library="biopython")
    def _method_biopython(self, *args, **kwargs):
        """Read the records with Bio.SeqIO and write one QUAL entry per record.

        Each entry is a ``>name`` header line followed by one line holding
        the record's PHRED scores separated by single spaces.

        `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        from Bio import SeqIO

        records = SeqIO.parse(self.infile, "abi")
        # output using SeqIO.write(records, self.outfile, "qual") is not
        # standard so we write our own conversion here below
        with open(self.outfile, "w") as fout:
            for rec in records:
                scores = rec.letter_annotations["phred_quality"]
                # Scores are multi-digit integers: they must be separated,
                # otherwise e.g. [40, 5, 12] would be written as the
                # ambiguous string "40512".
                qual = " ".join(str(score) for score in scores)
                fout.write(">{}\n".format(rec.name))
                fout.write("{}\n".format(qual))
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`BAM` format to :term:`BEDGRAPH` format"""
|
|
25
|
+
import colorlog
|
|
26
|
+
|
|
27
|
+
from bioconvert import ConvBase
|
|
28
|
+
from bioconvert.core.decorators import requires
|
|
29
|
+
|
|
30
|
+
_log = colorlog.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
__all__ = ["BAM2BEDGRAPH"]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class BAM2BEDGRAPH(ConvBase):
    """Convert sorted :term:`BAM` file into :term:`BEDGRAPH` file

    Compute the coverage (depth) in BEDGRAPH.
    Regions with zero coverage are also reported.


    Note that this BEDGRAPH format is of the form::

        chrom chromStart chromEnd dataValue

    Note that consecutive positions with same values are compressed.

    ::

        chr1 0 75 0
        chr1 75 176 1
        chr1 176 177 2


    .. warning:: the BAM file must be sorted. This can be achieved with
        bamtools.


    Methods available are based on bedtools [BEDTOOLS]_ and mosdepth
    [MOSDEPTH]_.
    """

    # 4 minutes with bedtools and 20s with mosdepth
    #: Default value
    _default_method = "bedtools"
    _threading = True

    def __init__(self, infile, outfile):
        """.. rubric:: Constructor

        :param str infile: The path to the input BAM file.
            **It must be sorted**.
        :param str outfile: The path to the output file
        """
        super().__init__(infile, outfile)

    @requires("bedtools")
    def _method_bedtools(self, *args, **kwargs):
        """Do the conversion using bedtools.

        `bedtools documentation <https://bedtools.readthedocs.io/en/latest/>`_"""
        cmd = "bedtools genomecov -bga -ibam {} > {}".format(self.infile, self.outfile)
        self.execute(cmd)

    @requires("mosdepth")
    def _method_mosdepth(self, *args, **kwargs):
        """Do the conversion using mosdepth.

        mosdepth is run with a prefix inside a temporary directory; its
        gzip-compressed per-base output is then copied as is when the
        output filename ends with ``.gz``, or decompressed into the output
        file otherwise.

        `mosdepth documentation <https://github.com/brentp/mosdepth>`_"""
        import shutil
        import tempfile

        # Everything mosdepth writes lives in tmpdir, which the context
        # manager removes on exit (also when an exception propagates), so
        # no explicit clean-up command is needed.
        with tempfile.TemporaryDirectory() as tmpdir:
            cmd = "mosdepth {}/.bioconvert -t {} {}".format(tmpdir, self.threads, self.infile)
            self.execute(cmd)

            if self.outfile.endswith(".gz"):
                # Previously this branch did nothing and the requested
                # output file was never created.
                per_base = "{}/.bioconvert.per-base.bed.gz".format(tmpdir)
                shutil.copyfile(per_base, self.outfile)
            else:
                cmd = "gunzip -c {}/.bioconvert.per-base.bed.gz > {}".format(tmpdir, self.outfile)
                self.execute(cmd)
|
bioconvert/bam2bigwig.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`BAM` file to :term:`BIGWIG` format"""
|
|
25
|
+
|
|
26
|
+
import colorlog
|
|
27
|
+
from tempfile import NamedTemporaryFile
|
|
28
|
+
|
|
29
|
+
from bioconvert import ConvBase
|
|
30
|
+
from bioconvert.core.base import ConvArg
|
|
31
|
+
from bioconvert.core.decorators import requires
|
|
32
|
+
|
|
33
|
+
_log = colorlog.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
__all__ = ["BAM2BIGWIG"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BAM2BIGWIG(ConvBase):
    """Convert :term:`BAM` file to :term:`BIGWIG` file

    Convert BAM into a binary version of :term:`WIG` format.

    Methods are based on bamCoverage [DEEPTOOLS]_ and bedGraphToBigWig from
    wiggletools [WIGGLETOOLS]_. The wiggletools method requires an extra
    argument (--chrom-sizes), therefore the default one is bamCoverage for
    now.

    Moreover, the two methods do not return exactly the same info!

    You can check this by using bioconvert to convert into a human readable
    file such as wiggle.

    """

    #: Default value
    _default_method = "bamCoverage"

    def __init__(self, infile, outfile, *args, **kwargs):
        """.. rubric:: constructor

        :param str infile: input BAM file
        :param str outfile: output BIGWIG filename

        """
        super().__init__(infile, outfile, *args, **kwargs)

    @requires("bamCoverage")
    def _method_bamCoverage(self, *args, **kwargs):
        """Run the bamCoverage tool.

        `bamCoverage documentation <https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html>`_"""
        self.execute(f"bamCoverage --bam {self.infile} --outFileFormat bigwig --outFileName {self.outfile}")

    @requires(external_binaries=["bedGraphToBigWig", "bedtools"])
    def _method_ucsc(self, *args, **kwargs):
        """Chain BAM -> BEDGRAPH -> BIGWIG through a temporary bedgraph file.

        The ``chrom_sizes`` keyword argument (``None`` when absent) is
        forwarded to the BEDGRAPH -> BIGWIG converter.
        """
        from bioconvert.bam2bedgraph import BAM2BEDGRAPH
        from bioconvert.bedgraph2bigwig import BEDGRAPH2BIGWIG

        sizes = kwargs.get("chrom_sizes")

        with NamedTemporaryFile(suffix=".bedgraph") as tmp:
            # First hop: BAM to the temporary bedgraph file.
            to_bedgraph = BAM2BEDGRAPH(self.infile, tmp.name)
            to_bedgraph()
            # Second hop: temporary bedgraph to the requested bigwig.
            to_bigwig = BEDGRAPH2BIGWIG(tmp.name, self.outfile)
            to_bigwig(chrom_sizes=sizes)

    @classmethod
    def get_additional_arguments(cls):
        """Yield the extra ``--chrom-sizes`` argument of this converter."""
        chrom_sizes_arg = ConvArg(
            names="--chrom-sizes",
            default=None,
            help="a two-column file/URL: <chromosome name> <size in bases>. Used by the ucsc method only",
        )
        yield chrom_sizes_arg
|
bioconvert/bam2cov.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`BAM` format to :term:`COV` format"""
|
|
25
|
+
import colorlog
|
|
26
|
+
|
|
27
|
+
from bioconvert import ConvBase
|
|
28
|
+
from bioconvert.core.decorators import requires
|
|
29
|
+
|
|
30
|
+
_log = colorlog.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
__all__ = ["BAM2COV"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BAM2COV(ConvBase):
    """Convert sorted :term:`BAM` file into :term:`COV` file

    The COV format is of the form::

        chr1 1 0
        chr1 2 0
        chr1 3 0
        chr1 4 0
        chr1 5 0

    that is contig name, position, coverage.

    .. warning:: the BAM file must be sorted. This can be achieved with
        bamtools using *bamtools sort -in INPUT.bam*

    Methods available are based on samtools [SAMTOOLS]_ or bedtools [BEDTOOLS]_.
    """

    #: Default value
    _default_method = "bedtools"

    def __init__(self, infile, outfile):
        """.. rubric:: Constructor

        :param str infile: The path to the input BAM file. **It must be sorted**.
        :param str outfile: The path to the output file
        """
        super().__init__(infile, outfile)

    @requires("samtools")
    def _method_samtools(self, *args, **kwargs):
        """Do the conversion sorted :term:`BAM` -> :term:`COV` using samtools

        `SAMtools documentation <http://www.htslib.org/doc/samtools.html>`_"""
        self.execute(f"samtools depth -aa {self.infile} > {self.outfile}")

    @requires("bedtools")
    def _method_bedtools(self, *args, **kwargs):
        """Do the conversion sorted :term:`BAM` -> :term:`COV` using bedtools

        `bedtools documentation <https://bedtools.readthedocs.io/en/latest/>`_"""
        self.execute(f"bedtools genomecov -d -ibam {self.infile} > {self.outfile}")
|
bioconvert/bam2cram.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
###########################################################################
|
|
2
|
+
# Bioconvert is a project to facilitate the interconversion #
|
|
3
|
+
# of life science data from one format to another. #
|
|
4
|
+
# #
|
|
5
|
+
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
|
|
6
|
+
# #
|
|
7
|
+
# bioconvert is free software: you can redistribute it and/or modify #
|
|
8
|
+
# it under the terms of the GNU General Public License as published by #
|
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
10
|
+
# (at your option) any later version. #
|
|
11
|
+
# #
|
|
12
|
+
# bioconvert is distributed in the hope that it will be useful, #
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
15
|
+
# GNU General Public License for more details. #
|
|
16
|
+
# #
|
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
|
18
|
+
# along with this program (COPYING file). #
|
|
19
|
+
# If not, see <http://www.gnu.org/licenses/>. #
|
|
20
|
+
# #
|
|
21
|
+
# Repository: https://github.com/bioconvert/bioconvert #
|
|
22
|
+
# Documentation: http://bioconvert.readthedocs.io #
|
|
23
|
+
###########################################################################
|
|
24
|
+
"""Convert :term:`BAM` file to :term:`CRAM` format"""
|
|
25
|
+
import os
|
|
26
|
+
|
|
27
|
+
import colorlog
|
|
28
|
+
|
|
29
|
+
from bioconvert import ConvBase
|
|
30
|
+
from bioconvert.core.base import ConvArg
|
|
31
|
+
from bioconvert.core.decorators import requires
|
|
32
|
+
|
|
33
|
+
logger = colorlog.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class BAM2CRAM(ConvBase):
    """Convert :term:`BAM` file to :term:`CRAM` file

    The conversion requires the reference corresponding to the input file.
    It can be provided as an argument with the standalone (*-\\-reference*).
    Otherwise, users are asked to provide it.

    Methods available are based on samtools [SAMTOOLS]_.
    """

    #: Default value
    _default_method = "samtools"
    _threading = True

    def __init__(self, infile, outfile, *args, **kwargs):
        """.. rubric:: constructor

        :param str infile: input BAM file
        :param str outfile: output CRAM filename

        """
        super().__init__(infile, outfile, *args, **kwargs)

    def _get_reference(self):
        """Prompt for the reference path; used when --reference is not given.

        :return: the path typed by the user
        :raises IOError: if the path typed by the user does not exist
        """
        prompt = "Please enter the reference corresponding to the input BAM file:"
        reference = input(prompt)
        if not os.path.exists(reference):
            raise IOError("Reference required")
        logger.debug(f"Reference exists ({reference}).")
        return reference

    @requires("samtools")
    def _method_samtools(self, *args, **kwargs):
        """Here we use the SAMtools tool.

        `SAMtools documentation <http://www.htslib.org/doc/samtools.html>`_"""
        reference = kwargs.get("reference")
        if reference is None:
            reference = self._get_reference()

        # -C means output is CRAM
        self.execute(f"samtools view -@ {self.threads} -C {self.infile} -T {reference} -o {self.outfile}")

    @classmethod
    def get_additional_arguments(cls):
        """Yield the extra ``--reference`` argument of this converter."""
        reference_arg = ConvArg(
            names="--reference",
            default=None,
            help="the reference used (FASTA format). If not provided, prompt will appear",
        )
        yield reference_arg
|