biastools 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biastools/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ #biastools
2
+ import biastools.biastools
3
+ import biastools.biastools_scan
biastools/biastools.py ADDED
@@ -0,0 +1,307 @@
1
+ # Wrap up python file for the biastools 1st and 2nd module
2
+ import subprocess
3
+ import sys
4
+ import os
5
+ import argparse
6
+ from shutil import which
7
+
8
def is_tool(name):
    """Report whether an executable called `name` can be located via PATH."""
    located = which(name)
    return located is not None
11
+
12
+
13
def check_program_install(list_names):
    """
    Verify that every prerequisite program can be found on PATH.

    list_names: iterable of executable names to look up.

    One message is printed for each missing program. If at least one program
    is missing, a hint about the --force option is printed and the process
    terminates with exit status 1 (SystemExit). Otherwise returns None.
    """
    # Collect every missing program first so that all of them are reported
    # before the process terminates.
    missing = [name for name in list_names if which(name) is None]
    for name in missing:
        print(name, "is a prerequisite program, please install it before running biastools")
    if missing:
        print("Use --force option if you want to disable the prerequisite program check.")
        # sys.exit() instead of the bare exit(): the latter is injected by the
        # `site` module and is missing when Python runs without it.
        sys.exit(1)
22
+
23
+
24
def bool2str(flag):
    """Encode a truthy/falsy value as the string "1" or "0"."""
    return "1" if flag else "0"
29
+
30
+
31
def catch_assert(parser, message):
    """
    Report a command-line usage error and terminate the program.

    parser:  object providing print_usage() (an argparse.ArgumentParser).
    message: text describing what is wrong with the invocation.

    Prints the message surrounded by newlines, prints the parser usage line,
    then exits with status 1 (SystemExit); this function never returns.
    """
    print('\n', message, '\n')
    parser.print_usage()
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module and is missing when Python runs without it.
    sys.exit(1)
35
+
36
+
37
def ensure_processed_vcf(path_ref, path_vcf, path_output, sample_id, path_module, parser):
    """
    Ensure that the processed heterozygous VCF (<out>/<sample_id>.het.vcf.gz) exists.
    If it doesn't, regenerate it in the same way as in the simulation/analysis shell scripts.

    path_ref:    reference genome passed to `bcftools norm -f`; may be None
                 only when the processed VCF already exists.
    path_vcf:    input VCF to normalize; may be None only when the processed
                 VCF already exists.
    path_output: output directory.
    sample_id:   sample identifier; <path_output>/<sample_id> prefixes all files.
    path_module: directory (with trailing separator) containing filter_het_VCF.py.
    parser:      argparse parser used to print the usage when inputs are missing.

    Returns the path of the processed VCF. Exits through catch_assert() when
    the VCF has to be generated but <genome> or <vcf> is missing, and raises
    subprocess.CalledProcessError when one of the external commands fails.
    """
    prefix = path_output + '/' + sample_id
    het_vcf_path = prefix + '.het.vcf.gz'
    if os.path.exists(het_vcf_path):
        return het_vcf_path

    # Explicit check rather than `assert`: assertions are removed when Python
    # runs with -O, which would let None reach the subprocess calls below.
    if path_ref is None or path_vcf is None:
        catch_assert(
            parser,
            "<genome> and <vcf> should be specified when using --analyze "
            "if the processed VCF does not already exist in the output directory.",
        )

    normalized_vcf = prefix + '.normalized.vcf.gz'
    print("[Biastools] Prepare processed VCF for analysis...")
    # Normalize the input VCF against the reference
    subprocess.check_call([
        'bcftools', 'norm',
        '-f', path_ref,
        path_vcf,
        '-m', '+any',
        '-Oz',
        '-o', normalized_vcf,
    ])
    subprocess.check_call(['bcftools', 'index', normalized_vcf])

    # Filter heterozygous sites and index the resulting VCF
    subprocess.check_call([
        'python3',
        path_module + 'filter_het_VCF.py',
        '-v', normalized_vcf,
        '-o', het_vcf_path,
    ])
    subprocess.check_call(['tabix', '-p', 'vcf', het_vcf_path])

    return het_vcf_path
79
+
80
+
81
+ def main():
82
+ parser = argparse.ArgumentParser(description="Simulation/Alignment/Analyzing/Prediction module of the Biastools v0.3.2")
83
+ parser.add_argument('--version', action='version', version='%(prog)s 0.3.2')
84
+ parser.add_argument('-o', '--out', help="Path to output directory ['out_dir'].", default="out_dir")
85
+ parser.add_argument('-g', '--genome', help="Path to the reference genome.")
86
+ parser.add_argument('-v', '--vcf', nargs='+',
87
+ help="Path(s) to VCF file(s). For simulation/align/single-run analyze, "
88
+ "provide exactly one. For multi-report analyze, you may provide "
89
+ "one VCF shared by all reports or one per report.")
90
+ parser.add_argument('-s', '--sample_id', help="Sample ID ['sample'].", default="sample")
91
+ parser.add_argument('-r', '--run_id', help="Run ID ['run'].", default="run")
92
+ # Process options
93
+ parser.add_argument('--simulate', help='[1] Option to run biastools simulation.', action='store_true')
94
+ parser.add_argument('--align', help='[2] Option to run biastools align.', action='store_true')
95
+ parser.add_argument('--analyze', help='[3] Option to run biastools analyze.', action='store_true')
96
+ parser.add_argument('--predict', help='[4] Option to predict bias from analysis report.', action='store_true')
97
+
98
+ parser.add_argument('-t', '--thread', help="Number of threads to use [max].", type=int)
99
+ parser.add_argument('--force', help="running the program without checking prerequisite programs.", action='store_true')
100
+ # [1]
101
+ parser.add_argument('-x', '--coverage', help="Read coverage to simulate [30].", type=int, default=30)
102
+ # [2]
103
+ parser.add_argument('-a', '--aligner', help="Aligner to use (bowtie2|bwamem) [bowtie2]", default="bowtie2")
104
+ parser.add_argument('-b', '--align_index', help="Path to the aligner index (target reference)")
105
+ # [3]
106
+ parser.add_argument('-i', '--bam', help="Path to the alignment bam file, should be sorted [out_dir/sample.run_id.sorted.bam].")
107
+ parser.add_argument('-n', '--naive', help= "Option to run the naive assignment method [False].", action='store_true')
108
+ parser.add_argument('-R', '--real', help= "Option for performing analysis on real data [False].", action='store_true')
109
+ parser.add_argument('-d', '--boundary', help= "Boundary to plot the indel balance plot [20]", type=int, default=20)
110
+ parser.add_argument('-lr', '--list_report', help= "List of bias report to plot the indel balance plot", nargs='+')
111
+ parser.add_argument('-ld', '--list_run_id', help= "List of run ID for namings in the indel balance plot", nargs='+')
112
+ # [4]
113
+ parser.add_argument('-ps', '--sim_report', help= "Path to the simulation report.")
114
+ parser.add_argument('-pr', '--real_report', help= "Path to the real read report [out_dir/sample.real.run.bias].")
115
+ args = parser.parse_args()
116
+
117
+ ##### Parameters for biastool_analysis
118
+ path_output = args.out
119
+ path_ref = args.genome
120
+ vcf_list = args.vcf # may be None or a list of one/many paths
121
+ # For steps that only support a single VCF (simulate/align/single-run analyze),
122
+ # use the first entry when provided.
123
+ path_vcf = vcf_list[0] if vcf_list else None
124
+ sample_id = args.sample_id
125
+ run_id = args.run_id
126
+ bam_file = args.bam
127
+ if bam_file == None:
128
+ bam_file = path_output + '/' + sample_id + '.' + run_id + '.sorted.bam'
129
+
130
+ flag_simulate = args.simulate
131
+ flag_align = args.align
132
+ flag_analyze = args.analyze
133
+ flag_predict = args.predict
134
+
135
+ path_module = os.path.dirname(__file__) + '/'
136
+ try:
137
+ assert flag_simulate + flag_align + flag_analyze + flag_predict >= 1
138
+ except AssertionError:
139
+ catch_assert(parser, "At least one of the --simulate/align/analyze/predict option should be specified.")
140
+
141
+ flag_force = args.force
142
+ thread = args.thread
143
+ if thread == None:
144
+ if sys.platform == "darwin":
145
+ result = subprocess.run(["sysctl -n hw.ncpu"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
146
+ else:
147
+ result = subprocess.run(["nproc"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
148
+ thread = int(result.stdout.strip())
149
+
150
+ coverage = args.coverage
151
+ aligner = args.aligner
152
+ align_index = args.align_index
153
+ try:
154
+ assert aligner=="bowtie2" or aligner=="bwamem"
155
+ except AssertionError:
156
+ catch_assert(parser, "Only bowtie2 and bwamem are supported.")
157
+
158
+ flag_naive = args.naive
159
+ flag_real = args.real
160
+ boundary = args.boundary
161
+ list_report = args.list_report
162
+ list_run_id = args.list_run_id
163
+ if list_report:
164
+ try:
165
+ assert len(list_report) == len(list_run_id)
166
+ except AssertionError:
167
+ catch_assert(parser, "Number of list --list_report and --list_run_id entries are inconsistent.")
168
+
169
+ # Multiple VCF paths are only meaningful for multi-report analyze mode,
170
+ # where each bias report can have its own VCF. For all other modes, require
171
+ # at most one VCF path.
172
+ if vcf_list and len(vcf_list) > 1:
173
+ if not (flag_analyze and list_report):
174
+ catch_assert(
175
+ parser,
176
+ "Multiple --vcf paths are only supported when using --analyze "
177
+ "together with --list_report/--list_run_id.",
178
+ )
179
+
180
+ sim_report = args.sim_report
181
+ real_report = args.real_report
182
+ if flag_predict:
183
+ try:
184
+ assert real_report != None
185
+ except AssertionError:
186
+ catch_assert(parser, "<real_report> should be specified when using --predict")
187
+
188
+
189
+
190
+ # Checking prerequisite programs are installed
191
+ if flag_force != True:
192
+ list_program = ["bedtools", \
193
+ "samtools", \
194
+ "bcftools", \
195
+ "gzip", \
196
+ "tabix"]
197
+ if flag_align:
198
+ list_program += ["bwa", "bowtie2"]
199
+ if flag_simulate:
200
+ list_program.append("mason_simulator")
201
+ check_program_install( list_program )
202
+
203
+ # Start running
204
+ command = "mkdir -p " + path_output
205
+ subprocess.call(command, shell=True)
206
+
207
+ if flag_simulate:
208
+ try:
209
+ assert path_ref != None
210
+ assert path_vcf != None
211
+ except AssertionError:
212
+ catch_assert(parser, "<genome> and <vcf> should be specified when using --simulate")
213
+ print("[Biastools] Simulate...")
214
+ command = ' '.join(["bash", path_module+"biastools_simulation.sh", path_ref, path_vcf, path_output, sample_id, str(thread), str(coverage), path_module])
215
+ #print(command)
216
+ subprocess.call(command, shell=True)
217
+ if flag_align:
218
+ try:
219
+ assert path_ref != None
220
+ assert path_vcf != None
221
+ except AssertionError:
222
+ catch_assert(parser, "<genome> and <vcf> should be specified when using --align")
223
+ if align_index == None:
224
+ align_index = path_ref
225
+ print("[Biastools] Align...")
226
+ command = ' '.join(["bash", path_module+"biastools_align.sh", path_ref, path_vcf, path_output, sample_id, str(thread), aligner, align_index, run_id, path_module])
227
+ #print(command)
228
+ subprocess.call(command, shell=True)
229
+ if flag_analyze:
230
+ if list_report != None:
231
+ # Multi-report indel balance plotting.
232
+ # If multiple VCFs are provided, pass them directly through to
233
+ # indel_balance_plot.py, one per report. Otherwise, ensure the
234
+ # processed VCF for this sample exists and use it for all reports.
235
+ if vcf_list and len(vcf_list) > 1:
236
+ if len(vcf_list) != len(list_report):
237
+ catch_assert(
238
+ parser,
239
+ "When providing multiple --vcf paths, their number must "
240
+ "match the number of --list_report entries.",
241
+ )
242
+ vcf_args = ["-vcf"] + vcf_list
243
+ else:
244
+ # Ensure the processed VCF exists (or regenerate it) before plotting.
245
+ het_vcf_path = ensure_processed_vcf(
246
+ path_ref=path_ref,
247
+ path_vcf=path_vcf,
248
+ path_output=path_output,
249
+ sample_id=sample_id,
250
+ path_module=path_module,
251
+ parser=parser,
252
+ )
253
+ vcf_args = ["-vcf", het_vcf_path]
254
+
255
+ print("[Biastools] Plot the indel balance plot for multiple bias reports...")
256
+ if flag_real:
257
+ subprocess.call(
258
+ ['python3', path_module+'indel_balance_plot.py', "-lr"] + list_report
259
+ + ["-ln"] + list_run_id
260
+ + vcf_args
261
+ + ["-bd", str(boundary), "-map",
262
+ "-out", path_output+"/"+sample_id+"."+run_id+".real", "-real"]
263
+ )
264
+ else:
265
+ subprocess.call(
266
+ ['python3', path_module+'indel_balance_plot.py', "-lr"] + list_report
267
+ + ["-ln"] + list_run_id
268
+ + vcf_args
269
+ + ["-bd", str(boundary), "-map",
270
+ "-out", path_output+"/"+sample_id+"."+run_id+".sim"]
271
+ )
272
+ else:
273
+ try:
274
+ assert path_ref != None
275
+ assert path_vcf != None
276
+ except AssertionError:
277
+ catch_assert(parser, "<genome> and <vcf> should be specified when using --analyze")
278
+ # When running analyze directly, ensure that the BAM file is either
279
+ # explicitly provided or present at the default location.
280
+ if not os.path.exists(bam_file):
281
+ catch_assert(
282
+ parser,
283
+ "BAM file not found. Please specify --bam <file> explicitly "
284
+ "or ensure it exists at the default path: "
285
+ f"{path_output}/{sample_id}.{run_id}.sorted.bam",
286
+ )
287
+ print("[Biastools] Analyze and plot...")
288
+ command = ' '.join(["bash", path_module+"biastools_analysis.sh", path_ref, path_vcf, path_output, sample_id, str(thread), run_id, bool2str(flag_real), \
289
+ bool2str(flag_naive), str(boundary), path_module, bam_file])
290
+ #print(command)
291
+ subprocess.call(command, shell=True)
292
+ if flag_predict:
293
+ print("[Biastools] Predict bias...")
294
+ command = ' '.join(["bash", path_module+"biastools_predict.sh", path_output, sample_id, run_id, bool2str(flag_real), real_report, sim_report, path_module])
295
+ #print(command)
296
+ subprocess.call(command, shell=True)
297
+
298
+
299
+
300
+
301
+
302
+ if __name__ == "__main__":
303
+ main()
304
+
305
+
306
+
307
+
@@ -0,0 +1,38 @@
1
# Align the reads of both haplotypes (hapA / hapB) and merge the two sorted
# BAM files into ${path_out}/${sample_id}.${run_id}.sorted.bam.
#
# Positional arguments:
#   $1  path_ref     reference genome, used to build the index when it is missing
#   $2  path_vcf     VCF path (accepted but not used in this script)
#   $3  path_out     output directory
#   $4  sample_id    sample identifier; ${path_out}/${sample_id} prefixes all files
#   $5  THR          number of threads
#   $6  ALN          aligner, "bowtie2" or "bwamem"
#   $7  ALN_IDX      aligner index prefix, or 'none' to use path_ref
#   $8  run_id       run identifier
#   $9  path_module  module directory (accepted but not used in this script)
#
# All expansions are quoted so that paths containing spaces stay single words.
path_ref=$1
path_vcf=$2
path_out=$3
sample_id=$4
THR=$5
ALN=$6
ALN_IDX=$7
run_id=$8
path_module=$9
prefix="${path_out}/${sample_id}"

echo "[Biastools] Align sequences to the original reference"
if [[ ${ALN_IDX} == 'none' ]]; then
    ALN_IDX=${path_ref}
fi

if [[ ${ALN} == "bowtie2" ]]; then
    echo "[Biastools] Align with bowtie2"
    # bowtie2-build writes *.bt2l instead of *.bt2 for large genomes, so both
    # suffixes count as an existing index.
    if [ ! -f "${ALN_IDX}.1.bt2" ] && [ ! -f "${ALN_IDX}.1.bt2l" ]; then
        bowtie2-build "${path_ref}" "${ALN_IDX}"
    fi
    bowtie2 -p "${THR}" -x "${ALN_IDX}" --rg-id "${run_id}_hapA" --rg "SM:${sample_id}" -1 "${prefix}.hapA_1.fq.gz" -2 "${prefix}.hapA_2.fq.gz" |\
        samtools sort -@ "${THR}" -o "${prefix}.hapA.${run_id}.sorted.bam" -
    bowtie2 -p "${THR}" -x "${ALN_IDX}" --rg-id "${run_id}_hapB" --rg "SM:${sample_id}" -1 "${prefix}.hapB_1.fq.gz" -2 "${prefix}.hapB_2.fq.gz" |\
        samtools sort -@ "${THR}" -o "${prefix}.hapB.${run_id}.sorted.bam" -
elif [[ ${ALN} == "bwamem" ]]; then
    echo "[Biastools] Align with BWA MEM"
    # Options are placed before the positional arguments so they are still
    # parsed where getopt does not permute arguments (e.g. BSD/macOS).
    if [ ! -f "${ALN_IDX}.bwt" ]; then
        bwa index -p "${ALN_IDX}" "${path_ref}"
    fi
    bwa mem -t "${THR}" -R "@RG\tID:${run_id}_hapA\tSM:${sample_id}" "${ALN_IDX}" "${prefix}.hapA_1.fq.gz" "${prefix}.hapA_2.fq.gz" |\
        samtools sort -@ "${THR}" -o "${prefix}.hapA.${run_id}.sorted.bam" -
    bwa mem -t "${THR}" -R "@RG\tID:${run_id}_hapB\tSM:${sample_id}" "${ALN_IDX}" "${prefix}.hapB_1.fq.gz" "${prefix}.hapB_2.fq.gz" |\
        samtools sort -@ "${THR}" -o "${prefix}.hapB.${run_id}.sorted.bam" -
else
    # Without an alignment there is nothing to merge below.
    echo "[Biastools] Unsupported aligner: ${ALN}" >&2
    exit 1
fi
samtools merge -f "${prefix}.${run_id}.sorted.bam" "${prefix}.hapA.${run_id}.sorted.bam" "${prefix}.hapB.${run_id}.sorted.bam"
37
+
38
+
@@ -0,0 +1,73 @@
1
# Restrict the alignment to heterozygous sites, run the reference bias
# analysis and produce the reports/plots.
#
# Positional arguments:
#   $1   path_ref     reference genome
#   $2   path_vcf     input VCF
#   $3   path_out     output directory
#   $4   sample_id    sample identifier; ${path_out}/${sample_id} prefixes all files
#   $5   THR          number of threads
#   $6   run_id       run identifier
#   $7   flag_real    1 for real data, otherwise simulated data
#   $8   flag_naive   1 to use ref_bi_naive.py, otherwise ref_bi_context.py
#   $9   boundary     value passed to indel_balance_plot.py as -bd
#   $10  path_module  directory (with trailing slash) holding the python scripts
#   $11  bam_file     sorted alignment file to analyze
#
# All expansions are quoted so that paths containing spaces stay single words.
path_ref=$1
path_vcf=$2
path_out=$3
sample_id=$4
THR=$5
run_id=$6
flag_real=$7
flag_naive=$8
boundary=$9
path_module=${10}
prefix="${path_out}/${sample_id}"
bam_file=${11}


echo "[Biastools] Intersect the bam file and vcf file"
# The processed VCF and the filtered BAM are reused when they already exist.
if [ ! -f "${prefix}.het.vcf.gz" ]; then
    bcftools norm -f "${path_ref}" "${path_vcf}" -m +any -Oz -o "${prefix}.normalized.vcf.gz"
    bcftools index "${prefix}.normalized.vcf.gz"
    python3 "${path_module}filter_het_VCF.py" -v "${prefix}.normalized.vcf.gz" -o "${prefix}.het.vcf.gz"
    tabix -p vcf "${prefix}.het.vcf.gz"
fi
if [ ! -f "${prefix}.${run_id}.sorted.het.bam" ]; then
    python3 "${path_module}vcf_to_bed.py" -v "${prefix}.het.vcf.gz" -o "${prefix}.het.bed"
    samtools view -h -L "${prefix}.het.bed" "${bam_file}" -@ "${THR}" | samtools sort -@ "${THR}" > "${prefix}.${run_id}.sorted.het.bam"
    samtools index "${prefix}.${run_id}.sorted.het.bam"
fi


echo "[Biastools] Reference bias analysis"
if [[ ${flag_naive} == 1 ]]; then
    assign_method="${path_module}ref_bi_naive.py"
else
    assign_method="${path_module}ref_bi_context.py"
fi

report_dir="${path_out}/${run_id}_report"
mkdir -p "${report_dir}"
r_prefix="${report_dir}/${sample_id}"
if [[ ${flag_real} == 1 ]]; then
    python3 "${assign_method}" -s "${prefix}.${run_id}.sorted.het.bam" \
                               -v "${prefix}.het.vcf.gz" \
                               -f "${path_ref}" \
                               -p "${prefix}.golden.rpt.pickle" \
                               -o "${r_prefix}.${run_id}.real.bias" \
                               --real
    # indel balance plot
    python3 "${path_module}indel_balance_plot.py" -lr "${r_prefix}.${run_id}.real.bias.all" \
                                                  -ln "${run_id}" \
                                                  -vcf "${prefix}.het.vcf.gz" \
                                                  -bd "${boundary}" \
                                                  -map \
                                                  -out "${r_prefix}.${run_id}.real" \
                                                  -real
else
    python3 "${assign_method}" -s "${prefix}.${run_id}.sorted.het.bam" \
                               -v "${prefix}.het.vcf.gz" \
                               -f "${path_ref}" \
                               -p "${prefix}.golden.rpt.pickle" \
                               -o "${r_prefix}.${run_id}.sim.bias"

    # report the bias categories and report
    python3 "${path_module}golden_graph_report.py" -mb "${r_prefix}.${run_id}.sim.bias.snp" -out "${r_prefix}.${run_id}.snp"
    python3 "${path_module}golden_graph_report.py" -mb "${r_prefix}.${run_id}.sim.bias.gap" -out "${r_prefix}.${run_id}.gap"
    # plot the measures with NMB and NAB
    python3 "${path_module}golden_graph.py" -mb "${r_prefix}.${run_id}.sim.bias.snp" -out "${r_prefix}.${run_id}.snp"
    python3 "${path_module}golden_graph.py" -mb "${r_prefix}.${run_id}.sim.bias.gap" -out "${r_prefix}.${run_id}.gap"
    # indel balance plot
    python3 "${path_module}indel_balance_plot.py" -lr "${r_prefix}.${run_id}.sim.bias.all" \
                                                  -ln "${run_id}" \
                                                  -vcf "${prefix}.het.vcf.gz" \
                                                  -bd "${boundary}" \
                                                  -map \
                                                  -out "${r_prefix}.${run_id}.sim"
fi
@@ -0,0 +1,14 @@
1
# Compare two bias reports (bed files), taking the low read-depth regions of
# the second report into account.
#
# Positional arguments:
#   $1  path_out       output directory
#   $2  sample_id      sample identifier; ${path_out}/${sample_id} prefixes all files
#   $3  run_id         run identifier
#   $4  target_bed     bias bed file of the first report
#   $5  improve_bed    bias bed file of the second report
#   $6  improve_lowRd  low read-depth bed file of the second report
#   $7  path_module    directory (with trailing slash) holding the python scripts
#
# All expansions are quoted so that paths containing spaces stay single words.
path_out=$1
sample_id=$2
run_id=$3
target_bed=$4
improve_bed=$5
improve_lowRd=$6
path_module=$7
prefix="${path_out}/${sample_id}"

# Bias regions of the second report that lie outside its low read-depth regions.
bedtools subtract -a "${improve_bed}" -b "${improve_lowRd}" > "${prefix}.improve.goodRd.bias.bed"
# Bias regions of the first report that overlap the second report's low read-depth regions.
bedtools intersect -a "${target_bed}" -b "${improve_lowRd}" > "${prefix}.improve.skipped.bias.bed"

python3 "${path_module}compare_bias_with_RD.py" -lt "${target_bed}" -li "${prefix}.improve.goodRd.bias.bed" -lrd "${prefix}.improve.skipped.bias.bed" -out "${prefix}.${run_id}.improve.bias.bed"
#python3 check_inside_centromere.py -lr1 centromere_extend.bed -lr2 ${prefix}.${run_id}.improve.bias.bed
@@ -0,0 +1,23 @@
1
# Predict bias from a real-read report, optionally using a simulation report.
#
# Positional arguments:
#   $1  path_out           output directory
#   $2  sample_id          sample identifier; ${path_out}/${sample_id} prefixes all files
#   $3  run_id             run identifier
#   $4  flag_real          1 to force the real-report-only prediction
#   $5  report_real        real-read report, or 'none' for the default path
#   $6  report_simulation  simulation report, or 'none' when there is none
#   $7  path_module        directory (with trailing slash) holding the python scripts
#
# All expansions are quoted so that paths containing spaces stay single words.
path_out=$1
sample_id=$2
run_id=$3
flag_real=$4
report_real=$5
report_simulation=$6
path_module=$7
prefix="${path_out}/${sample_id}"


if [[ ${report_real} == 'none' ]]; then
    report_real="${prefix}.real.${run_id}.bias"
fi

if [[ ${flag_real} == 1 || ${report_simulation} == 'none' ]]; then
    echo "[Biastools] Real report bias prediction."
    python3 "${path_module}predict_model.py" -rr "${report_real}" -out "${prefix}.real.${run_id}"
else
    echo "[Biastools] Bias prediction based on simulation report!"
    python3 "${path_module}predict_experiment.py" -sr "${report_simulation}" \
                                                  -rr "${report_real}" \
                                                  -out "${prefix}.sim.${run_id}"
fi
@@ -0,0 +1,165 @@
1
+ # Wrap up python file for the biastools 3rd module
2
+ import subprocess
3
+ import sys
4
+ import os
5
+ import argparse
6
+ from biastools.biastools import check_program_install, catch_assert
7
+
8
+
9
+ def main():
10
+ parser = argparse.ArgumentParser()
11
+ parser.add_argument('-o', '--out', help="Path to output directory ['out_dir'].", default="out_dir")
12
+ parser.add_argument('-g', '--genome', help="Path to the reference genome.")
13
+ parser.add_argument('-i', '--bam', help="Path to the alignment bam file, should be SORTED.")
14
+ parser.add_argument('-s', '--sample_id', help="Sample ID ['sample'].", default="sample")
15
+ parser.add_argument('-r', '--run_id', help="Run ID ['run'].", default="run")
16
+ # Process options
17
+ parser.add_argument('--scan', help='[1] Option to scan and report bias region.', action='store_true')
18
+ parser.add_argument('--compare_bam', help='[2] Option to generate common baseline and compare.', action='store_true')
19
+ parser.add_argument('--compare_rpt', help='[3] Option to directly compare two bias report.', action='store_true')
20
+
21
+ parser.add_argument('-t', '--thread', help="Number of threads to use [max].", type=int)
22
+ parser.add_argument('--force', help="running the program without checking prerequisite programs.", action='store_true')
23
+ # [1]
24
+ parser.add_argument('-w', '--wig', help="Generate the wig files for the three measures, VERY SLOW [False]", action='store_true')
25
+ parser.add_argument('-R', '--range', help="The range in the bam file targeted for analysis.")
26
+ # [2]
27
+ parser.add_argument('-i2', '--bam2', help="Path to the second alignment bam file want to compare, should be SORTED.")
28
+ parser.add_argument('-m', '--mpileup', help="Path to the mpileup file of the first bam file.")
29
+ parser.add_argument('-m2', '--mpileup2', help="Path to the mpileup file of the second bam file.")
30
+ # [3]
31
+ parser.add_argument('-b1', '--bed1', help="Path to the first bed file for comparison.")
32
+ parser.add_argument('-b2', '--bed2', help="Path to the second bed file for comparison.")
33
+ parser.add_argument('-l2', '--lowRd2', help="Path to the .lowRd.bed report of the second file.")
34
+ args = parser.parse_args()
35
+
36
+ path_output = args.out
37
+ path_ref = args.genome
38
+ bam_file = args.bam
39
+ sample_id = args.sample_id
40
+ run_id = args.run_id
41
+
42
+ flag_scan = args.scan
43
+ flag_compare_bam = args.compare_bam
44
+ flag_compare_rpt = args.compare_rpt
45
+ try:
46
+ assert flag_scan + flag_compare_bam + flag_compare_rpt >= 1
47
+ except AssertionError:
48
+ catch_assert(parser, "At least one of the --scan/compare_bam/compare_rpt option should be specified.")
49
+
50
+ flag_force = args.force
51
+ thread = args.thread
52
+ if thread == None:
53
+ if sys.platform == "darwin":
54
+ result = subprocess.run(["sysctl -n hw.ncpu"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
55
+ else:
56
+ result = subprocess.run(["nproc"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
57
+ thread = int(result.stdout.strip())
58
+ flag_wig = args.wig
59
+ Range = args.range
60
+ bam_file2 = args.bam2
61
+ mpileup_file = args.mpileup
62
+ mpileup_file2 = args.mpileup2
63
+ bed_file1 = args.bed1
64
+ bed_file2 = args.bed2
65
+ lowRd_file2 = args.lowRd2
66
+
67
+
68
+ # Checking prerequisite programs are installed
69
+ if flag_force != True:
70
+ check_program_install(["bedtools", \
71
+ "samtools", \
72
+ "bcftools"])
73
+
74
+ # Start running
75
+ command = "mkdir -p " + path_output
76
+ subprocess.call(command, shell=True)
77
+ prefix = path_output + '/' + sample_id
78
+ path_module = os.path.dirname(__file__) + '/'
79
+ if flag_scan:
80
+ print("[Biastools] Scanning...")
81
+ if os.path.exists(bam_file+'.bai'):
82
+ pass
83
+ else:
84
+ command = ["samtools", "index", bam_file]
85
+ subprocess.call(command)
86
+
87
+ print("[BIASTOOLS] SAMPLE", bam_file, " as ", sample_id + ".baseline ...")
88
+ command = ["python3", path_module+"sample_baseline.py", "-b", bam_file, "-f", path_ref, "-o", prefix+".sample"]
89
+ print(' '.join(command))
90
+ subprocess.call(command)
91
+
92
+ if Range == None:
93
+ print("[BIASTOOLS] Process the whole bam file...")
94
+ target_bam = bam_file
95
+ else:
96
+ print("[BIASTOOLS] Extract reads from " + Range + "...")
97
+ target_bam = prefix + '.range.bam'
98
+ command = ["samtools", "view", " -h", bam_file, Range, "-o", target_bam, "-@", thread]
99
+ print(' '.join(command))
100
+ subprocess.call(command)
101
+
102
+ print("[BIASTOOLS] Format the mpileup...")
103
+ if os.path.exists(prefix+'.'+run_id+'.mpileup'):
104
+ print(prefix+'.'+run_id+'.mpileup already exist!')
105
+ else:
106
+ command = ["bcftools", "mpileup", "--count-orphans", "--annotate", "FORMAT/AD,FORMAT/DP", \
107
+ "-f", path_ref, \
108
+ "--min-BQ", "0", \
109
+ "--min-MQ", "0", \
110
+ "--threads", str(thread), target_bam, "-o", prefix+'.'+run_id+'.mpileup']
111
+ print(' '.join(command))
112
+ subprocess.call(command)
113
+ print("[BIASTOOLS] Scanning bias...")
114
+ if flag_wig:
115
+ command = ["python3", path_module+"scanning_bias.py", "-g", prefix+'.'+run_id+'.mpileup', "--sample", "-b", prefix+".sample.baseline", \
116
+ "-wig", "-o", prefix+'.'+run_id+'.scanning']
117
+ else:
118
+ command = ["python3", path_module+"scanning_bias.py", "-g", prefix+'.'+run_id+'.mpileup', "--sample", "-b", prefix+".sample.baseline", \
119
+ "-o", prefix+'.'+run_id+'.scanning']
120
+ print(' '.join(command))
121
+ subprocess.call(command)
122
+
123
+ if flag_compare_bam:
124
+ if os.path.exists(bam_file+'.bai'):
125
+ pass
126
+ else:
127
+ command = ["samtools", "index", bam_file]
128
+ subprocess.call(command)
129
+ if os.path.exists(bam_file2+'.bai'):
130
+ pass
131
+ else:
132
+ command = ["samtools", "index", bam_file2]
133
+ subprocess.call(command)
134
+
135
+ print("[Biastools] Generate common baseline...")
136
+ baseline = prefix+"."+run_id+".combine"
137
+ command = ["python3", path_module+"merge_baseline.py", "-b1", bam_file, "-b2", bam_file2, "-f", path_ref, "-o", baseline]
138
+ #print(' '.join(command))
139
+ subprocess.call(command)
140
+ command = ' '.join(["python3", path_module+"scanning_bias.py", "-g", mpileup_file, "-b", baseline+".baseline", "-o", baseline+".1.scanning", ">", prefix+"."+run_id+".log"])
141
+ #print(command)
142
+ subprocess.call(command, shell=True)
143
+ command = ' '.join(["python3", path_module+"scanning_bias.py", "-g", mpileup_file2, "-b", baseline+".baseline", "-o", baseline+".2.scanning", ">", prefix+"."+run_id+".log"])
144
+ #print(command)
145
+ subprocess.call(command, shell=True)
146
+
147
+ print("[Biastools] Compare two bam files with common baseline...")
148
+ command = ' '.join(["bash", path_module+"biastools_compare.sh", path_output, sample_id, run_id, \
149
+ baseline+".1.scanning.bias.bed", \
150
+ baseline+".2.scanning.bias.bed", \
151
+ baseline+".2.scanning.lowRd.bed", \
152
+ path_module])
153
+ print(command)
154
+ subprocess.call(command, shell=True)
155
+ if flag_compare_rpt:
156
+ print("[Biastools] Compare two bed files...")
157
+ command = ' '.join(["bash", path_module+"biastools_compare.sh", path_output, sample_id, run_id, bed_file1, bed_file2, lowRd_file2, path_module])
158
+ print(command)
159
+ subprocess.call(command, shell=True)
160
+
161
+
162
+
163
+
164
+ if __name__ == "__main__":
165
+ main()