cagnut_gatk 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cagnut_gatk.gemspec +27 -0
- data/lib/cagnut_gatk/base.rb +82 -0
- data/lib/cagnut_gatk/check_tools.rb +16 -0
- data/lib/cagnut_gatk/configuration.rb +56 -0
- data/lib/cagnut_gatk/functions/analyze_covariates.rb +94 -0
- data/lib/cagnut_gatk/functions/base_recalibrator.rb +99 -0
- data/lib/cagnut_gatk/functions/count_read.rb +77 -0
- data/lib/cagnut_gatk/functions/depth_of_coverage.rb +81 -0
- data/lib/cagnut_gatk/functions/haplotype_caller.rb +89 -0
- data/lib/cagnut_gatk/functions/indel_realigner.rb +101 -0
- data/lib/cagnut_gatk/functions/print_reads.rb +92 -0
- data/lib/cagnut_gatk/functions/realigner_target_creator.rb +92 -0
- data/lib/cagnut_gatk/functions/templates/analyze_covariates.sh +26 -0
- data/lib/cagnut_gatk/functions/templates/base_recalibrator.sh +30 -0
- data/lib/cagnut_gatk/functions/templates/count_read.sh +23 -0
- data/lib/cagnut_gatk/functions/templates/depth_of_coverage.sh +19 -0
- data/lib/cagnut_gatk/functions/templates/print_reads.sh +25 -0
- data/lib/cagnut_gatk/functions/unified_genotyper.rb +89 -0
- data/lib/cagnut_gatk/functions/variant_eval.rb +88 -0
- data/lib/cagnut_gatk/functions/variant_filtration.rb +88 -0
- data/lib/cagnut_gatk/util.rb +118 -0
- data/lib/cagnut_gatk/version.rb +3 -0
- data/lib/cagnut_gatk.rb +16 -0
- metadata +135 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
module CagnutGatk
  # Writes and submits the job script that runs the GATK HaplotypeCaller tool
  # (-T HaplotypeCaller) for the configured sample.
  #
  # Sample-wide values (sample name, jobs directory, reference, ...) are
  # delegated to Cagnut::Configuration.base; the tool's java/params lists are
  # delegated to CagnutGatk.config.
  class HaplotypeCaller
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :java_path,
                   :ref_fasta, :target, :prefix_name, :dodebug, :target_flanks_file
    def_delegators :'CagnutGatk.config', :haplotype_caller_params

    # @param opts [Hash] job settings:
    #   :order - number rendered as the two-digit script-name prefix
    #   :input - BAM to call on; when nil it falls back to
    #            "<dirs[:input]>/<sample_name>_recal.bam"
    #   :dirs  - hash of directories; :output is always read, :input only
    #            when :input is nil
    def initialize(opts = {})
      @order = format('%02i', opts[:order])
      @input = opts[:input]
      @input = "#{opts[:dirs][:input]}/#{sample_name}_recal.bam" if @input.nil?
      @output = "#{opts[:dirs][:output]}/#{sample_name}.vcf"
      @job_name = "#{prefix_name}_haplotype_caller_#{sample_name}"
    end

    # Generates the script and hands it to ::Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded inside the cluster options
    # @return [Array] the job name and the VCF path this job writes
    def run(previous_job_id = nil)
      puts "Submitting HaplotypeCaller #{sample_name} "
      ::Cagnut::JobManage.submit(generate_script, @job_name, cluster_options(previous_job_id))
      [@job_name, @output]
    end

    # @param previous_job_id [Object, nil] passed through unchanged
    # @return [Hash] options given to the job submitter
    def cluster_options(previous_job_id = nil)
      cores = 6
      {
        previous_job_id: previous_job_id,
        var_env: [cores, target],
        adjust_memory: [
          "h_stack=adjustWorkingMem 256M #{cores}",
          "h_vmem=adjustWorkingMem 10G #{cores}"
        ],
        parallel_env: [cores],
        tools: %w[gatk haplotype_caller]
      }
    end

    # Configured params followed by the arguments derived from this job.
    # Works on a copy, so the configured list is not modified.
    #
    # @return [Array<String>] de-duplicated command-line arguments
    def haplotype_caller_options
      args = haplotype_caller_params['params'].dup
      args.push('-T HaplotypeCaller', "-R #{ref_fasta}", "-I #{@input}", "-o #{@output}")
      args.push("-L #{target_flanks_file}") if target_flanks_file
      args.uniq
    end

    # @return [Array<String>] java_path followed by the configured java
    #   arguments, de-duplicated
    def modified_java_array
      haplotype_caller_params['java'].dup.unshift(java_path).uniq
    end

    # @return [Hash] 'java' and 'params' argument lists, built once per instance
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => haplotype_caller_options
      }
    end

    # Writes "<order>_gatk_haplotype_caller.sh" into jobs_dir and makes it
    # executable for the owner only.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_haplotype_caller"
      file = File.join(jobs_dir, "#{script_name}.sh")
      # The text is rendered inside the block so the file is opened first.
      File.open(file, 'w') { |f| f.puts script_body(script_name) }
      File.chmod(0700, file)
      script_name
    end

    private

    # Renders the bash text of the job script.
    def script_body(script_name)
      <<-BASH.strip_heredoc
        #!/bin/bash

        cd "#{jobs_dir}/../"
        echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

        #{params_combination['java'].join(' ')} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

        EXITSTATUS=$?

        if [ ! -s "#{@output}" ]
        then
          echo "vcf incomplete!"
          exit 100;
        fi

        if [ $EXITSTATUS -ne 0 ];then exit $EXITSTATUS;fi
        echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

      BASH
    end
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module CagnutGatk
  # Writes and submits the job script that runs the GATK IndelRealigner tool
  # (-T IndelRealigner) for the configured sample.
  #
  # Sample-wide values are delegated to Cagnut::Configuration.base; the tool's
  # java/params lists are delegated to CagnutGatk.config.
  class IndelRealigner
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :dodebug,
                   :ref_fasta, :target_flanks_file, :dbsnp_ref_indels,
                   :magic28, :prefix_name, :java_path
    def_delegators :'CagnutGatk.config', :indel_realigner_params

    # @param opts [Hash] job settings:
    #   :order         - number rendered as the two-digit script-name prefix
    #   :interval_list - path passed to -targetIntervals and checked by the script
    #   :input         - BAM to realign; when nil it falls back to
    #                    "<dirs[:input]>/<sample_name>_markdup.bam"
    #   :dirs          - hash of directories; :output is always read, :input
    #                    only when :input is nil
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_indelRealigner_#{sample_name}"
      @interval_list = opts[:interval_list]
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_markdup.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{sample_name}_realn.bam"
    end

    # Generates the script and hands it to ::Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded inside the cluster options
    # @return [Array] the job name and the realigned BAM path
    def run previous_job_id = nil
      puts "Submitting indel_realigner #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @param previous_job_id [Object, nil] passed through unchanged
    # @return [Hash] options given to the job submitter
    def cluster_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=8G'],
        tools: ['gatk', 'indel_realigner']
      }
    end

    # Configured params followed by the arguments derived from this job.
    # Works on a copy, so the configured list is not modified.
    #
    # @return [Array<String>] de-duplicated command-line arguments
    def indel_realigner_options
      array = indel_realigner_params['params'].dup
      array << "-T IndelRealigner"
      array << "-R #{ref_fasta}"
      array << "-targetIntervals #{@interval_list}"
      array << "-I #{@input}"
      array << "-o #{@output}"
      array << "-known #{dbsnp_ref_indels}" if dbsnp_ref_indels
      # A single non-destructive uniq is enough; the earlier uniq! was redundant.
      array.uniq
    end

    # @return [Array<String>] java_path followed by the configured java
    #   arguments, de-duplicated
    def modified_java_array
      array = indel_realigner_params['java'].dup
      array.unshift(java_path).uniq
    end

    # @return [Hash] 'java' and 'params' argument lists, built once per instance
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => indel_realigner_options
      }
    end

    # Writes "<order>_gatk_indel_realigner.sh" into jobs_dir and makes it
    # executable for the owner only.
    #
    # The script exits 100 when the interval file or the output BAM is missing
    # or empty, or when the last 28 bytes of the output BAM (hex-dumped with
    # xxd) differ from magic28.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_indel_realigner"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<-BASH.strip_heredoc
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
          # Check for intervals file
          if [ ! -s "#{@interval_list}" ];then
            echo "Error: Missing interval file: #{@interval_list} from realignTargetCreator_#{sample_name}"
            exit 100
          fi

          #{params_combination['java'].join("\s")} \\
            #{params_combination['params'].join(" \\\n ")} \\
            #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          #force error when missing @output
          if [ ! -s "#{@output}" ]
          then
            echo "Missing @output BAM #{@output}"
            exit 100
          fi

          # Check BAM EOF
          BAM_28=$(tail -c 28 "#{@output}" | xxd -p)
          if [ "#{magic28}" != "$BAM_28" ]
          then
            echo "Error with BAM EOF" 1>&2
            exit 100
          fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          exit $EXITSTATUS
        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module CagnutGatk
  # Writes and submits the job script that runs the GATK PrintReads tool
  # (-T PrintReads) with a -BQSR recalibration file for the configured sample.
  #
  # Sample-wide values are delegated to Cagnut::Configuration.base; the tool's
  # java/params lists are delegated to CagnutGatk.config. The script text is
  # rendered from templates/print_reads.sh through Tilt.
  class PrintReads
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :target_flanks_file,
                   :ref_fasta, :prefix_name, :dodebug, :java_path
    def_delegators :'CagnutGatk.config', :print_reads_params

    # Aborts the process (via #output_file) when the input basename does not
    # contain "_realn.bam".
    #
    # @param opts [Hash] job settings:
    #   :order - number rendered as the two-digit script-name prefix
    #   :input - BAM to read; when nil it falls back to
    #            "<dirs[:input]>/<sample_name>_realn.bam"
    #   :dirs  - hash of directories; :output and :contrast are always read,
    #            :input only when :input is nil
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_PrintReads_#{sample_name}"
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_realn.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{output_file}"
      @bqsr_file = "#{opts[:dirs][:contrast]}/#{replace_filename('_recal.csv')}"
    end

    # @return [String] basename of the input path (memoised)
    def file_basename
      @basename ||= File.basename @input
    end

    # @param target [String] replacement for the "_realn.bam" part
    # @return [String] the input basename with "_realn.bam" replaced by target
    def replace_filename target
      file_basename.gsub '_realn.bam', target
    end

    # Name of the recalibrated BAM derived from the input basename.
    # Aborts when the replacement changed nothing, i.e. the input basename has
    # no "_realn.bam" part to derive an output name from.
    #
    # @return [String] the output file name
    def output_file
      output = replace_filename '_recal.bam'
      return output unless output == file_basename
      abort %(Input file is not correctly named: expected a "_realn.bam" file, got "#{file_basename}")
    end

    # Generates the script and hands it to ::Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded inside the cluster options
    # @return [Array] the job name and the recalibrated BAM path
    def run previous_job_id = nil
      puts "Submitting PrintReads #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @param previous_job_id [Object, nil] passed through unchanged
    # @return [Hash] options given to the job submitter
    def cluster_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=6G'],
        tools: ['gatk', 'print_reads']
      }
    end

    # Renders templates/print_reads.sh with #job_params, writes it to
    # "<order>_gatk_print_reads.sh" in jobs_dir and makes it executable for
    # the owner only.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_print_reads"
      file = File.join jobs_dir, "#{script_name}.sh"
      path = File.expand_path '../templates/print_reads.sh', __FILE__
      template = Tilt.new path
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Configured params followed by the arguments derived from this job.
    # Works on a copy, so the configured list is not modified.
    #
    # @return [Array<String>] de-duplicated command-line arguments
    def print_reads_options
      ary = print_reads_params['params'].dup
      ary << "-T PrintReads"
      ary << "-R #{ref_fasta}"
      ary << "-I #{@input}"
      ary << "-o #{@output}"
      ary << "-BQSR #{@bqsr_file}"
      ary << "-L #{target_flanks_file}" if target_flanks_file
      ary.uniq
    end

    # @return [Array<String>] java_path followed by the configured java
    #   arguments, de-duplicated
    def modified_java_array
      array = print_reads_params['java'].dup
      array.unshift(java_path).uniq
    end

    # Memoised like the other tool classes so repeated calls reuse one hash.
    #
    # @return [Hash] 'java' and 'params' argument lists
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => print_reads_options
      }
    end

    # @param script_name [String] name shown in the script's log lines
    # @return [Hash] locals handed to the template
    def job_params script_name
      {
        script_name: script_name,
        jobs_dir: jobs_dir,
        output: @output,
        bqsr_file: @bqsr_file,
        print_reads_params: params_combination,
        run_local: ::Cagnut::JobManage.run_local,
      }
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module CagnutGatk
  # Writes and submits the job script that runs the GATK
  # RealignerTargetCreator tool (-T RealignerTargetCreator) for the configured
  # sample, producing an interval list.
  #
  # Sample-wide values are delegated to Cagnut::Configuration.base; the tool's
  # java/params lists are delegated to CagnutGatk.config.
  class RealignerTargetCreator
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :dodebug,
                   :ref_fasta, :snpdb, :target_flanks_file, :prefix_name,
                   :java_path
    def_delegators :'CagnutGatk.config', :realigner_target_creator_params

    # @param opts [Hash] job settings:
    #   :order - number rendered as the two-digit script-name prefix
    #   :input - BAM to scan; when nil it falls back to
    #            "<dirs[:input]>/<sample_name>_markdup.bam"
    #   :dirs  - hash of directories; :output is always read, :input only
    #            when :input is nil
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_realignTargetCreator_#{sample_name}"
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_markdup.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{sample_name}_markdup.interval_list"
    end

    # Generates the script and hands it to ::Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded inside the cluster options
    # @return [Array] the job name and the interval-list path
    def run previous_job_id = nil
      puts "Submitting realigner_target_creator #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @param previous_job_id [Object, nil] passed through unchanged
    # @return [Hash] options given to the job submitter
    def cluster_options previous_job_id = nil
      core_num = 6
      job_mem1 = "adjustWorkingMem 256M #{core_num}"
      job_mem2 = "adjustWorkingMem 10G #{core_num}"
      {
        previous_job_id: previous_job_id,
        var_env: [core_num],
        adjust_memory: ["h_stack=#{job_mem1}", "h_vmem=#{job_mem2}"],
        parallel_env: [core_num],
        tools: ['gatk', 'realigner_target_creator']
      }
    end

    # Configured params followed by the arguments derived from this job.
    # Works on a copy, so the configured list is not modified.
    #
    # @return [Array<String>] de-duplicated command-line arguments
    def realigner_target_creator_options
      array = realigner_target_creator_params['params'].dup
      array << "-T RealignerTargetCreator"
      array << "-R #{ref_fasta}"
      # Only pass --known when a database is configured; a bare "--known "
      # would consume the following argument on the command line.
      array << "--known #{snpdb}" if snpdb
      array << "-I #{@input}"
      array << "-o #{@output}"
      array << "-L #{target_flanks_file}" if target_flanks_file
      # A single non-destructive uniq is enough; the earlier uniq! was redundant.
      array.uniq
    end

    # @return [Array<String>] java_path followed by the configured java
    #   arguments, de-duplicated
    def modified_java_array
      array = realigner_target_creator_params['java'].dup
      array.unshift(java_path).uniq
    end

    # @return [Hash] 'java' and 'params' argument lists, built once per instance
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => realigner_target_creator_options
      }
    end

    # Writes "<order>_gatk_realigner_target_creator.sh" into jobs_dir and makes
    # it executable for the owner only. The script exits 100 when the interval
    # list is missing or empty, otherwise with the tool's exit status.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_realigner_target_creator"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<-BASH.strip_heredoc
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          #{params_combination['java'].join("\s")} \\
            #{params_combination['params'].join(" \\\n ")} \\
            #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          #force error when missing output
          if [ ! -s "#{@output}" ]
          then
            echo "Missing indel_calls #{@output}, can not continue"
            exit 100
          fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          exit $EXITSTATUS
        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/bin/bash
# Job script template. Runs the command assembled from
# analyze_covariates_params and appends start/finish markers to the
# finished_jobs file in the jobs directory. Placeholders are expected to be
# filled in when the template is rendered.
# Exit codes: 100 when the recal file or the output is missing or empty,
# otherwise the exit status of the command.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

# Refuse to start without a non-empty recal file. The path is quoted so that
# spaces or glob characters in it cannot break the test.
if [ ! -s "#{after}" ]; then
  echo "Error: missing recalfile #{after}"
  exit 100
fi

# Remove any core.* files from the working directory; errors are discarded.
rm core.* 2> /dev/null

#{analyze_covariates_params['java'].join("\s")} \\
  #{analyze_covariates_params['params'].join(" \\\n ")} \\
  #{run_local}

EXITSTATUS=$?

# Throw an error if there is no output file.
if [ ! -s "#{output}" ]; then
  echo "Error: no plots outputted"
  exit 100
fi

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/bin/bash
# Job script template. Verifies the tail of the input BAM, runs the command
# assembled from base_recalibrator_params and appends start/finish markers to
# the finished_jobs file in the jobs directory. Placeholders are expected to
# be filled in when the template is rendered.
# Exit codes: 100 when the BAM tail check fails or the output is missing or
# empty, otherwise the exit status of the command.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
# Remove any core.* files from the working directory; errors are discarded.
rm core.* 2> /dev/null

# Check BAM EOF: the hex dump of the last 28 bytes must equal magic28.
# The path is quoted so that spaces in it cannot split the argument.
BAM_28=$(tail -c 28 "#{input}" | xxd -p)
if [ "#{magic28}" != "$BAM_28" ]
then
  echo "Error with BAM EOF"
  exit 100
fi

#{base_recalibrator_params['java'].join("\s")} \\
  #{base_recalibrator_params['params'].join(" \\\n ")} \\
  #{run_local}

EXITSTATUS=$?

# Force an error when the recal file is missing; this prevents continuation
# of the pipeline.
if [ ! -s "#{output}" ]
then
  echo "Missing #{output}"
  exit 100
fi

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/bash
# Job script template. Runs the command assembled from count_reads_params and
# appends start/finish markers to the finished_jobs file in the jobs
# directory. Placeholders are expected to be filled in when the template is
# rendered.
# Exit codes: 100 when the output is missing, empty or exactly 100 bytes
# long, otherwise the exit status of the command.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
#{count_reads_params['java'].join("\s")} \\
  #{count_reads_params['params'].join(" \\\n ")} \\
  #{run_local}

EXITSTATUS=$?

# The path is quoted so that spaces or glob characters cannot break the test.
if [ ! -s "#{output}" ]
then
  echo "Incomplete output file #{output}"
  exit 100
fi

# An output of exactly 100 bytes is reported as a memory error.
# NOTE(review): the reason for the 100-byte threshold is not visible here.
# Quoting the substitution keeps the test well-formed even if stat prints
# nothing.
if [ "$(stat --printf="%s" "#{output}")" = 100 ];then
  echo "Memory Error. Exitting."
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

if [ $EXITSTATUS -ne 0 ];then exit $EXITSTATUS;fi
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/bin/bash
# Job script template. Runs the command assembled from
# depth_of_coverage_params and appends start/finish markers to the
# finished_jobs file in the jobs directory. Placeholders are expected to be
# filled in when the template is rendered.
# Exit codes: 100 when the sample_statistics file does not exist, otherwise
# the exit status of the command.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

#{depth_of_coverage_params['java'].join("\s")} \\
  #{depth_of_coverage_params['params'].join(" \\\n ")} \\
  #{run_local}

EXITSTATUS=$?

# Only existence of the statistics file is checked, not its size.
if [ ! -e "#{output}.sample_statistics" ]; then
  echo "Missing output: #{output}"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/bin/bash
# Job script template. Runs the command assembled from print_reads_params and
# appends start/finish markers to the finished_jobs file in the jobs
# directory. Placeholders are filled in when the template is rendered.
# Exit codes: 100 when the recal file is missing or empty, or when the output
# is missing or not larger than 1024 bytes; otherwise the exit status of the
# command.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
if [ ! -s "#{bqsr_file}" ]; then
  echo "Error: missing recalfile #{bqsr_file}"
  exit 100
fi

# Remove any core.* files from the working directory; errors are discarded.
rm core.* 2> /dev/null

#{print_reads_params['java'].join("\s")} \\
  #{print_reads_params['params'].join(" \\\n ")} \\
  #{run_local}

EXITSTATUS=$?

# Throw an error if the output is missing or not larger than 1024 bytes.
# The existence test comes first: for a missing file stat prints nothing, and
# an unquoted empty operand would make the size test itself fail, which
# skipped this check entirely.
if [ ! -s "#{output}" ] || [ "$(stat --printf="%s" "#{output}")" -le 1024 ]
then
  echo "Error: missing or truncated output #{output}"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module CagnutGatk
  # Writes and submits the job script that runs the GATK UnifiedGenotyper tool
  # (-T UnifiedGenotyper) for the configured sample.
  #
  # Sample-wide values are delegated to Cagnut::Configuration.base; the tool's
  # java/params lists are delegated to CagnutGatk.config.
  class UnifiedGenotyper
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :dodebug,
                   :ref_fasta, :snpdb, :target, :prefix_name, :java_path
    def_delegators :'CagnutGatk.config', :unified_genotyper_params

    # @param opts [Hash] job settings:
    #   :order - number rendered as the two-digit script-name prefix
    #   :input - BAM to call on; when nil it falls back to
    #            "<dirs[:input]>/<sample_name>_recal.bam"
    #   :dirs  - hash of directories; :output is always read, :input only
    #            when :input is nil
    def initialize(opts = {})
      @order = format('%02i', opts[:order])
      @input = opts[:input]
      @input = "#{opts[:dirs][:input]}/#{sample_name}_recal.bam" if @input.nil?
      @output = "#{opts[:dirs][:output]}/#{sample_name}.vcf"
      @job_name = "#{prefix_name}_snpcal_#{sample_name}"
    end

    # Generates the script and hands it to ::Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded inside the cluster options
    # @return [Array] the job name and the VCF path this job writes
    def run(previous_job_id = nil)
      puts "Submitting #{sample_name} Jobs: variant (SNPs, INDELs) -call "
      ::Cagnut::JobManage.submit(generate_script, @job_name, cluster_options(previous_job_id))
      [@job_name, @output]
    end

    # @param previous_job_id [Object, nil] passed through unchanged
    # @return [Hash] options given to the job submitter
    def cluster_options(previous_job_id = nil)
      cores = 6
      {
        previous_job_id: previous_job_id,
        var_env: [cores],
        adjust_memory: [
          "h_stack=adjustWorkingMem 256M #{cores}",
          "h_vmem=adjustWorkingMem 10G #{cores}"
        ],
        parallel_env: [cores],
        tools: %w[gatk unified_genotyper]
      }
    end

    # Configured params followed by the arguments derived from this job.
    # Works on a copy, so the configured list is not modified.
    #
    # @return [Array<String>] de-duplicated command-line arguments
    def unified_genotyper_options
      args = unified_genotyper_params['params'].dup
      args.push('-T UnifiedGenotyper', "-R #{ref_fasta}", "-I #{@input}", "-o #{@output}")
      args.push("-D #{snpdb}") if snpdb
      args.push("-L #{target}") if target
      args.uniq
    end

    # @return [Array<String>] java_path followed by the configured java
    #   arguments, de-duplicated
    def modified_java_array
      unified_genotyper_params['java'].dup.unshift(java_path).uniq
    end

    # @return [Hash] 'java' and 'params' argument lists, built once per instance
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => unified_genotyper_options
      }
    end

    # Writes "<order>_gatk_unified_genotyper.sh" into jobs_dir and makes it
    # executable for the owner only. The script exits 100 when "<output>.idx"
    # is missing or empty.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_unified_genotyper"
      file = File.join(jobs_dir, "#{script_name}.sh")
      # The text is rendered inside the block so the file is opened first.
      File.open(file, 'w') { |f| f.puts script_body(script_name) }
      File.chmod(0700, file)
      script_name
    end

    private

    # Renders the bash text of the job script.
    def script_body(script_name)
      <<-BASH.strip_heredoc
        #!/bin/bash

        cd "#{jobs_dir}/../"
        echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
        #{params_combination['java'].join(' ')} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

        EXITSTATUS=$?

        if [ ! -s "#{@output}.idx" ]
        then
          echo "vcf incomplete!"
          exit 100;
        fi

        if [ $EXITSTATUS -ne 0 ];then exit $EXITSTATUS;fi
        echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

      BASH
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module CagnutGatk
  # Writes and submits the job script that runs the GATK VariantEval tool
  # (-T VariantEval) against the configured dbSNP file for the configured
  # sample.
  #
  # Sample-wide values are delegated to Cagnut::Configuration.base; the tool's
  # java/params lists are delegated to CagnutGatk.config.
  class VariantEval
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :java_path,
                   :ref_fasta, :snpdb, :target, :prefix_name, :dodebug
    def_delegators :'CagnutGatk.config', :variant_eval_params

    # @param opts [Hash] job settings:
    #   :order - number rendered as the two-digit script-name prefix
    #   :input - VCF to evaluate; when nil it falls back to
    #            "<dirs[:input]>/<sample_name>_filtered.vcf"
    #   :dirs  - hash of directories; :output is always read, :input only
    #            when :input is nil
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @vcf_dir = opts[:dirs][:output]
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_filtered.vcf" : opts[:input]
      @output = "#{@vcf_dir}/#{sample_name}.eval"
      @job_name = "#{prefix_name}_snpEval_#{sample_name}"
    end

    # Generates the script and hands it to ::Cagnut::JobManage.submit.
    # Does nothing when no snpdb is configured.
    #
    # @param previous_job_id [Object, nil] forwarded inside the cluster options
    # @return [String, nil] the job name, or nil when snpdb is not set
    def run previous_job_id = nil
      return unless snpdb
      puts "Submitting #{sample_name} Jobs: variant (SNPs, INDELs) -evaluation "
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      @job_name
    end

    # @param previous_job_id [Object, nil] passed through unchanged
    # @return [Hash] options given to the job submitter
    def cluster_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_stack=256M', 'h_vmem=8G'],
        tools: ['gatk', 'variant_eval']
      }
    end

    # Configured params followed by the arguments derived from this job.
    # Works on a copy, so the configured list is not modified. $EVALNAME is
    # left for the shell; the generated script assigns it.
    #
    # @return [Array<String>] de-duplicated command-line arguments
    def variant_eval_options
      array = variant_eval_params['params'].dup
      array << "-T VariantEval"
      array << "-R #{ref_fasta}"
      array << "--dbsnp #{snpdb}"
      array << "-o #{@output}"
      array << "--eval:$EVALNAME #{@input}"
      array << "-L #{target}" if target
      array.uniq
    end

    # @return [Array<String>] java_path followed by the configured java
    #   arguments, de-duplicated
    def modified_java_array
      array = variant_eval_params['java'].dup
      array.unshift(java_path).uniq
    end

    # @return [Hash] 'java' and 'params' argument lists, built once per instance
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => variant_eval_options
      }
    end

    # Writes "<order>_gatk_variant_eval.sh" into jobs_dir and makes it
    # executable for the owner only. The script exits 100 when the .eval
    # output is missing or empty, otherwise with the tool's exit status.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_variant_eval"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<-BASH.strip_heredoc
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
          EVALNAME=$(basename "#{@vcf_dir}/#{sample_name}")

          #{params_combination['java'].join("\s")} \\
            #{params_combination['params'].join(" \\\n ")} \\
            #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          if [ ! -s "#{@output}" ]
          then
            echo "Missing #{@output}"
            exit 100
          fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          exit $EXITSTATUS
        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
|