cagnut_gatk 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +5 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +35 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/setup +8 -0
  12. data/cagnut_gatk.gemspec +27 -0
  13. data/lib/cagnut_gatk/base.rb +82 -0
  14. data/lib/cagnut_gatk/check_tools.rb +16 -0
  15. data/lib/cagnut_gatk/configuration.rb +56 -0
  16. data/lib/cagnut_gatk/functions/analyze_covariates.rb +94 -0
  17. data/lib/cagnut_gatk/functions/base_recalibrator.rb +99 -0
  18. data/lib/cagnut_gatk/functions/count_read.rb +77 -0
  19. data/lib/cagnut_gatk/functions/depth_of_coverage.rb +81 -0
  20. data/lib/cagnut_gatk/functions/haplotype_caller.rb +89 -0
  21. data/lib/cagnut_gatk/functions/indel_realigner.rb +101 -0
  22. data/lib/cagnut_gatk/functions/print_reads.rb +92 -0
  23. data/lib/cagnut_gatk/functions/realigner_target_creator.rb +92 -0
  24. data/lib/cagnut_gatk/functions/templates/analyze_covariates.sh +26 -0
  25. data/lib/cagnut_gatk/functions/templates/base_recalibrator.sh +30 -0
  26. data/lib/cagnut_gatk/functions/templates/count_read.sh +23 -0
  27. data/lib/cagnut_gatk/functions/templates/depth_of_coverage.sh +19 -0
  28. data/lib/cagnut_gatk/functions/templates/print_reads.sh +25 -0
  29. data/lib/cagnut_gatk/functions/unified_genotyper.rb +89 -0
  30. data/lib/cagnut_gatk/functions/variant_eval.rb +88 -0
  31. data/lib/cagnut_gatk/functions/variant_filtration.rb +88 -0
  32. data/lib/cagnut_gatk/util.rb +118 -0
  33. data/lib/cagnut_gatk/version.rb +3 -0
  34. data/lib/cagnut_gatk.rb +16 -0
  35. metadata +135 -0
@@ -0,0 +1,89 @@
1
module CagnutGatk
  # Writes and submits the job script that runs GATK HaplotypeCaller on one
  # sample and produces "<sample_name>.vcf" in the output directory.
  #
  # Pipeline-wide settings are read through Cagnut::Configuration.base and the
  # tool-specific 'java' / 'params' lists through CagnutGatk.config.
  class HaplotypeCaller
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :java_path,
                   :ref_fasta, :target, :prefix_name, :dodebug, :target_flanks_file
    def_delegators :'CagnutGatk.config', :haplotype_caller_params

    # @param opts [Hash]
    #   :order - step number, zero-padded to two digits in the script name
    #   :input - input BAM; when nil, "<dirs[:input]>/<sample_name>_recal.bam"
    #   :dirs  - Hash holding the :input and :output directories
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_recal.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{sample_name}.vcf"
      @job_name = "#{prefix_name}_haplotype_caller_#{sample_name}"
    end

    # Generates the script and hands it to Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded in the cluster options
    # @return [Array<String>] the job name and the output VCF path
    def run(previous_job_id = nil)
      puts "Submitting HaplotypeCaller #{sample_name} "
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @return [Hash] options passed to Cagnut::JobManage.submit
    def cluster_options(previous_job_id = nil)
      core_num = 6
      {
        previous_job_id: previous_job_id,
        var_env: [core_num, target],
        adjust_memory: ["h_stack=adjustWorkingMem 256M #{core_num}",
                        "h_vmem=adjustWorkingMem 10G #{core_num}"],
        parallel_env: [core_num],
        tools: ['gatk', 'haplotype_caller']
      }
    end

    # Configured params followed by the arguments this step always needs.
    # Array() tolerates a missing (nil) 'params' entry; dup keeps the
    # configuration object itself untouched.
    #
    # @return [Array<String>] de-duplicated GATK arguments
    def haplotype_caller_options
      options = Array(haplotype_caller_params['params']).dup
      options << '-T HaplotypeCaller'
      options << "-R #{ref_fasta}"
      options << "-I #{@input}"
      options << "-o #{@output}"
      options << "-L #{target_flanks_file}" if target_flanks_file
      options.uniq
    end

    # @return [Array<String>] java executable followed by the configured JVM
    #   arguments (a missing 'java' entry is treated as empty)
    def modified_java_array
      [java_path, *Array(haplotype_caller_params['java'])].uniq
    end

    # @return [Hash] memoized 'java' and 'params' argument lists
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => haplotype_caller_options
      }
    end

    # Writes "<order>_gatk_haplotype_caller.sh" into jobs_dir and makes it
    # executable for the owner only.
    #
    # The squiggly heredoc removes only the literal indentation below, so the
    # interpolated multi-line argument list cannot change how much is stripped
    # and no String#strip_heredoc extension is required.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_haplotype_caller"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<~BASH
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          #{params_combination['java'].join("\s")} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          if [ ! -s "#{@output}" ]
          then
            echo "vcf incomplete!"
            exit 100;
          fi

          if [ $EXITSTATUS -ne 0 ];then exit $EXITSTATUS;fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
@@ -0,0 +1,101 @@
1
module CagnutGatk
  # Writes and submits the job script that runs GATK IndelRealigner on one
  # sample and produces "<sample_name>_realn.bam" in the output directory.
  #
  # Pipeline-wide settings are read through Cagnut::Configuration.base and the
  # tool-specific 'java' / 'params' lists through CagnutGatk.config.
  class IndelRealigner
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :dodebug,
                   :ref_fasta, :target_flanks_file, :dbsnp_ref_indels,
                   :magic28, :prefix_name, :java_path
    def_delegators :'CagnutGatk.config', :indel_realigner_params

    # @param opts [Hash]
    #   :order         - step number, zero-padded to two digits in the script name
    #   :interval_list - intervals file passed as -targetIntervals
    #   :input         - input BAM; when nil, "<dirs[:input]>/<sample_name>_markdup.bam"
    #   :dirs          - Hash holding the :input and :output directories
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_indelRealigner_#{sample_name}"
      @interval_list = opts[:interval_list]
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_markdup.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{sample_name}_realn.bam"
    end

    # Generates the script and hands it to Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded in the cluster options
    # @return [Array<String>] the job name and the output BAM path
    def run(previous_job_id = nil)
      puts "Submitting indel_realigner #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @return [Hash] options passed to Cagnut::JobManage.submit
    def cluster_options(previous_job_id = nil)
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=8G'],
        tools: ['gatk', 'indel_realigner']
      }
    end

    # Configured params followed by the arguments this step always needs.
    # Array() tolerates a missing (nil) 'params' entry; dup keeps the
    # configuration object itself untouched.
    #
    # @return [Array<String>] de-duplicated GATK arguments
    def indel_realigner_options
      options = Array(indel_realigner_params['params']).dup
      options << '-T IndelRealigner'
      options << "-R #{ref_fasta}"
      options << "-targetIntervals #{@interval_list}"
      options << "-I #{@input}"
      options << "-o #{@output}"
      options << "-known #{dbsnp_ref_indels}" if dbsnp_ref_indels
      options.uniq
    end

    # @return [Array<String>] java executable followed by the configured JVM
    #   arguments (a missing 'java' entry is treated as empty)
    def modified_java_array
      [java_path, *Array(indel_realigner_params['java'])].uniq
    end

    # @return [Hash] memoized 'java' and 'params' argument lists
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => indel_realigner_options
      }
    end

    # Writes "<order>_gatk_indel_realigner.sh" into jobs_dir and makes it
    # executable for the owner only.
    #
    # The script refuses to start without a non-empty intervals file, and after
    # the run it verifies that the output BAM exists and that its last 28 bytes
    # (hex-dumped with xxd) equal magic28. All interpolated paths are kept
    # inside double quotes so they reach the shell as single words.
    #
    # The squiggly heredoc removes only the literal indentation below, so the
    # interpolated multi-line argument list cannot change how much is stripped
    # and no String#strip_heredoc extension is required.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_indel_realigner"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<~BASH
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
          # Check for intervals file
          if [ ! -s "#{@interval_list}" ];then
            echo "Error: Missing interval file: #{@interval_list} from realignTargetCreator_#{sample_name}"
            exit 100
          fi

          #{params_combination['java'].join("\s")} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          #force error when missing @output
          if [ ! -s "#{@output}" ]
          then
            echo "Missing @output BAM #{@output}"
            exit 100
          fi

          # Check BAM EOF
          BAM_28=$(tail -c 28 "#{@output}"|xxd -p)
          if [ "#{magic28}" != "$BAM_28" ]
          then
            echo "Error with BAM EOF" 1>&2
            exit 100
          fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          exit $EXITSTATUS
        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
@@ -0,0 +1,92 @@
1
module CagnutGatk
  # Writes and submits the job script that runs GATK PrintReads with a BQSR
  # table, turning "<name>_realn.bam" into "<name>_recal.bam".
  #
  # Pipeline-wide settings are read through Cagnut::Configuration.base and the
  # tool-specific 'java' / 'params' lists through CagnutGatk.config. The script
  # itself is rendered from templates/print_reads.sh.
  class PrintReads
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :target_flanks_file,
                   :ref_fasta, :prefix_name, :dodebug, :java_path
    def_delegators :'CagnutGatk.config', :print_reads_params

    # Aborts the process (via #output_file) when the input file name does not
    # contain "_realn.bam".
    #
    # @param opts [Hash]
    #   :order - step number, zero-padded to two digits in the script name
    #   :input - input BAM; when nil, "<dirs[:input]>/<sample_name>_realn.bam"
    #   :dirs  - Hash holding the :input, :output and :contrast directories
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_PrintReads_#{sample_name}"
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_realn.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{output_file}"
      @bqsr_file = "#{opts[:dirs][:contrast]}/#{replace_filename('_recal.csv')}"
    end

    # @return [String] memoized file name (without directory) of the input
    def file_basename
      @basename ||= File.basename @input
    end

    # @param target [String] text substituted for "_realn.bam"
    # @return [String] the input file name with "_realn.bam" replaced
    def replace_filename(target)
      file_basename.gsub '_realn.bam', target
    end

    # @return [String] the "_recal.bam" file name derived from the input name;
    #   terminates the process with an explanatory message when the input name
    #   contains no "_realn.bam" to replace
    def output_file
      output = replace_filename '_recal.bam'
      return output unless output == file_basename
      abort "Input file name must contain '_realn.bam': #{file_basename}"
    end

    # Generates the script and hands it to Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded in the cluster options
    # @return [Array<String>] the job name and the output BAM path
    def run(previous_job_id = nil)
      puts "Submitting PrintReads #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @return [Hash] options passed to Cagnut::JobManage.submit
    def cluster_options(previous_job_id = nil)
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=6G'],
        tools: ['gatk', 'print_reads']
      }
    end

    # Renders templates/print_reads.sh with #job_params, writes the result to
    # "<order>_gatk_print_reads.sh" in jobs_dir and makes it executable for the
    # owner only.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_print_reads"
      file = File.join jobs_dir, "#{script_name}.sh"
      path = File.expand_path '../templates/print_reads.sh', __FILE__
      template = Tilt.new path
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Configured params followed by the arguments this step always needs.
    # Array() tolerates a missing (nil) 'params' entry; dup keeps the
    # configuration object itself untouched.
    #
    # @return [Array<String>] de-duplicated GATK arguments
    def print_reads_options
      options = Array(print_reads_params['params']).dup
      options << '-T PrintReads'
      options << "-R #{ref_fasta}"
      options << "-I #{@input}"
      options << "-o #{@output}"
      options << "-BQSR #{@bqsr_file}"
      options << "-L #{target_flanks_file}" if target_flanks_file
      options.uniq
    end

    # @return [Array<String>] java executable followed by the configured JVM
    #   arguments (a missing 'java' entry is treated as empty)
    def modified_java_array
      [java_path, *Array(print_reads_params['java'])].uniq
    end

    # @return [Hash] 'java' and 'params' argument lists
    def params_combination
      {
        'java' => modified_java_array,
        'params' => print_reads_options
      }
    end

    # @param script_name [String] name echoed by the rendered script
    # @return [Hash] locals handed to the print_reads.sh template
    def job_params(script_name)
      {
        script_name: script_name,
        jobs_dir: jobs_dir,
        output: @output,
        bqsr_file: @bqsr_file,
        print_reads_params: params_combination,
        run_local: ::Cagnut::JobManage.run_local,
      }
    end
  end
end
@@ -0,0 +1,92 @@
1
module CagnutGatk
  # Writes and submits the job script that runs GATK RealignerTargetCreator on
  # one sample and produces "<sample_name>_markdup.interval_list".
  #
  # Pipeline-wide settings are read through Cagnut::Configuration.base and the
  # tool-specific 'java' / 'params' lists through CagnutGatk.config.
  class RealignerTargetCreator
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :dodebug,
                   :ref_fasta, :snpdb, :target_flanks_file, :prefix_name,
                   :java_path
    def_delegators :'CagnutGatk.config', :realigner_target_creator_params

    # @param opts [Hash]
    #   :order - step number, zero-padded to two digits in the script name
    #   :input - input BAM; when nil, "<dirs[:input]>/<sample_name>_markdup.bam"
    #   :dirs  - Hash holding the :input and :output directories
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_realignTargetCreator_#{sample_name}"
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_markdup.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{sample_name}_markdup.interval_list"
    end

    # Generates the script and hands it to Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded in the cluster options
    # @return [Array<String>] the job name and the interval list path
    def run(previous_job_id = nil)
      puts "Submitting realigner_target_creator #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @return [Hash] options passed to Cagnut::JobManage.submit
    def cluster_options(previous_job_id = nil)
      core_num = 6
      {
        previous_job_id: previous_job_id,
        var_env: [core_num],
        adjust_memory: ["h_stack=adjustWorkingMem 256M #{core_num}",
                        "h_vmem=adjustWorkingMem 10G #{core_num}"],
        parallel_env: [core_num],
        tools: ['gatk', 'realigner_target_creator']
      }
    end

    # Configured params followed by the arguments this step always needs.
    # Array() tolerates a missing (nil) 'params' entry; dup keeps the
    # configuration object itself untouched.
    #
    # --known is only added when snpdb is configured; emitting the bare flag
    # would make it swallow the following argument.
    #
    # @return [Array<String>] de-duplicated GATK arguments
    def realigner_target_creator_options
      options = Array(realigner_target_creator_params['params']).dup
      options << '-T RealignerTargetCreator'
      options << "-R #{ref_fasta}"
      options << "--known #{snpdb}" if snpdb
      options << "-I #{@input}"
      options << "-o #{@output}"
      options << "-L #{target_flanks_file}" if target_flanks_file
      options.uniq
    end

    # @return [Array<String>] java executable followed by the configured JVM
    #   arguments (a missing 'java' entry is treated as empty)
    def modified_java_array
      [java_path, *Array(realigner_target_creator_params['java'])].uniq
    end

    # @return [Hash] memoized 'java' and 'params' argument lists
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => realigner_target_creator_options
      }
    end

    # Writes "<order>_gatk_realigner_target_creator.sh" into jobs_dir and makes
    # it executable for the owner only.
    #
    # The squiggly heredoc removes only the literal indentation below, so the
    # interpolated multi-line argument list cannot change how much is stripped
    # and no String#strip_heredoc extension is required.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_realigner_target_creator"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<~BASH
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          #{params_combination['java'].join("\s")} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          #force error when missing output
          if [ ! -s "#{@output}" ]
          then
            echo "Missing indel_calls #{@output}, can not continue"
            exit 100
          fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          exit $EXITSTATUS
        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
@@ -0,0 +1,26 @@
1
#!/bin/bash
# Job-script template for the AnalyzeCovariates step; the interpolation
# placeholders are filled in when the template is rendered.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
# The path is quoted so that an empty or space-containing value cannot make
# the test succeed by accident.
if [ ! -s "#{after}" ]; then
  echo "Error: missing recalfile #{after}"
  exit 100
fi

# Remove leftover core.* files; the error printed when none exist is discarded.
rm core.* 2> /dev/null

#{analyze_covariates_params['java'].join("\s")} \\
#{analyze_covariates_params['params'].join(" \\\n ")} \\
#{run_local}

# Keep the exit status of the command above; it is returned at the very end.
EXITSTATUS=$?

#throw an error if no output file
if [ ! -s "#{output}" ];then
  echo "Error: no plots outputted"
  exit 100
fi

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
@@ -0,0 +1,30 @@
1
#!/bin/bash
# Job-script template for the BaseRecalibrator step; the interpolation
# placeholders are filled in when the template is rendered.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
# Remove leftover core.* files; the error printed when none exist is discarded.
rm core.* 2> /dev/null

# Check BAM EOF: the hex dump of the last 28 bytes of the input must equal the
# expected marker. The path is quoted so it reaches tail as a single word.
BAM_28=$(tail -c 28 "#{input}"|xxd -p)
if [ "#{magic28}" != "$BAM_28" ]
then
  echo "Error with BAM EOF"
  exit 100
fi

#{base_recalibrator_params['java'].join("\s")} \\
#{base_recalibrator_params['params'].join(" \\\n ")} \\
#{run_local}

# Keep the exit status of the command above; it is returned at the very end.
EXITSTATUS=$?

#force error when missing recalFile. Would prevent continuation of pipeline
if [ ! -s "#{output}" ]
then
  echo "Missing #{output}"
  exit 100
fi

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
@@ -0,0 +1,23 @@
1
#!/bin/bash
# Job-script template for the CountReads step; the interpolation placeholders
# are filled in when the template is rendered.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
#{count_reads_params['java'].join("\s")} \\
#{count_reads_params['params'].join(" \\\n ")} \\
#{run_local}

# Keep the exit status of the command above; it is checked at the very end.
EXITSTATUS=$?

# The path is quoted so that an empty or space-containing value cannot make
# the test succeed by accident.
if [ ! -s "#{output}" ]
then
  echo "Incomplete output file #{output}"
  exit 100
fi

# An output of exactly 100 bytes is reported as a memory error.
if [ "$(stat --printf="%s" "#{output}")" = 100 ];then
  echo "Memory Error. Exitting."
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

if [ $EXITSTATUS -ne 0 ];then exit $EXITSTATUS;fi
@@ -0,0 +1,19 @@
1
#!/bin/bash
# Job-script template for the DepthOfCoverage step; the interpolation
# placeholders are filled in when the template is rendered.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

#{depth_of_coverage_params['java'].join("\s")} \\
#{depth_of_coverage_params['params'].join(" \\\n ")} \\
#{run_local}

# Keep the exit status of the command above; it is returned at the very end.
EXITSTATUS=$?

# Stop with status 100 when the sample_statistics file was not produced.
if [ ! -e "#{output}.sample_statistics" ]; then
  echo "Missing output: #{output}"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
@@ -0,0 +1,25 @@
1
#!/bin/bash
# Job-script template for the PrintReads step; the interpolation placeholders
# are filled in when the template is rendered.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
if [ ! -s "#{bqsr_file}" ]; then
  echo "Error: missing recalfile #{bqsr_file}"
  exit 100
fi

# Remove leftover core.* files; the error printed when none exist is discarded.
rm core.* 2> /dev/null

#{print_reads_params['java'].join("\s")} \\
#{print_reads_params['params'].join(" \\\n ")} \\
#{run_local}

# Keep the exit status of the command above; it is returned at the very end.
EXITSTATUS=$?

# throw error if < 1024 bytes
# The -s test runs first: for a missing file stat prints nothing, the numeric
# comparison then fails with an error and the guard would be skipped.
if [ ! -s "#{output}" ] || [ "$(stat --printf="%s" "#{output}")" -le 1024 ]
then
  echo "Incomplete output file #{output}"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

exit $EXITSTATUS
@@ -0,0 +1,89 @@
1
module CagnutGatk
  # Writes and submits the job script that runs GATK UnifiedGenotyper on one
  # sample and produces "<sample_name>.vcf" in the output directory.
  #
  # Pipeline-wide settings are read through Cagnut::Configuration.base and the
  # tool-specific 'java' / 'params' lists through CagnutGatk.config.
  class UnifiedGenotyper
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :dodebug,
                   :ref_fasta, :snpdb, :target, :prefix_name, :java_path
    def_delegators :'CagnutGatk.config', :unified_genotyper_params

    # @param opts [Hash]
    #   :order - step number, zero-padded to two digits in the script name
    #   :input - input BAM; when nil, "<dirs[:input]>/<sample_name>_recal.bam"
    #   :dirs  - Hash holding the :input and :output directories
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_recal.bam" : opts[:input]
      @output = "#{opts[:dirs][:output]}/#{sample_name}.vcf"
      @job_name = "#{prefix_name}_snpcal_#{sample_name}"
    end

    # Generates the script and hands it to Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded in the cluster options
    # @return [Array<String>] the job name and the output VCF path
    def run(previous_job_id = nil)
      puts "Submitting #{sample_name} Jobs: variant (SNPs, INDELs) -call "
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # @return [Hash] options passed to Cagnut::JobManage.submit
    def cluster_options(previous_job_id = nil)
      core_num = 6
      {
        previous_job_id: previous_job_id,
        var_env: [core_num],
        adjust_memory: ["h_stack=adjustWorkingMem 256M #{core_num}",
                        "h_vmem=adjustWorkingMem 10G #{core_num}"],
        parallel_env: [core_num],
        tools: ['gatk', 'unified_genotyper']
      }
    end

    # Configured params followed by the arguments this step always needs.
    # Array() tolerates a missing (nil) 'params' entry; dup keeps the
    # configuration object itself untouched.
    #
    # @return [Array<String>] de-duplicated GATK arguments
    def unified_genotyper_options
      options = Array(unified_genotyper_params['params']).dup
      options << '-T UnifiedGenotyper'
      options << "-R #{ref_fasta}"
      options << "-I #{@input}"
      options << "-o #{@output}"
      options << "-D #{snpdb}" if snpdb
      options << "-L #{target}" if target
      options.uniq
    end

    # @return [Array<String>] java executable followed by the configured JVM
    #   arguments (a missing 'java' entry is treated as empty)
    def modified_java_array
      [java_path, *Array(unified_genotyper_params['java'])].uniq
    end

    # @return [Hash] memoized 'java' and 'params' argument lists
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => unified_genotyper_options
      }
    end

    # Writes "<order>_gatk_unified_genotyper.sh" into jobs_dir and makes it
    # executable for the owner only. The script treats a missing or empty
    # "<output>.idx" file as an incomplete VCF.
    #
    # The squiggly heredoc removes only the literal indentation below, so the
    # interpolated multi-line argument list cannot change how much is stripped
    # and no String#strip_heredoc extension is required.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_unified_genotyper"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<~BASH
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
          #{params_combination['java'].join("\s")} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          if [ ! -s "#{@output}.idx" ]
          then
            echo "vcf incomplete!"
            exit 100;
          fi

          if [ $EXITSTATUS -ne 0 ];then exit $EXITSTATUS;fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end
@@ -0,0 +1,88 @@
1
module CagnutGatk
  # Writes and submits the job script that runs GATK VariantEval on one sample
  # and produces "<sample_name>.eval" in the output directory. Nothing is
  # submitted when no snpdb is configured.
  #
  # Pipeline-wide settings are read through Cagnut::Configuration.base and the
  # tool-specific 'java' / 'params' lists through CagnutGatk.config.
  class VariantEval
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :java_path,
                   :ref_fasta, :snpdb, :target, :prefix_name, :dodebug
    def_delegators :'CagnutGatk.config', :variant_eval_params

    # @param opts [Hash]
    #   :order - step number, zero-padded to two digits in the script name
    #   :input - input VCF; when nil, "<dirs[:input]>/<sample_name>_filtered.vcf"
    #   :dirs  - Hash holding the :input and :output directories
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @vcf_dir = opts[:dirs][:output]
      @input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}_filtered.vcf" : opts[:input]
      @output = "#{@vcf_dir}/#{sample_name}.eval"
      @job_name = "#{prefix_name}_snpEval_#{sample_name}"
    end

    # Generates the script and hands it to Cagnut::JobManage.submit.
    #
    # @param previous_job_id [Object, nil] forwarded in the cluster options
    # @return [String, nil] the job name, or nil when snpdb is not configured
    def run(previous_job_id = nil)
      return unless snpdb
      puts "Submitting #{sample_name} Jobs: variant (SNPs, INDELs) -evaluation "
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      @job_name
    end

    # @return [Hash] options passed to Cagnut::JobManage.submit
    def cluster_options(previous_job_id = nil)
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_stack=256M', 'h_vmem=8G'],
        tools: ['gatk', 'variant_eval']
      }
    end

    # Configured params followed by the arguments this step always needs.
    # Array() tolerates a missing (nil) 'params' entry; dup keeps the
    # configuration object itself untouched. $EVALNAME is left for the shell:
    # the generated script assigns it before the command runs.
    #
    # @return [Array<String>] de-duplicated GATK arguments
    def variant_eval_options
      options = Array(variant_eval_params['params']).dup
      options << '-T VariantEval'
      options << "-R #{ref_fasta}"
      options << "--dbsnp #{snpdb}"
      options << "-o #{@output}"
      options << "--eval:$EVALNAME #{@input}"
      options << "-L #{target}" if target
      options.uniq
    end

    # @return [Array<String>] java executable followed by the configured JVM
    #   arguments (a missing 'java' entry is treated as empty)
    def modified_java_array
      [java_path, *Array(variant_eval_params['java'])].uniq
    end

    # @return [Hash] memoized 'java' and 'params' argument lists
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => variant_eval_options
      }
    end

    # Writes "<order>_gatk_variant_eval.sh" into jobs_dir and makes it
    # executable for the owner only. The path given to basename is quoted so
    # it reaches the command as a single word.
    #
    # The squiggly heredoc removes only the literal indentation below, so the
    # interpolated multi-line argument list cannot change how much is stripped
    # and no String#strip_heredoc extension is required.
    #
    # @return [String] the script name without the ".sh" extension
    def generate_script
      script_name = "#{@order}_gatk_variant_eval"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts <<~BASH
          #!/bin/bash

          cd "#{jobs_dir}/../"
          echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
          EVALNAME=$(basename "#{@vcf_dir}/#{sample_name}")

          #{params_combination['java'].join("\s")} \\
          #{params_combination['params'].join(" \\\n ")} \\
          #{::Cagnut::JobManage.run_local}

          EXITSTATUS=$?

          if [ ! -s "#{@output}" ]
          then
            echo "Missing #{@output}"
            exit 100
          fi
          echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

          exit $EXITSTATUS
        BASH
      end
      File.chmod(0700, file)
      script_name
    end
  end
end