cagnut_gatk 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cagnut_gatk.gemspec +27 -0
- data/lib/cagnut_gatk/base.rb +82 -0
- data/lib/cagnut_gatk/check_tools.rb +16 -0
- data/lib/cagnut_gatk/configuration.rb +56 -0
- data/lib/cagnut_gatk/functions/analyze_covariates.rb +94 -0
- data/lib/cagnut_gatk/functions/base_recalibrator.rb +99 -0
- data/lib/cagnut_gatk/functions/count_read.rb +77 -0
- data/lib/cagnut_gatk/functions/depth_of_coverage.rb +81 -0
- data/lib/cagnut_gatk/functions/haplotype_caller.rb +89 -0
- data/lib/cagnut_gatk/functions/indel_realigner.rb +101 -0
- data/lib/cagnut_gatk/functions/print_reads.rb +92 -0
- data/lib/cagnut_gatk/functions/realigner_target_creator.rb +92 -0
- data/lib/cagnut_gatk/functions/templates/analyze_covariates.sh +26 -0
- data/lib/cagnut_gatk/functions/templates/base_recalibrator.sh +30 -0
- data/lib/cagnut_gatk/functions/templates/count_read.sh +23 -0
- data/lib/cagnut_gatk/functions/templates/depth_of_coverage.sh +19 -0
- data/lib/cagnut_gatk/functions/templates/print_reads.sh +25 -0
- data/lib/cagnut_gatk/functions/unified_genotyper.rb +89 -0
- data/lib/cagnut_gatk/functions/variant_eval.rb +88 -0
- data/lib/cagnut_gatk/functions/variant_filtration.rb +88 -0
- data/lib/cagnut_gatk/util.rb +118 -0
- data/lib/cagnut_gatk/version.rb +3 -0
- data/lib/cagnut_gatk.rb +16 -0
- metadata +135 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
module CagnutGatk
|
2
|
+
class VariantFiltration
|
3
|
+
extend Forwardable
|
4
|
+
|
5
|
+
def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :prefix_name,
|
6
|
+
:ref_fasta, :dodebug, :java_path
|
7
|
+
def_delegators :'CagnutGatk.config', :variant_filtration_params
|
8
|
+
|
9
|
+
def initialize opts = {}
|
10
|
+
@order = sprintf '%02i', opts[:order]
|
11
|
+
@input = opts[:input].nil? ? "#{opts[:dirs][:input]}/#{sample_name}.vcf" : opts[:input]
|
12
|
+
@job_name = "#{prefix_name}_snpFiltr_#{sample_name}"
|
13
|
+
@output = "#{opts[:dirs][:output]}/#{sample_name}_filtered.vcf"
|
14
|
+
end
|
15
|
+
|
16
|
+
def run previous_job_id = nil
|
17
|
+
puts "Submitting #{sample_name} Jobs: variant (SNPs, INDELs) -filtration"
|
18
|
+
script_name = generate_script
|
19
|
+
::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
|
20
|
+
[@job_name, @output]
|
21
|
+
end
|
22
|
+
|
23
|
+
def cluster_options previous_job_id = nil
|
24
|
+
{
|
25
|
+
previous_job_id: previous_job_id,
|
26
|
+
adjust_memory: ['h_stack=256M', 'h_vmem=10G'],
|
27
|
+
tools: ['gatk', 'variant_filtration']
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
def params_combination
|
32
|
+
@params_combination_hash ||= {
|
33
|
+
'java' => modified_java_array,
|
34
|
+
'params' => variant_filtration_options
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
def variant_filtration_options
|
39
|
+
array = variant_filtration_params['params'].dup
|
40
|
+
array << "-T VariantFiltration"
|
41
|
+
array << "-R #{ref_fasta}"
|
42
|
+
array << "--variant:VCF #{@input}"
|
43
|
+
array << "-o #{@output}"
|
44
|
+
array.uniq
|
45
|
+
end
|
46
|
+
|
47
|
+
def modified_java_array
|
48
|
+
array = variant_filtration_params['java'].dup
|
49
|
+
array.unshift(java_path).uniq
|
50
|
+
end
|
51
|
+
|
52
|
+
def generate_script
|
53
|
+
script_name = "#{@order}_gatk_variant_filtration"
|
54
|
+
file = File.join jobs_dir, "#{script_name}.sh"
|
55
|
+
File.open(file, 'w') do |f|
|
56
|
+
f.puts <<-BASH.strip_heredoc
|
57
|
+
#!/bin/bash
|
58
|
+
|
59
|
+
cd "#{jobs_dir}/../"
|
60
|
+
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
61
|
+
if [ ! -s "#{@input}.idx" ]; then
|
62
|
+
echo "Incomplete VCF:" #{@input}
|
63
|
+
exit 100
|
64
|
+
fi
|
65
|
+
|
66
|
+
#{params_combination['java'].join("\s")} \\
|
67
|
+
#{params_combination['params'].join(" \\\n ")} \\
|
68
|
+
#{::Cagnut::JobManage.run_local}
|
69
|
+
|
70
|
+
EXITSTATUS=$?
|
71
|
+
|
72
|
+
#if [ ! -s "#{@output}" ]; then exit 100;fi;
|
73
|
+
|
74
|
+
if [ ! -s "#{@output}.idx" ]
|
75
|
+
then
|
76
|
+
echo "vcf incomplete!"
|
77
|
+
exit 100;
|
78
|
+
fi
|
79
|
+
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
80
|
+
|
81
|
+
exit $EXITSTATUS
|
82
|
+
BASH
|
83
|
+
end
|
84
|
+
File.chmod(0700, file)
|
85
|
+
script_name
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module CagnutGatk
  # Convenience wrapper around CagnutGatk::Base used to chain GATK steps.
  #
  # Every public step takes the same leading arguments
  # (dirs, order, previous_job_id, filename) and returns the value(s) needed
  # by the next step, always ending with the next step number (order + 1).
  # The exact return shapes of the CagnutGatk::Base methods are not visible
  # here; they are assumed from how their results are destructured below.
  class Util
    attr_accessor :gatk, :config

    # @param config [Hash] pipeline configuration; this class reads
    #   config['refs']['targets_file'] and config['refs']['target_flanks_file']
    def initialize config
      @config = config
      @gatk = CagnutGatk::Base.new
    end

    # Runs count_read once for the whole input and, when a targets file is
    # configured, once more restricted to it.
    # @return [Integer] next step number
    def count_read dirs, order=1, previous_job_id=nil, filename=nil
      count_read_params(filename).each do |option|
        gatk.count_read dirs, order, previous_job_id, option
      end
      order + 1
    end

    # Full recalibration chain: realigner_target_creator -> indel_realigner
    # -> base_recalibrator (two passes) -> print_reads.
    #
    # @return [Array] [last job id, filename returned by print_reads, next
    #   step number]. The second element used to be a duplicate of the job
    #   id, which silently dropped the filename produced by print_reads and
    #   disagreed with every other step wrapper in this class.
    def recal dirs, order=1, previous_job_id=nil, filename = nil
      # Legacy note kept from the original source (non-"hg" references used
      # to skip realignment):
      # if (@config['cagnut']['ref_fasta'].scan 'hg').empty?
      #   # filename = "s_#{@config['line']}_merged_markdup.bam"
      #   filename = "#{line}_realn.bam"
      # else
      #   previous_job_id, target_interval = realigner_target_creator previous_job_id, filename
      #   previous_job_id, filename = indel_realigner previous_job_id, filename, target_interval
      # end

      previous_job_id, interval_list, order = realigner_target_creator dirs, order, previous_job_id, filename
      previous_job_id, filename, order = indel_realigner dirs, order, previous_job_id, filename, interval_list
      previous_job_id, order = base_recalibrator dirs, order, previous_job_id, filename
      previous_job_id, filename, order = print_reads dirs, order, previous_job_id, filename
      [previous_job_id, filename, order]
    end

    # @return [Array] [job name, interval list, next step number]
    def realigner_target_creator dirs, order=1, previous_job_id=nil, filename=nil
      job_name, interval_list = gatk.realigner_target_creator dirs, order, previous_job_id, filename
      [job_name, interval_list, order + 1]
    end

    # @return [Array] [job name, output filename, next step number]
    def indel_realigner dirs, order=1, previous_job_id=nil, filename=nil, interval_list=nil
      job_name, filename = gatk.indel_realigner dirs, order, previous_job_id, filename, interval_list
      [job_name, filename, order + 1]
    end

    # Runs base_recalibrator twice with the same step number: first with
    # has_bqsr_file: false, then with has_bqsr_file: true. The second pass
    # receives the value returned by the first as its previous_job_id.
    # @return [Array] [value returned by the last pass, next step number]
    def base_recalibrator dirs, order=1, previous_job_id=nil, filename=nil
      before_and_after_generated_bqsr_file.each do |option|
        previous_job_id = gatk.base_recalibrator dirs, order, previous_job_id, filename, option
      end
      [previous_job_id, order + 1]
    end

    # @return [Array] [job name, next step number]
    def analyze_covariates dirs, order=1, previous_job_id=nil, filename=nil
      job_name = gatk.analyze_covariates dirs, order, previous_job_id, filename
      [job_name, order + 1]
    end

    # @return [Array] [job name, output filename, next step number]
    def print_reads dirs, order=1, previous_job_id=nil, file_name=nil
      job_name, filename = gatk.print_reads dirs, order, previous_job_id, file_name
      [job_name, filename, order + 1]
    end

    # Runs depth_of_coverage for the genome and for each configured target /
    # flank interval file.
    # @return [Integer] next step number
    def depth_of_coverage dirs, order=1, previous_job_id=nil, filename=nil
      depth_of_coverage_params.each do |option|
        gatk.depth_of_coverage dirs, order, previous_job_id, filename, option
      end
      order + 1
    end

    # @return [Array] [job name, output filename, next step number]
    def haplotype_caller dirs, order=1, previous_job_id=nil, file_name=nil
      job_name, filename = gatk.haplotype_caller dirs, order, previous_job_id, file_name
      [job_name, filename, order + 1]
    end

    # @return [Array] [job name, output filename, next step number]
    def unified_genotyper dirs, order=1, previous_job_id=nil, file_name=nil
      job_name, filename = gatk.unified_genotyper dirs, order, previous_job_id, file_name
      [job_name, filename, order + 1]
    end

    # variant_filtration followed by variant_eval.
    # @return [Array] whatever #variant_eval returns
    def snpcal dirs, order=1, previous_job_id=nil, filename = nil
      previous_job_id, filename, order = variant_filtration dirs, order, previous_job_id, filename
      variant_eval dirs, order, previous_job_id, filename
    end

    # @return [Array] [job name, output filename, next step number]
    def variant_filtration dirs, order=1, previous_job_id=nil, filename=nil
      job_name, filename = gatk.variant_filtration dirs, order, previous_job_id, filename
      [job_name, filename, order + 1]
    end

    # @return [Array] [job name, output filename, next step number]
    def variant_eval dirs, order=1, previous_job_id=nil, filename=nil
      job_name, filename = gatk.variant_eval dirs, order, previous_job_id, filename
      [job_name, filename, order + 1]
    end

    private

    # Option hashes for the two base_recalibrator passes, in run order.
    def before_and_after_generated_bqsr_file
      [{ has_bqsr_file: false }, { has_bqsr_file: true }]
    end

    # One option hash for the plain input, plus one limited to the
    # configured targets file when present.
    def count_read_params filename
      ary = [{ input: filename }]
      targets = @config['refs']['targets_file']
      ary << { input: filename, target: targets } if targets
      ary
    end

    # 'genome' always; 'target' and 'flank' only when the corresponding
    # interval file is configured.
    def depth_of_coverage_params
      ary = [{ suffix: 'genome' }]
      targets = @config['refs']['targets_file']
      flanks = @config['refs']['target_flanks_file']
      ary << { suffix: 'target', target: targets } if targets
      ary << { suffix: 'flank', target: flanks } if flanks
      ary
    end
  end
end
|
data/lib/cagnut_gatk.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "cagnut_gatk/version"
|
2
|
+
|
3
|
+
module CagnutGatk
  # Lazily initialised GATK configuration.
  #
  # On first use, CagnutGatk::Configuration.load is called with the core
  # config (Cagnut::Configuration.config) and the 'gatk' entry of
  # Cagnut::Configuration.params; the Configuration instance is then cached
  # and returned on every later call.
  def self.config
    return @config if @config

    core_config = Cagnut::Configuration.config
    gatk_params = Cagnut::Configuration.params['gatk']
    CagnutGatk::Configuration.load(core_config, gatk_params)
    @config = CagnutGatk::Configuration.instance
  end
end
|
13
|
+
|
14
|
+
require 'cagnut_gatk/configuration'
|
15
|
+
require 'cagnut_gatk/base'
|
16
|
+
require 'cagnut_gatk/util'
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cagnut_gatk
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shi-Gang Wang
|
8
|
+
- Tse-Ching Ho
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-11-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: cagnut_core
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bundler
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.12'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.12'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rake
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '10.0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '10.0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rspec
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '3.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.0'
|
70
|
+
description: Cagnut Gatk tools
|
71
|
+
email:
|
72
|
+
- seanwang@goldenio.com
|
73
|
+
- tsechingho@goldenio.com
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".ruby-version"
|
81
|
+
- ".travis.yml"
|
82
|
+
- Gemfile
|
83
|
+
- LICENSE.txt
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- bin/console
|
87
|
+
- bin/setup
|
88
|
+
- cagnut_gatk.gemspec
|
89
|
+
- lib/cagnut_gatk.rb
|
90
|
+
- lib/cagnut_gatk/base.rb
|
91
|
+
- lib/cagnut_gatk/check_tools.rb
|
92
|
+
- lib/cagnut_gatk/configuration.rb
|
93
|
+
- lib/cagnut_gatk/functions/analyze_covariates.rb
|
94
|
+
- lib/cagnut_gatk/functions/base_recalibrator.rb
|
95
|
+
- lib/cagnut_gatk/functions/count_read.rb
|
96
|
+
- lib/cagnut_gatk/functions/depth_of_coverage.rb
|
97
|
+
- lib/cagnut_gatk/functions/haplotype_caller.rb
|
98
|
+
- lib/cagnut_gatk/functions/indel_realigner.rb
|
99
|
+
- lib/cagnut_gatk/functions/print_reads.rb
|
100
|
+
- lib/cagnut_gatk/functions/realigner_target_creator.rb
|
101
|
+
- lib/cagnut_gatk/functions/templates/analyze_covariates.sh
|
102
|
+
- lib/cagnut_gatk/functions/templates/base_recalibrator.sh
|
103
|
+
- lib/cagnut_gatk/functions/templates/count_read.sh
|
104
|
+
- lib/cagnut_gatk/functions/templates/depth_of_coverage.sh
|
105
|
+
- lib/cagnut_gatk/functions/templates/print_reads.sh
|
106
|
+
- lib/cagnut_gatk/functions/unified_genotyper.rb
|
107
|
+
- lib/cagnut_gatk/functions/variant_eval.rb
|
108
|
+
- lib/cagnut_gatk/functions/variant_filtration.rb
|
109
|
+
- lib/cagnut_gatk/util.rb
|
110
|
+
- lib/cagnut_gatk/version.rb
|
111
|
+
homepage: https://github.com/CAGNUT/cagnut_gatk
|
112
|
+
licenses:
|
113
|
+
- MIT
|
114
|
+
metadata: {}
|
115
|
+
post_install_message:
|
116
|
+
rdoc_options: []
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
requirements: []
|
130
|
+
rubyforge_project:
|
131
|
+
rubygems_version: 2.5.1
|
132
|
+
signing_key:
|
133
|
+
specification_version: 4
|
134
|
+
summary: Cagnut Gatk tools
|
135
|
+
test_files: []
|