cagnut_gatk 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +5 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +35 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/setup +8 -0
  12. data/cagnut_gatk.gemspec +27 -0
  13. data/lib/cagnut_gatk/base.rb +82 -0
  14. data/lib/cagnut_gatk/check_tools.rb +16 -0
  15. data/lib/cagnut_gatk/configuration.rb +56 -0
  16. data/lib/cagnut_gatk/functions/analyze_covariates.rb +94 -0
  17. data/lib/cagnut_gatk/functions/base_recalibrator.rb +99 -0
  18. data/lib/cagnut_gatk/functions/count_read.rb +77 -0
  19. data/lib/cagnut_gatk/functions/depth_of_coverage.rb +81 -0
  20. data/lib/cagnut_gatk/functions/haplotype_caller.rb +89 -0
  21. data/lib/cagnut_gatk/functions/indel_realigner.rb +101 -0
  22. data/lib/cagnut_gatk/functions/print_reads.rb +92 -0
  23. data/lib/cagnut_gatk/functions/realigner_target_creator.rb +92 -0
  24. data/lib/cagnut_gatk/functions/templates/analyze_covariates.sh +26 -0
  25. data/lib/cagnut_gatk/functions/templates/base_recalibrator.sh +30 -0
  26. data/lib/cagnut_gatk/functions/templates/count_read.sh +23 -0
  27. data/lib/cagnut_gatk/functions/templates/depth_of_coverage.sh +19 -0
  28. data/lib/cagnut_gatk/functions/templates/print_reads.sh +25 -0
  29. data/lib/cagnut_gatk/functions/unified_genotyper.rb +89 -0
  30. data/lib/cagnut_gatk/functions/variant_eval.rb +88 -0
  31. data/lib/cagnut_gatk/functions/variant_filtration.rb +88 -0
  32. data/lib/cagnut_gatk/util.rb +118 -0
  33. data/lib/cagnut_gatk/version.rb +3 -0
  34. data/lib/cagnut_gatk.rb +16 -0
  35. metadata +135 -0
@@ -0,0 +1,88 @@
1
require 'forwardable'

module CagnutGatk
  # Generates and submits the job script that runs GATK VariantFiltration
  # for a single sample.
  #
  # Sample and job settings (sample_name, jobs_dir, prefix_name, ref_fasta,
  # java_path, ...) are delegated to Cagnut::Configuration.base; the tool
  # arguments are delegated to CagnutGatk.config.variant_filtration_params,
  # which is read as a Hash with 'java' and 'params' arrays.
  class VariantFiltration
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :prefix_name,
                   :ref_fasta, :dodebug, :java_path
    def_delegators :'CagnutGatk.config', :variant_filtration_params

    # opts[:order] - Integer step number; zero-padded to two digits and used
    #                as the prefix of the generated script name.
    # opts[:input] - optional path of the VCF to filter. When absent the
    #                input is "<opts[:dirs][:input]>/<sample_name>.vcf".
    # opts[:dirs]  - Hash providing :output (always read) and :input (read
    #                only when opts[:input] is not given).
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input] || "#{opts[:dirs][:input]}/#{sample_name}.vcf"
      @job_name = "#{prefix_name}_snpFiltr_#{sample_name}"
      @output = "#{opts[:dirs][:output]}/#{sample_name}_filtered.vcf"
    end

    # Writes the job script and hands it to Cagnut::JobManage.submit.
    #
    # previous_job_id - forwarded to the job manager inside cluster_options.
    #
    # Returns [job_name, output_vcf_path].
    def run previous_job_id = nil
      puts "Submitting #{sample_name} Jobs: variant (SNPs, INDELs) -filtration"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, cluster_options(previous_job_id)
      [@job_name, @output]
    end

    # Options Hash passed to Cagnut::JobManage.submit for this job.
    def cluster_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_stack=256M', 'h_vmem=10G'],
        tools: ['gatk', 'variant_filtration']
      }
    end

    # Memoized Hash with the 'java' launcher parts and the GATK 'params'.
    def params_combination
      @params_combination_hash ||= {
        'java' => modified_java_array,
        'params' => variant_filtration_options
      }
    end

    # Configured GATK parameters plus the walker, reference, input and
    # output arguments. Works on a copy so the configuration is not mutated.
    def variant_filtration_options
      array = variant_filtration_params['params'].dup
      array << "-T VariantFiltration"
      array << "-R #{ref_fasta}"
      array << "--variant:VCF #{@input}"
      array << "-o #{@output}"
      array.uniq
    end

    # Configured java arguments with the java executable path put first.
    def modified_java_array
      array = variant_filtration_params['java'].dup
      array.unshift(java_path).uniq
    end

    # Writes "<order>_gatk_variant_filtration.sh" into jobs_dir, makes it
    # executable for the owner only, and returns the script name (without
    # the ".sh" extension).
    def generate_script
      script_name = "#{@order}_gatk_variant_filtration"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') { |f| f.puts script_lines(script_name).join("\n") }
      File.chmod(0700, file)
      script_name
    end

    private

    # Lines of the bash job script. They are emitted flush-left from plain
    # Ruby strings, so the layout (in particular the shebang on column 0)
    # does not depend on String#strip_heredoc from ActiveSupport or on the
    # indentation of interpolated values.
    def script_lines script_name
      finished_log = "#{jobs_dir}/finished_jobs"
      [
        '#!/bin/bash',
        '',
        %(cd "#{jobs_dir}/../"),
        %(echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{finished_log}"),
        # A missing or empty .idx next to the input is treated as an incomplete VCF.
        %(if [ ! -s "#{@input}.idx" ]; then),
        # The path is kept inside the quotes so paths with spaces print intact.
        %(  echo "Incomplete VCF: #{@input}"),
        '  exit 100',
        'fi',
        '',
        gatk_command,
        '',
        'EXITSTATUS=$?',
        '',
        %(#if [ ! -s "#{@output}" ]; then exit 100;fi;),
        '',
        %(if [ ! -s "#{@output}.idx" ]),
        'then',
        '  echo "vcf incomplete!"',
        '  exit 100;',
        'fi',
        %(echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{finished_log}"),
        '',
        'exit $EXITSTATUS'
      ]
    end

    # The java/GATK invocation as one shell command spread over
    # backslash-continued lines; the value of Cagnut::JobManage.run_local
    # is appended as the final segment.
    def gatk_command
      segments = [params_combination['java'].join(' ')]
      segments.concat params_combination['params']
      segments << ::Cagnut::JobManage.run_local.to_s
      segments.join(" \\\n  ")
    end
  end
end
@@ -0,0 +1,118 @@
1
module CagnutGatk
  # Pipeline-level helpers around CagnutGatk::Base.
  #
  # Each public method forwards to the Base method of the same name and
  # returns the step's result together with the next order number
  # (order + 1), so calls can be chained by destructuring the return value.
  class Util
    attr_accessor :gatk, :config

    # config - Hash-like settings; only config['refs'] is read here.
    def initialize config
      @config = config
      @gatk = CagnutGatk::Base.new
    end

    # Runs count_read once for the whole input and, when a targets file is
    # configured, once more restricted to it. Returns the next order number.
    def count_read dirs, order=1, previous_job_id=nil, filename=nil
      count_read_params(filename).each do |option|
        gatk.count_read dirs, order, previous_job_id, option
      end
      order + 1
    end

    # Chains realigner_target_creator -> indel_realigner ->
    # base_recalibrator -> print_reads, feeding each step the job id,
    # file and order produced by the previous one.
    #
    # Returns [job_name, filename, next_order] of the final print_reads
    # step, the same shape as the other wrappers in this class. The
    # filename is returned in the second slot (rather than the job name a
    # second time) so callers can pass it to the next step.
    def recal dirs, order=1, previous_job_id=nil, filename = nil
      # Disabled legacy branch, kept for reference:
      # if (@config['cagnut']['ref_fasta'].scan 'hg').empty?
      #   # filename = "s_#{@config['line']}_merged_markdup.bam"
      #   filename = "#{line}_realn.bam"
      # else
      #   previous_job_id, target_interval = realigner_target_creator previous_job_id, filename
      #   previous_job_id, filename = indel_realigner previous_job_id, filename, target_interval
      # end

      previous_job_id, interval_list, order = realigner_target_creator dirs, order, previous_job_id, filename
      previous_job_id, filename, order = indel_realigner dirs, order, previous_job_id, filename, interval_list
      previous_job_id, order = base_recalibrator dirs, order, previous_job_id, filename
      print_reads dirs, order, previous_job_id, filename
    end

    # Returns [job_name, interval_list, next_order].
    def realigner_target_creator dirs, order=1, previous_job_id=nil, filename=nil
      job_name, interval_list = gatk.realigner_target_creator dirs, order, previous_job_id, filename
      [job_name, interval_list, order + 1]
    end

    # Returns [job_name, filename, next_order].
    def indel_realigner dirs, order=1, previous_job_id=nil, filename=nil, interval_list=nil
      job_name, filename = gatk.indel_realigner dirs, order, previous_job_id, filename, interval_list
      [job_name, filename, order + 1]
    end

    # Calls Base#base_recalibrator twice, first with has_bqsr_file: false
    # and then with has_bqsr_file: true; the second call receives the value
    # returned by the first as its previous job id.
    #
    # Returns [value_returned_by_the_last_call, next_order].
    def base_recalibrator dirs, order=1, previous_job_id=nil, filename=nil
      before_and_after_generated_bqsr_file.each do |option|
        previous_job_id = gatk.base_recalibrator dirs, order, previous_job_id, filename, option
      end
      [previous_job_id, order + 1]
    end

    # Returns [job_name, next_order].
    def analyze_covariates dirs, order=1, previous_job_id=nil, filename=nil
      job_name = gatk.analyze_covariates dirs, order, previous_job_id, filename
      [job_name, order + 1]
    end

    # Returns [job_name, filename, next_order].
    def print_reads dirs, order=1, previous_job_id=nil, file_name=nil
      job_name, filename = gatk.print_reads dirs, order, previous_job_id, file_name
      [job_name, filename, order + 1]
    end

    # Runs depth_of_coverage for the genome and for each configured
    # targets / target-flanks file. Returns the next order number.
    def depth_of_coverage dirs, order=1, previous_job_id=nil, filename=nil
      depth_of_coverage_params.each do |option|
        gatk.depth_of_coverage dirs, order, previous_job_id, filename, option
      end
      order + 1
    end

    # Returns [job_name, filename, next_order].
    def haplotype_caller dirs, order=1, previous_job_id=nil, file_name=nil
      job_name, filename = gatk.haplotype_caller dirs, order, previous_job_id, file_name
      [job_name, filename, order + 1]
    end

    # Returns [job_name, filename, next_order].
    def unified_genotyper dirs, order=1, previous_job_id=nil, file_name=nil
      job_name, filename = gatk.unified_genotyper dirs, order, previous_job_id, file_name
      [job_name, filename, order + 1]
    end

    # Runs variant_filtration followed by variant_eval on its output.
    # Returns variant_eval's [job_name, filename, next_order].
    def snpcal dirs, order=1, previous_job_id=nil, filename = nil
      previous_job_id, filename, order = variant_filtration dirs, order, previous_job_id, filename
      variant_eval dirs, order, previous_job_id, filename
    end

    # Returns [job_name, filename, next_order].
    def variant_filtration dirs, order=1, previous_job_id=nil, filename=nil
      job_name, filename = gatk.variant_filtration dirs, order, previous_job_id, filename
      [job_name, filename, order + 1]
    end

    # Returns [job_name, filename, next_order].
    def variant_eval dirs, order=1, previous_job_id=nil, filename=nil
      job_name, filename = gatk.variant_eval dirs, order, previous_job_id, filename
      [job_name, filename, order + 1]
    end

    private

    # Option sets for the two base_recalibrator passes.
    def before_and_after_generated_bqsr_file
      [{ has_bqsr_file: false }, { has_bqsr_file: true }]
    end

    # One option Hash for the plain input plus, when refs.targets_file is
    # set, one restricted to that targets file.
    def count_read_params filename
      ary = [{ input: filename }]
      targets = ref_setting 'targets_file'
      ary << { input: filename, target: targets } if targets
      ary
    end

    # 'genome' always; 'target' and 'flank' only when the matching file is
    # configured under refs.
    def depth_of_coverage_params
      ary = [{ suffix: 'genome' }]
      targets = ref_setting 'targets_file'
      flanks = ref_setting 'target_flanks_file'
      ary << { suffix: 'target', target: targets } if targets
      ary << { suffix: 'flank', target: flanks } if flanks
      ary
    end

    # Value stored under config['refs'][key], or nil when the 'refs'
    # section itself is missing (the keys under it are optional).
    def ref_setting key
      refs = @config['refs']
      refs && refs[key]
    end
  end
end
@@ -0,0 +1,3 @@
1
module CagnutGatk
  # Gem version string. Frozen so the shared constant cannot be mutated in place.
  VERSION = '0.3.0'.freeze
end
@@ -0,0 +1,16 @@
1
+ require "cagnut_gatk/version"
2
+
3
module CagnutGatk
  # Returns the CagnutGatk::Configuration instance. On the first call the
  # configuration is loaded; the result is cached in a module-level
  # instance variable and returned as-is on later calls.
  def self.config
    @config ||= load_configuration
  end

  # Loads CagnutGatk::Configuration from the Cagnut core configuration and
  # the core params stored under the 'gatk' key, then returns its instance.
  def self.load_configuration
    settings = CagnutGatk::Configuration
    settings.load(Cagnut::Configuration.config, Cagnut::Configuration.params['gatk'])
    settings.instance
  end
  private_class_method :load_configuration
end
13
+
14
+ require 'cagnut_gatk/configuration'
15
+ require 'cagnut_gatk/base'
16
+ require 'cagnut_gatk/util'
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cagnut_gatk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Shi-Gang Wang
8
+ - Tse-Ching Ho
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2016-11-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: cagnut_core
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: bundler
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1.12'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.12'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rake
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '10.0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '10.0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rspec
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '3.0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ description: Cagnut Gatk tools
71
+ email:
72
+ - seanwang@goldenio.com
73
+ - tsechingho@goldenio.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - ".ruby-version"
81
+ - ".travis.yml"
82
+ - Gemfile
83
+ - LICENSE.txt
84
+ - README.md
85
+ - Rakefile
86
+ - bin/console
87
+ - bin/setup
88
+ - cagnut_gatk.gemspec
89
+ - lib/cagnut_gatk.rb
90
+ - lib/cagnut_gatk/base.rb
91
+ - lib/cagnut_gatk/check_tools.rb
92
+ - lib/cagnut_gatk/configuration.rb
93
+ - lib/cagnut_gatk/functions/analyze_covariates.rb
94
+ - lib/cagnut_gatk/functions/base_recalibrator.rb
95
+ - lib/cagnut_gatk/functions/count_read.rb
96
+ - lib/cagnut_gatk/functions/depth_of_coverage.rb
97
+ - lib/cagnut_gatk/functions/haplotype_caller.rb
98
+ - lib/cagnut_gatk/functions/indel_realigner.rb
99
+ - lib/cagnut_gatk/functions/print_reads.rb
100
+ - lib/cagnut_gatk/functions/realigner_target_creator.rb
101
+ - lib/cagnut_gatk/functions/templates/analyze_covariates.sh
102
+ - lib/cagnut_gatk/functions/templates/base_recalibrator.sh
103
+ - lib/cagnut_gatk/functions/templates/count_read.sh
104
+ - lib/cagnut_gatk/functions/templates/depth_of_coverage.sh
105
+ - lib/cagnut_gatk/functions/templates/print_reads.sh
106
+ - lib/cagnut_gatk/functions/unified_genotyper.rb
107
+ - lib/cagnut_gatk/functions/variant_eval.rb
108
+ - lib/cagnut_gatk/functions/variant_filtration.rb
109
+ - lib/cagnut_gatk/util.rb
110
+ - lib/cagnut_gatk/version.rb
111
+ homepage: https://github.com/CAGNUT/cagnut_gatk
112
+ licenses:
113
+ - MIT
114
+ metadata: {}
115
+ post_install_message:
116
+ rdoc_options: []
117
+ require_paths:
118
+ - lib
119
+ required_ruby_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ required_rubygems_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ requirements: []
130
+ rubyforge_project:
131
+ rubygems_version: 2.5.1
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: Cagnut Gatk tools
135
+ test_files: []