cagnut_bwa 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 212201476cdd487c7d175275b4b2dda5064794a8
4
+ data.tar.gz: e1b5fb5113d105b8ba7782c3b34e100fb05f6820
5
+ SHA512:
6
+ metadata.gz: 4432d3082474d04bed19517a8cb42c2c350706c7d06ecd67e888b1997c6137e601a56bdb00fc86979b8b600dcaad6a3386b305fa0922024fb79b3dfcda233ad9
7
+ data.tar.gz: 002602c3c99c28f7eb7eef0504038c1c07e3df88fafecf8b15c53c202c96bd32040032abd2fda8d67f60edd984f6da195c0e36473a04f55179f27d9668b586d9
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.3.1
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cagnut_bwa.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Goldenio Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # CagnutBwa
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's Gemfile:
6
+
7
+ ```ruby
8
+ gem 'cagnut_bwa'
9
+ ```
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cagnut_bwa
18
+
19
+ ## Usage
20
+
21
+ ## Development
22
+
23
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
24
+
25
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
26
+
27
+ ## Contributing
28
+
29
+ Bug reports and pull requests are welcome on GitHub at https://github.com/CAGNUT/cagnut_bwa.
30
+
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
35
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "cagnut_bwa"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cagnut_bwa/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cagnut_bwa"
8
+ spec.version = CagnutBwa::VERSION
9
+ spec.authors = ['Shi-Gang Wang', 'Tse-Ching Ho']
10
+ spec.email = ['seanwang@goldenio.com', 'tsechingho@goldenio.com']
11
+
12
+ spec.summary = %q{Cagnut BWA tools}
13
+ spec.description = %q{Cagnut BWA tools}
14
+ spec.homepage = "https://github.com/CAGNUT/cagnut_bwa"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_dependency 'cagnut_core'
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.12"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec", "~> 3.0"
27
+ end
@@ -0,0 +1,34 @@
1
+ require 'cagnut_bwa/functions/aln'
2
+ require 'cagnut_bwa/functions/samp'
3
+ require 'cagnut_bwa/functions/aln_one_fastq'
4
+ require 'cagnut_bwa/functions/samp_one_fastq'
5
+ require 'cagnut_bwa/functions/mem'
6
+
7
module CagnutBwa
  # Facade over the individual BWA job classes. Every method builds the
  # options hash the job class expects, instantiates it and calls #run,
  # returning whatever #run returns.
  class Base
    # Submits a paired `bwa aln` job.
    #
    # previous_job_id now defaults to nil like in #samp and #samp_one_fastq,
    # so callers without an upstream job no longer have to pass an explicit
    # nil. Existing positional calls are unaffected.
    def aln(dirs, order, previous_job_id = nil, input = nil)
      CagnutBwa::Aln.new(job_opts(dirs, order, input)).run previous_job_id
    end

    # Submits a `bwa sampe` job that waits on previous_job_id.
    def samp(dirs, order, previous_job_id = nil, input = nil)
      CagnutBwa::Samp.new(job_opts(dirs, order, input)).run previous_job_id
    end

    # Submits a `bwa aln` job for the one-fastq layout.
    def aln_one_fastq(dirs, order, input = nil)
      CagnutBwa::AlnOneFastq.new(job_opts(dirs, order, input)).run
    end

    # Submits the sampe/samse job for the one-fastq layout.
    def samp_one_fastq(dirs, order, previous_job_id = nil, input = nil)
      CagnutBwa::SampOneFastq.new(job_opts(dirs, order, input)).run previous_job_id
    end

    # Submits a `bwa mem` job.
    def mem(dirs, order, input = nil)
      CagnutBwa::Mem.new(job_opts(dirs, order, input)).run
    end

    private

    # Options hash shared by every job class constructor.
    def job_opts(dirs, order, input)
      { input: input, dirs: dirs, order: order }
    end
  end
end
@@ -0,0 +1,37 @@
1
module CagnutBwa
  # Tool checks for BWA. The module is prepended to
  # Cagnut::Configuration::Checks::Tools elsewhere in this file; it relies on
  # the host providing `check_tool_ver`, `refs` and `@config`, none of which
  # are defined here.
  module CheckTools
    # Runs the host's own checks (when it defines any), then verifies the BWA
    # binary and the BWA index of the reference fasta.
    def check_tool(tools_path)
      super if defined?(super)
      check_bwa tools_path['bwa']
      check_bwa_index refs['ref_fasta']
    end

    # Reports the BWA version by running the binary and parsing its usage
    # output. The block yields nil when no path is configured.
    #
    # The index check is NOT repeated here: the previous in-block call to
    # check_bwa_index passed no argument (ArgumentError) and would also have
    # replaced the version string as the block's value. #check_tool already
    # performs the index check.
    def check_bwa(path)
      check_tool_ver 'BWA' do
        # NOTE(review): path is interpolated into a shell command; it is
        # assumed to come from trusted configuration.
        `#{path} 2>&1 | grep Version | cut -f2 -d ' '` if path
      end
    end

    # Checks that the BWA index (`<ref>.ann`) exists for ref_path and, if not,
    # prints the command that would build it.
    def check_bwa_index(ref_path)
      tool = 'Bwa Index'
      file = "#{ref_path}.ann"
      command = "#{@config['tools']['bwa']} index #{ref_path}"
      check_ref_related file, tool, command
    end

    # Prints the status of a reference-derived file. When the file is missing
    # it prints the command to create it and marks the overall check as
    # failed by setting @check_completed to false.
    def check_ref_related(file, tool, command)
      if File.exist?(file)
        puts "\t#{tool}: Done"
      else
        puts "\t#{tool}: Not Found!"
        puts "\tPlease execute command:"
        puts "\t\t#{command}"
        @check_completed = false
      end
    end
  end
end
36
+
37
+ Cagnut::Configuration::Checks::Tools.prepend CagnutBwa::CheckTools
@@ -0,0 +1,56 @@
1
+ require 'singleton'
2
+
3
module CagnutBwa
  # Singleton holding the BWA-specific settings derived from the global
  # configuration hash and the `bwa` section of the parameter file.
  class Configuration
    include Singleton
    attr_accessor :rg_str, :mem_params, :aln_params, :samp_params

    class << self
      # Convenience wrapper around Configuration.instance.load.
      def load(config, params)
        instance.load config, params
      end
    end

    # Stores config/params, generates the read-group string for every sample
    # and assigns each computed attribute through its writer.
    #
    # A nil params section is treated as empty instead of raising
    # NoMethodError on the first lookup.
    def load(config, params)
      @config = config
      @params = params || {}
      generate_rg_str
      attributes.each do |name, value|
        writer = "#{name}="
        send writer, value if respond_to? writer
      end
    end

    # Attribute values computed from the loaded config and params.
    # NOTE(review): rg_str is read from @config['sample'] while
    # generate_rg_str writes into each entry of @config['samples'];
    # presumably 'sample' is one of those entries — confirm against the
    # config builder.
    def attributes
      {
        rg_str: @config['sample']['rg_str'],
        mem_params: add_bwa_path_in_params(@params['mem']),
        aln_params: add_bwa_path_in_params(@params['aln']),
        samp_params: add_bwa_path_in_params(@params['samp'])
      }
    end

    # Returns a new array with the bwa executable path followed by the
    # configured parameters, or nil when the section is absent or empty.
    #
    # Uses plain nil?/empty? instead of ActiveSupport's blank?, and tolerates
    # a section without a 'params' key. The configured array is never
    # mutated.
    def add_bwa_path_in_params(method_params)
      return if method_params.nil? || method_params.empty?
      [@config['tools']['bwa'].to_s, *Array(method_params['params'])]
    end

    # Builds the @RG header for every sample and stores it under 'rg_str'.
    def generate_rg_str
      @config['samples'].each do |sample|
        info = @config['info']
        fields = [
          '@RG',
          "ID:#{sample['rgid']}",
          "SM:#{sample['name']}",
          "PL:#{info['pl']}",
          "PU:#{sample['pu']}",
          "LB:#{info['lb']}",
          "DS:#{info['ds']}",
          "CN:#{info['cn']}",
          "DT:#{info['dt']}"
        ]
        # Single quotes on purpose: the separator is a literal backslash-t,
        # which bwa itself converts to a TAB in the SAM header.
        sample.merge! 'rg_str' => fields.join('\t')
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
module CagnutBwa
  # Builds and submits the job script that runs `bwa aln` on both reads of a
  # paired fastq set.
  class Aln
    extend Forwardable
    def_delegators :'Cagnut::Configuration.base', :sample_name, :dodebug, :seqs_path,
                   :ref_fasta, :jobs_dir, :prefix_name, :pipeline_name
    def_delegators :'CagnutBwa.config', :aln_params

    # opts keys read here: :order (Integer), :input (R1 fastq path, defaults
    # to seqs_path) and :dirs (hash with an :output directory).
    #
    # Aborts when the R2 file name cannot be derived from the R1 name. The
    # mate name is validated BEFORE File.expand_path is called: the previous
    # code checked the expanded path for nil, which can never be nil, so an
    # unrecognised name raised TypeError instead of aborting.
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? seqs_path.to_s : opts[:input]
      mate_name = fetch_filename
      abort('Cant recognized sequence files') if mate_name.nil?
      @input2 = File.expand_path mate_name, File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{File.basename(@input)}.sai"
      @output2 = "#{opts[:dirs][:output]}/#{mate_name}.sai"
      @job_name = "#{prefix_name}_#{sample_name}_Aln"
    end

    # Returns the R2 file name for @input (`_R1_` -> `_R2_`, else
    # `_1_` -> `_2_`), or nil when neither marker is present.
    def fetch_filename
      filename = File.basename(@input)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns the job name.
    def run(previous_job_id = nil)
      puts "Submitting bwaAln #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      @job_name
    end

    # Options hash handed to Cagnut::JobManage.submit.
    def queuing_options(previous_job_id = nil)
      threads = 2
      {
        previous_job_id: previous_job_id,
        var_env: [ref_fasta],
        adjust_memory: ["h_vmem=adjustWorkingMem 5G #{threads}"],
        parallel_env: [threads],
        tools: ['bwa', 'aln']
      }
    end

    # Command words for aligning R1.
    def aln_params_for_r1
      aln_command @output, @input
    end

    # Command words for aligning R2.
    def aln_params_for_r2
      aln_command @output2, @input2
    end

    # Renders templates/aln.sh into the jobs directory, makes it executable
    # for the owner only and returns the script name (without extension).
    def generate_script
      script_name = "#{@order}_bwa_aln"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/aln.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params(script_name)
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        input: @input,
        input2: @input2,
        output: @output,
        output2: @output2,
        aln_params_for_r1: aln_params_for_r1,
        aln_params_for_r2: aln_params_for_r2,
        run_local: ::Cagnut::JobManage.run_local
      }
    end

    private

    # Configured aln parameters with the `aln` subcommand inserted after the
    # executable, followed by reference, output and input; duplicates removed
    # as before.
    def aln_command(output, input)
      words = aln_params.dup
      words.insert 1, 'aln'
      words << ref_fasta.to_s
      words << "-f #{output}"
      words << input.to_s
      words.uniq
    end
  end
end
@@ -0,0 +1,94 @@
1
module CagnutBwa
  # Builds and submits the `bwa aln` job script for the one-fastq layout
  # (`*_1_*` with an optional `*_2_*` mate).
  class AlnOneFastq
    extend Forwardable

    # :seqs_path is delegated as well; #initialize falls back to it when no
    # :input is given and previously raised NameError.
    def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name, :dodebug,
                   :ref_fasta, :jobs_dir, :data_type, :seqs_path
    def_delegators :'CagnutBwa.config', :aln_params

    # opts keys read here: :order, :input (defaults to seqs_path) and :dirs
    # (hash with an :output directory). The mate input/output are only set
    # when the input path contains `_1_` and a mate name can be derived.
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? seqs_path.to_s : opts[:input]
      abort('Cant recognized sequence files') if @input.nil? || @input.empty?
      output_dir = opts[:dirs][:output]
      @output = "#{output_dir}/#{File.basename(@input).gsub('.gz', '').gsub('.txt', '.sai')}"
      mate_name = fetch_filename(@input) if @input.match '_1_'
      if mate_name
        @input2 = File.expand_path mate_name, File.dirname(@input)
        @output2 = "#{output_dir}/#{fetch_filename(@output)}"
      end
      @job_name = "#{prefix_name}_#{sample_name}_Aln_one_fastq"
    end

    # Returns the mate file name for file (`_R1_` -> `_R2_`, else
    # `_1_` -> `_2_`), or nil when neither marker is present.
    def fetch_filename(file)
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns the job name.
    def run(previous_job_id = nil)
      puts "Submitting bwa_aln_one_fastq #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      @job_name
    end

    # Options hash handed to Cagnut::JobManage.submit.
    def queuing_options(previous_job_id = nil)
      threads = 2
      {
        previous_job_id: previous_job_id,
        var_env: [fastq_dir, sai_dir, threads],
        adjust_memory: ['h_vmem=3.4G'],
        parallel_env: [threads],
        tools: ['bwa', 'aln']
      }
    end

    # Command words for aligning the primary fastq.
    def aln_params_for_r1
      aln_command @output, @input
    end

    # Command words for aligning the mate fastq (only meaningful when a mate
    # was detected in #initialize).
    def aln_params_for_r2
      aln_command @output2, @input2
    end

    # Writes the job script into the jobs directory, makes it executable for
    # the owner only and returns the script name (without extension).
    def generate_script
      script_name = "#{@order}_bwa_aln_one_fastq"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') { |f| f.puts script_body(script_name) }
      File.chmod(0700, file)
      script_name
    end

    private

    # NOTE(review): fastq_dir and sai_dir were referenced by queuing_options
    # but defined nowhere in this class; they are derived here from the input
    # and output paths, which is what the names suggest — confirm against
    # what Cagnut::JobManage expects in :var_env.
    def fastq_dir
      File.dirname(@input)
    end

    def sai_dir
      File.dirname(@output)
    end

    # Configured aln parameters with the `aln` subcommand inserted after the
    # executable, followed by reference, output and input; duplicates removed
    # as before.
    def aln_command(output, input)
      words = aln_params.dup
      words.insert 1, 'aln'
      words << ref_fasta.to_s
      words << "-f #{output}"
      words << input.to_s
      words.uniq
    end

    # Joins command words into one shell command split over continuation
    # lines, with the run_local suffix on the last line.
    def shell_command(words, run_local)
      (words + [run_local]).join(" \\\n  ")
    end

    # Text of the job script. The mate alignment is emitted only when a mate
    # exists; the previous script decided this in bash and carried an empty
    # `else` branch, which bash rejects as a syntax error. Lines are built
    # without leading indentation so the shebang is always in column 0.
    def script_body(script_name)
      run_local = ::Cagnut::JobManage.run_local
      lines = [
        '#!/bin/bash',
        %(echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs")
      ]
      lines << shell_command(aln_params_for_r2, run_local) if @input2
      lines << shell_command(aln_params_for_r1, run_local)
      lines.concat [
        '',
        %(if [ ! -s "#{@output}" ]),
        'then',
        %(  echo "Missing SAI:#{@output} file!"),
        '  exit 100',
        'fi',
        %(echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"),
        ''
      ]
      lines.join("\n")
    end
  end
end
@@ -0,0 +1,74 @@
1
module CagnutBwa
  # Builds and submits the job script that runs `bwa mem` on a paired fastq
  # set and writes a SAM file.
  class Mem
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
                   :ref_fasta, :jobs_dir, :prefix_name
    def_delegators :'CagnutBwa.config', :rg_str, :mem_params

    # opts keys read here: :order, :input (R1 fastq path, defaults to
    # seqs_path) and :dirs (hash with an :output directory).
    #
    # Aborts when the R2 file name cannot be derived. The mate name is
    # validated before File.expand_path is called; checking the expanded
    # path for nil (as before) never triggered and let a TypeError escape.
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? seqs_path.to_s : opts[:input]
      mate_name = fetch_filename
      abort('Cant recognized sequence files') if mate_name.nil?
      @input2 = File.expand_path mate_name, File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{sample_name}_mem.sam"
      @job_name = "#{prefix_name}_#{sample_name}_mem*"
    end

    # Returns the R2 file name for @input (`_R1_` -> `_R2_`, else
    # `_1_` -> `_2_`), or nil when neither marker is present.
    def fetch_filename
      filename = File.basename(@input)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job_name, output_path].
    def run(previous_job_id = nil)
      puts "Submitting bwaMem #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options hash handed to Cagnut::JobManage.submit.
    def queuing_options(previous_job_id = nil)
      {
        previous_job_id: previous_job_id,
        tools: ['bwa', 'mem']
      }
    end

    # Command words for `bwa mem`: configured parameters with the subcommand
    # inserted after the executable, then -M, the read group, reference, both
    # inputs and the output redirection; duplicates removed as before.
    def mem_options
      words = mem_params.dup
      words.insert 1, 'mem'
      words.push '-M',
                 "-R \"#{rg_str}\"",
                 ref_fasta.to_s,
                 @input.to_s,
                 @input2.to_s,
                 "> #{@output}"
      words.uniq
    end

    # Renders templates/mem.sh into the jobs directory, makes it executable
    # for the owner only and returns the script name (without extension).
    def generate_script
      script_name = "#{@order}_bwa_mem"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/mem.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params(script_name)
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        output: @output,
        mem_params: mem_options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
@@ -0,0 +1,81 @@
1
module CagnutBwa
  # Builds and submits the job script that runs `bwa sampe` on the two .sai
  # files of a paired fastq set and gzips the resulting SAM.
  class Samp
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
                   :ref_fasta, :jobs_dir, :dodebug, :prefix_name
    def_delegators :'CagnutBwa.config', :rg_str, :samp_params

    # opts keys read here: :order, :input (R1 fastq path, defaults to
    # seqs_path) and :dirs (hash with :input and :output directories).
    #
    # Aborts when the R2 file name cannot be derived. The mate name is
    # validated before any File.expand_path call; the previous nil check on
    # the expanded path never triggered and let a TypeError escape.
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @fastq = opts[:input].nil? ? seqs_path.to_s : opts[:input]
      mate_fastq = fetch_filename(@fastq)
      abort('Cant recognized sequence files') if mate_fastq.nil?
      @fastq2 = File.expand_path mate_fastq, File.dirname(@fastq)
      @input = "#{opts[:dirs][:input]}/#{File.basename(@fastq)}.sai"
      @input2 = File.expand_path fetch_filename(@input), File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{sample_name}_aligned.sam.gz"
      @job_name = "#{prefix_name}_#{sample_name}_Samp"
    end

    # Returns the mate file name for file (`_R1_` -> `_R2_`, else
    # `_1_` -> `_2_`), or nil when neither marker is present.
    def fetch_filename(file)
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job_name, output_path].
    def run(previous_job_id = nil)
      puts "Submitting bwaSamp #{sample_name} RG_STR= #{rg_str}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options hash handed to Cagnut::JobManage.submit.
    def queuing_options(previous_job_id = nil)
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=5G'],
        tools: ['bwa', 'samp']
      }
    end

    # Command words for `bwa sampe`: configured parameters with the
    # subcommand inserted after the executable, then read group, reference,
    # both .sai files, both fastq files and the gzip pipeline; duplicates and
    # nils removed as before.
    def samp_options
      words = samp_params.dup
      words.insert 1, 'sampe'
      words.push "-r \"#{rg_str}\"",
                 ref_fasta.to_s,
                 @input.to_s,
                 @input2.to_s,
                 @fastq.to_s,
                 "#{@fastq2} | gzip > #{@output}"
      words.uniq.compact
    end

    # Renders templates/samp.sh into the jobs directory, makes it executable
    # for the owner only and returns the script name (without extension).
    def generate_script
      script_name = "#{@order}_bwa_samp"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/samp.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params(script_name)
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        input: @input,
        input2: @input2,
        fastq: @fastq,
        fastq2: @fastq2,
        output: @output,
        samp_options: samp_options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
@@ -0,0 +1,95 @@
1
module CagnutBwa
  # Builds and submits the `bwa sampe` (paired) or `bwa samse` (single-end)
  # job script for the one-fastq layout.
  class SampOneFastq
    extend Forwardable

    # :seqs_path is delegated as well; the constructor falls back to it when
    # no :input is given.
    def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name,
                   :ref_fasta, :jobs_dir, :data_type, :dodebug, :seqs_path
    def_delegators :'CagnutBwa.config', :rg_str, :samp_params

    # opts keys read here: :order, :input (fastq path, defaults to seqs_path)
    # and :dirs (hash with :input and :output directories).
    #
    # This method was previously misspelled `initizaline`, so it never ran
    # and `SampOneFastq.new(opts)` raised ArgumentError.
    def initialize(opts = {})
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_#{sample_name}_Samp"
      @seq = opts[:input].nil? ? seqs_path.to_s : opts[:input]
      abort('Cant recognized sequence files') if @seq.nil? || @seq.empty?
      input_dir = opts[:dirs][:input]
      @sai = "#{input_dir}/#{File.basename(@seq).gsub('.gz', '').gsub('.txt', '.sai')}"
      # Mates exist only for `_1_` paths; an empty string marks "no mate".
      seq_mate = fetch_filename(@seq) if @seq.match('_1_')
      sai_mate = fetch_filename(@sai) if @sai.match('_1_')
      @seq2 = seq_mate ? File.expand_path(seq_mate, File.dirname(@seq)) : ''
      @sai2 = sai_mate ? "#{input_dir}/#{sai_mate}" : ''
      @output = "#{opts[:dirs][:output]}/#{sample_name}_sequence.aligned.sam.gz"
    end

    # Returns the mate file name for file (`_R1_` -> `_R2_`, else
    # `_1_` -> `_2_`), or nil when neither marker is present.
    def fetch_filename(file)
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job_name, output_path].
    def run(previous_job_id = nil)
      puts "Submitting bwaSampOneFastq #{sample_name} RG_STR= #{rg_str}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options hash handed to Cagnut::JobManage.submit.
    def queuing_options(previous_job_id = nil)
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=5G'],
        parallel_env: ['30'],
        tools: ['bwa', 'samp']
      }
    end

    # Renders the template matching data_type and returns the name of the
    # script that was written. The name now carries the order prefix, like
    # the file on disk and like the other job classes, so the name submitted
    # to the job manager matches the script.
    def generate_script
      template_name = paired? ? 'bwa_samp_one_fastq' : 'bwa_samse_one_fastq'
      bwa_samp_one_fastq template_name
      "#{@order}_#{template_name}"
    end

    # Command words for paired data (`bwa sampe`).
    def samp_one_fastq_options
      words = samp_params.dup
      words.insert 1, 'sampe'
      words.push "-r \"#{rg_str}\"",
                 ref_fasta.to_s,
                 @sai.to_s,
                 @sai2.to_s,
                 @seq.to_s,
                 "#{@seq2} | gzip > #{@output}"
      words.uniq.compact
    end

    # Command words for single-end data. Uses the `samse` subcommand; the
    # previous `sampe` expects two .sai and two fastq arguments.
    def samse_one_fastq_options
      words = samp_params.dup
      words.insert 1, 'samse'
      words.push "-r \"#{rg_str}\"",
                 ref_fasta.to_s,
                 @sai.to_s,
                 "#{@seq} | gzip > #{@output}"
      words.uniq.compact
    end

    # Renders templates/<script_name>.sh into
    # "<jobs_dir>/<order>_<script_name>.sh" and makes it executable for the
    # owner only. script_name is the template base name without the order.
    def bwa_samp_one_fastq(script_name)
      ordered_name = "#{@order}_#{script_name}"
      file = File.join jobs_dir, "#{ordered_name}.sh"
      path = File.expand_path "../templates/#{script_name}.sh", __FILE__
      template = Tilt.new path
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(ordered_name)
      end
      File.chmod(0700, file)
    end

    # Locals made available to the template. :samp_options is the array of
    # command words (the templates call #join on it), not a method name.
    def job_params(script_name)
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        output: @output,
        seq: @seq,
        seq2: @seq2,
        samp_options: (paired? ? samp_one_fastq_options : samse_one_fastq_options),
        run_local: ::Cagnut::JobManage.run_local
      }
    end

    private

    # True when the configured data type selects the paired workflow.
    def paired?
      data_type == 'ONEFASTQ'
    end
  end
end
@@ -0,0 +1,104 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ echo #{aln_params_for_r1.join("\s")}
6
+ #{aln_params_for_r1.join(" \\\n ")} \\
7
+ #{run_local}
8
+
9
+ # force an error when the sai is missing/empty; this prevents continuation of the pipeline
10
+ if [ ! -s "#{output}" ]
11
+ then
12
+ echo "Missing SAI:#{output} file!"
13
+ exit 100
14
+ fi
15
+
16
+ # check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ OK=1
22
+ else
23
+ #echo " empty variable"
24
+ echo "Improper stdout termination"
25
+ exit 100
26
+ fi
27
+
28
+ #check for correct number of sequences processed, based on fastq records
29
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
30
+
31
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
32
+ echo "bwa processed" $PROCESSED
33
+
34
+ if [[ "#{input}" =~ gz$ ]]
35
+ then
36
+ LINESFASTQ1=$(zcat "#{input}" | wc -l)
37
+ else
38
+ # non gz files
39
+ LINESFASTQ1=$(wc -l "#{input}" | cut -d" " -f1 )
40
+ fi
41
+ echo "Fastq1 number lines:= " $LINESFASTQ1
42
+ SEQLINES=$[ $LINESFASTQ1 / 4 ]
43
+ echo "Estimated Minimum Sequences:= " $SEQLINES
44
+ if (( "$PROCESSED" >= "$SEQLINES" ))
45
+ then
46
+ echo "Complete."
47
+ else
48
+ echo "Error, incorrect number of processed sequences"
49
+ exit 100
50
+ fi
51
+
52
+ ####################################################################
53
+ # PAIR _2_
54
+ # run and check pair _2_
55
+ #
56
+ #
57
+ ####################################################################
58
+
59
+ #{aln_params_for_r2.join(" \\\n ")} \\
60
+ #{run_local}
61
+
62
+ # force an error when the sai is missing/empty; this prevents continuation of the pipeline
63
+ if [ ! -s "#{output2}" ]
64
+ then
65
+ echo "Missing SAI:#{output2} file!"
66
+ exit 100
67
+ fi
68
+
69
+ # check STDOUT has correct termination string
70
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
71
+
72
+ if [ -n "$HASENDING" ]
73
+ then
74
+ OK=1
75
+ else
76
+ #echo " empty variable"
77
+ echo "Improper stdout termination"
78
+ exit 100
79
+ fi
80
+
81
+ #check for correct number of sequences processed, based on fastq records
82
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
83
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
84
+ echo "bwa processed" $PROCESSED
85
+
86
+ if [[ "#{input2}" =~ gz$ ]]
87
+ then
88
+ LINESFASTQ2=$(zcat "#{input2}" | wc -l)
89
+ else
90
+ # non gz files
91
+ LINESFASTQ2=$(wc -l "#{input2}" | cut -d" " -f1 )
92
+ fi
93
+ echo "Fastq2 number lines:= " $LINESFASTQ2
94
+ SEQLINES=$[ $LINESFASTQ2 / 4 ]
95
+ echo "Estimated Minimum Sequences:= " $SEQLINES
96
+ if (( "$PROCESSED" >= "$SEQLINES" ))
97
+ then
98
+ echo "Complete."
99
+ else
100
+ echo "Error, incorrect number of processed sequences"
101
+ exit 100
102
+ fi
103
+
104
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
@@ -0,0 +1,67 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
#{samp_options.join(" \\\n ")} \\
#{run_local}

# Output size check (less than 1MB means failure); currently disabled.
# NOTE(review): this template appears to be rendered as a Ruby string, in
# which case interpolation markers are evaluated even inside shell comments.
# The disabled check therefore only references `output`, which the caller
# passes; the previous text referenced names that are not provided.
#
# if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
# then
#   echo "Error with output."
#   exit 100
# fi
15
+
16
+ #check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ echo "OUTPUT ok."
22
+ else
23
+ #echo " empty variable"
24
+ echo "Improper stdout termination"
25
+ exit 100
26
+ fi
27
+
28
+ #check for correct number of sequences processed, based on fastq records
29
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
30
+
31
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
32
+ echo "bwa processed" $PROCESSED
33
+
34
# Count the lines of both fastq files (gzip-compressed or plain).
if [[ "#{seq}" =~ gz$ ]]
then
  LINESFASTQ1=$(zcat "#{seq}" | wc -l)
  LINESFASTQ2=$(zcat "#{seq2}" | wc -l)
else
  # non gz files: read from stdin so wc prints only the number; with a file
  # argument it prints "N filename", which breaks the arithmetic below
  LINESFASTQ1=$(wc -l < "#{seq}")
  LINESFASTQ2=$(wc -l < "#{seq2}")
fi
45
+
46
+ echo "Fastq1 number lines:= " $LINESFASTQ1
47
+ echo "Fastq2 number lines:= " $LINESFASTQ2
48
+
49
+ if (( "$LINESFASTQ1" >= "$LINESFASTQ2" ))
50
+ then
51
+ SEQLINES=$[ $LINESFASTQ2 / 4 ]
52
+ else
53
+ SEQLINES=$[ $LINESFASTQ1 / 4 ]
54
+ fi
55
+
56
+ echo "Estimated Minimum Sequences:= " $SEQLINES
57
+
58
+ if (( "$PROCESSED" >= "$SEQLINES" ))
59
+ then
60
+ echo "Complete."
61
+ else
62
+ echo "Error, incorrect number of processed sequences"
63
+ exit 100
64
+ fi
65
+
66
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
67
+
@@ -0,0 +1,34 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ #{samp_options.join(" \\\n ")} \\
6
+ #{run_local}
7
+
8
+ # check file size less than 1MB
9
+
10
+ if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
11
+ then
12
+ echo "Error with output."
13
+ exit 100
14
+ fi
15
+
16
+ #check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ echo "OUTPUT ok."
22
+ else
23
+ #echo " empty variable"
24
+ echo "Improper stdout termination"
25
+ exit 100
26
+ fi
27
+
28
+ #check for correct number of sequences processed, based on fastq records
29
+
30
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
31
+
32
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
33
+ echo "bwa processed" $PROCESSED
34
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
@@ -0,0 +1,29 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ echo #{mem_params.join("\s")}
6
+ #{mem_params.join(" \\\n ")} \\
7
+ #{run_local}
8
+
9
+ # force an error when the sam is missing/empty; this prevents continuation of the pipeline
10
+ if [ ! -s #{output} ]
11
+ then
12
+ echo "Missing SAM:#{output} file!"
13
+ exit 100
14
+ fi
15
+
16
+ # check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " Processed")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ OK=1
22
+ else
23
+ #echo " empty variable"
24
+ echo "Error: Improper stdout termination"
25
+ echo $EXITSTATUS
26
+ echo "bwa (mem) has likely crashed. Exiting"
27
+ exit 100
28
+ fi
29
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
@@ -0,0 +1,73 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ # File Checking
6
+ if [ ! -s "#{input}" ];then
7
+ echo "Error: Missing " #{input}
8
+ exit 100
9
+ fi
10
+ if [ ! -s "#{input2}" ];then
11
+ echo "Error: Missing " #{input2}
12
+ exit 100
13
+ fi
14
+
15
+ #{samp_options.join(" \\\n ")} \\
16
+ #{run_local}
17
+
18
+ # check if file size less than 1MB
19
+ if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
20
+ then
21
+ echo "Error with output."
22
+ exit 100
23
+ fi
24
+
25
+ # check STDOUT has correct termination string
26
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
27
+
28
+ if [ -n "$HASENDING" ]
29
+ then
30
+ OK=1
31
+ else
32
+ #echo " empty variable"
33
+ echo "Improper stdout termination"
34
+ exit 100
35
+ fi
36
+
37
+ #check for correct number of sequences processed, based on fastq records
38
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
39
+
40
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
41
+ echo "bwa processed" $PROCESSED
42
+
43
# Count the lines of both fastq files (gzip-compressed or plain).
if [[ "#{fastq}" =~ gz$ ]]
then
  LINESFASTQ1=$(zcat "#{fastq}" | wc -l)
  # fastq2 is derived from fastq by renaming the read marker, so it already
  # carries the .gz extension; appending another one pointed at a missing file
  LINESFASTQ2=$(zcat "#{fastq2}" | wc -l)
else
  # non gz files
  LINESFASTQ1=$(wc -l "#{fastq}" | cut -d" " -f1 )
  LINESFASTQ2=$(wc -l "#{fastq2}" | cut -d" " -f1 )
fi
52
+
53
+ echo "Fastq1 number lines:= " $LINESFASTQ1
54
+ echo "Fastq2 number lines:= " $LINESFASTQ2
55
+
56
+ if (( "$LINESFASTQ1" >= "$LINESFASTQ2" ))
57
+ then
58
+ SEQLINES=$[ $LINESFASTQ2 / 4 ]
59
+ else
60
+ SEQLINES=$[ $LINESFASTQ1 / 4 ]
61
+ fi
62
+
63
+ echo "Estimated Minimum Sequences:= " $SEQLINES
64
+
65
+ if (( "$PROCESSED" >= "$SEQLINES" ))
66
+ then
67
+ echo "Complete."
68
+ else
69
+ echo "Error, incorrect number of processed sequences"
70
+ exit 100
71
+ fi
72
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
73
+
@@ -0,0 +1,35 @@
1
module CagnutBwa
  # Pipeline-facing helper: forwards each step to a CagnutBwa::Base instance
  # and appends the next order number to what the step returned.
  class Util
    attr_accessor :bwa, :config

    # Keeps the given config and creates the Base instance used by all steps.
    def initialize(config)
      @config = config
      @bwa = CagnutBwa::Base.new
    end

    # Returns [job_name, next_order].
    def aln_one_fastq(dirs, order = 1, filename = nil)
      [bwa.aln_one_fastq(dirs, order, filename), order + 1]
    end

    # Returns [job_name, output_file, next_order].
    def samp_one_fastq(dirs, order = 1, previous_job_id = nil, filename = nil)
      job_name, output = bwa.samp_one_fastq(dirs, order, previous_job_id, filename)
      [job_name, output, order + 1]
    end

    # Returns [job_name, next_order].
    def aln(dirs, order = 1, previous_job_id = nil, filename = nil)
      [bwa.aln(dirs, order, previous_job_id, filename), order + 1]
    end

    # Returns [job_name, output_file, next_order].
    def samp(dirs, order = 1, previous_job_id = nil, filename = nil)
      job_name, output = bwa.samp(dirs, order, previous_job_id, filename)
      [job_name, output, order + 1]
    end

    # Returns [job_name, output_file, next_order].
    def mem(dirs, order = 1, filename = nil)
      job_name, output = bwa.mem(dirs, order, filename)
      [job_name, output, order + 1]
    end
  end
end
@@ -0,0 +1,3 @@
1
module CagnutBwa
  # Gem version string; frozen so it cannot be mutated by accident.
  VERSION = "0.3.0".freeze
end
data/lib/cagnut_bwa.rb ADDED
@@ -0,0 +1,16 @@
1
+ require "cagnut_bwa/version"
2
+
3
module CagnutBwa
  class << self
    # Returns the memoized BWA configuration.
    #
    # On first use it calls CagnutBwa::Configuration.load with the core
    # Cagnut configuration and the 'bwa' entry of the core params, then
    # caches CagnutBwa::Configuration.instance. Later calls return the
    # cached object without loading again (as long as it is truthy).
    #
    # @return [Object] whatever CagnutBwa::Configuration.instance returns
    def config
      @config ||= begin
        CagnutBwa::Configuration.load(
          Cagnut::Configuration.config,
          Cagnut::Configuration.params['bwa']
        )
        CagnutBwa::Configuration.instance
      end
    end
  end
end
13
+
14
+ require 'cagnut_bwa/configuration'
15
+ require 'cagnut_bwa/base'
16
+ require 'cagnut_bwa/util'
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cagnut_bwa
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Shi-Gang Wang
8
+ - Tse-Ching Ho
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2016-11-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: cagnut_core
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: bundler
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1.12'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.12'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rake
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '10.0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '10.0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rspec
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '3.0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ description: Cagnut BWA tools
71
+ email:
72
+ - seanwang@goldenio.com
73
+ - tsechingho@goldenio.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - ".ruby-version"
81
+ - ".travis.yml"
82
+ - Gemfile
83
+ - LICENSE.txt
84
+ - README.md
85
+ - Rakefile
86
+ - bin/console
87
+ - bin/setup
88
+ - cagnut_bwa.gemspec
89
+ - lib/cagnut_bwa.rb
90
+ - lib/cagnut_bwa/base.rb
91
+ - lib/cagnut_bwa/check_tools.rb
92
+ - lib/cagnut_bwa/configuration.rb
93
+ - lib/cagnut_bwa/functions/aln.rb
94
+ - lib/cagnut_bwa/functions/aln_one_fastq.rb
95
+ - lib/cagnut_bwa/functions/mem.rb
96
+ - lib/cagnut_bwa/functions/samp.rb
97
+ - lib/cagnut_bwa/functions/samp_one_fastq.rb
98
+ - lib/cagnut_bwa/functions/templates/aln.sh
99
+ - lib/cagnut_bwa/functions/templates/bwa_samp_one_fastq.sh
100
+ - lib/cagnut_bwa/functions/templates/bwa_samse_one_fastq.sh
101
+ - lib/cagnut_bwa/functions/templates/mem.sh
102
+ - lib/cagnut_bwa/functions/templates/samp.sh
103
+ - lib/cagnut_bwa/util.rb
104
+ - lib/cagnut_bwa/version.rb
105
+ homepage: https://github.com/CAGNUT/cagnut_bwa
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.5.1
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Cagnut BWA tools
129
+ test_files: []