cagnut_bwa 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 212201476cdd487c7d175275b4b2dda5064794a8
4
+ data.tar.gz: e1b5fb5113d105b8ba7782c3b34e100fb05f6820
5
+ SHA512:
6
+ metadata.gz: 4432d3082474d04bed19517a8cb42c2c350706c7d06ecd67e888b1997c6137e601a56bdb00fc86979b8b600dcaad6a3386b305fa0922024fb79b3dfcda233ad9
7
+ data.tar.gz: 002602c3c99c28f7eb7eef0504038c1c07e3df88fafecf8b15c53c202c96bd32040032abd2fda8d67f60edd984f6da195c0e36473a04f55179f27d9668b586d9
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.3.1
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cagnut_bwa.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Goldenio Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # CagnutBwa
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's Gemfile:
6
+
7
+ ```ruby
8
+ gem 'cagnut_bwa'
9
+ ```
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cagnut_bwa
18
+
19
+ ## Usage
20
+
21
+ ## Development
22
+
23
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
24
+
25
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
26
+
27
+ ## Contributing
28
+
29
+ Bug reports and pull requests are welcome on GitHub at https://github.com/CAGNUT/cagnut_bwa.
30
+
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
35
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "cagnut_bwa"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cagnut_bwa/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cagnut_bwa"
8
+ spec.version = CagnutBwa::VERSION
9
+ spec.authors = ['Shi-Gang Wang', 'Tse-Ching Ho']
10
+ spec.email = ['seanwang@goldenio.com', 'tsechingho@goldenio.com']
11
+
12
+ spec.summary = %q{Cagnut BWA tools}
13
+ spec.description = %q{Cagnut BWA tools}
14
+ spec.homepage = "https://github.com/CAGNUT/cagnut_bwa"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_dependency 'cagnut_core'
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.12"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec", "~> 3.0"
27
+ end
@@ -0,0 +1,34 @@
1
+ require 'cagnut_bwa/functions/aln'
2
+ require 'cagnut_bwa/functions/samp'
3
+ require 'cagnut_bwa/functions/aln_one_fastq'
4
+ require 'cagnut_bwa/functions/samp_one_fastq'
5
+ require 'cagnut_bwa/functions/mem'
6
+
7
+ module CagnutBwa
8
+ class Base
9
+ def aln dirs, order, previous_job_id, input = nil
10
+ opts = { input: input, dirs: dirs, order: order }
11
+ CagnutBwa::Aln.new(opts).run previous_job_id
12
+ end
13
+
14
+ def samp dirs, order, previous_job_id = nil, input = nil
15
+ opts = { input: input, dirs: dirs, order: order }
16
+ CagnutBwa::Samp.new(opts).run previous_job_id
17
+ end
18
+
19
+ def aln_one_fastq dirs, order, input = nil
20
+ opts = { input: input, dirs: dirs, order: order }
21
+ CagnutBwa::AlnOneFastq.new(opts).run
22
+ end
23
+
24
+ def samp_one_fastq dirs, order, previous_job_id = nil, input = nil
25
+ opts = { input: input, dirs: dirs, order: order }
26
+ CagnutBwa::SampOneFastq.new(opts).run previous_job_id
27
+ end
28
+
29
+ def mem dirs, order, input = nil
30
+ opts = { input: input, dirs: dirs, order: order }
31
+ CagnutBwa::Mem.new(opts).run
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,37 @@
1
+ module CagnutBwa
2
+ module CheckTools
3
+ def check_tool tools_path
4
+ super if defined?(super)
5
+ check_bwa tools_path['bwa']
6
+ check_bwa_index refs['ref_fasta']
7
+ end
8
+
9
+ def check_bwa path
10
+ check_tool_ver 'BWA' do
11
+ `#{path} 2>&1 | grep Version | cut -f2 -d ' '` if path
12
+ check_bwa_index
13
+ end
14
+ end
15
+
16
+ def check_bwa_index ref_path
17
+ tool = 'Bwa Index'
18
+ file = "#{ref_path}.ann"
19
+ command = "#{@config['tools']['bwa']} index #{ref_path}"
20
+ check_ref_related file, tool, command
21
+ end
22
+
23
+
24
+ def check_ref_related file, tool, command
25
+ if File.exist?(file)
26
+ puts "\t#{tool}: Done"
27
+ else
28
+ puts "\t#{tool}: Not Found!"
29
+ puts "\tPlease execute command:"
30
+ puts "\t\t#{command}"
31
+ @check_completed = false
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ Cagnut::Configuration::Checks::Tools.prepend CagnutBwa::CheckTools
@@ -0,0 +1,56 @@
1
+ require 'singleton'
2
+
3
+ module CagnutBwa
4
+ class Configuration
5
+ include Singleton
6
+ attr_accessor :rg_str, :mem_params, :aln_params, :samp_params
7
+
8
+ class << self
9
+ def load config, params
10
+ instance.load config, params
11
+ end
12
+ end
13
+
14
+ def load config, params
15
+ @config = config
16
+ @params = params
17
+ generate_rg_str
18
+ attributes.each do |name, value|
19
+ send "#{name}=", value if respond_to? "#{name}="
20
+ end
21
+ end
22
+
23
+ def attributes
24
+ {
25
+ rg_str: @config['sample']['rg_str'],
26
+ mem_params: add_bwa_path_in_params(@params['mem']),
27
+ aln_params: add_bwa_path_in_params(@params['aln']),
28
+ samp_params: add_bwa_path_in_params(@params['samp'])
29
+ }
30
+ end
31
+
32
+ def add_bwa_path_in_params method_params
33
+ return if method_params.blank?
34
+ array = method_params['params'].dup
35
+ array.unshift "#{@config['tools']['bwa']}"
36
+ end
37
+
38
+ def generate_rg_str # adds an 'rg_str' read-group entry to every hash in @config['samples']
39
+ @config['samples'].each do |sample|
40
+ arg = %W(
41
+ @RG
42
+ ID:#{sample['rgid']}
43
+ SM:#{sample['name']}
44
+ PL:#{@config['info']['pl']}
45
+ PU:#{sample['pu']}
46
+ LB:#{@config['info']['lb']}
47
+ DS:#{@config['info']['ds']}
48
+ CN:#{@config['info']['cn']}
49
+ DT:#{@config['info']['dt']}
50
+ )
51
+ rg_str = { 'rg_str' => arg.join('\t') } # single-quoted: fields are joined by a literal backslash + "t", not a tab character
52
+ sample.merge! rg_str # mutates the sample hash in place
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,88 @@
1
+ module CagnutBwa
2
+ class Aln
3
+ extend Forwardable
4
+ def_delegators :'Cagnut::Configuration.base', :sample_name, :dodebug, :seqs_path,
5
+ :ref_fasta, :jobs_dir, :prefix_name, :pipeline_name
6
+ def_delegators :'CagnutBwa.config', :aln_params
7
+
8
+ def initialize opts = {}
9
+ @order = sprintf '%02i', opts[:order]
10
+ @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
11
+ @input2 = File.expand_path fetch_filename, File.dirname(@input)
12
+ abort('Cant recognized sequence files') if @input2.nil?
13
+ @output = "#{opts[:dirs][:output]}/#{File.basename(@input)}.sai"
14
+ @output2 = "#{opts[:dirs][:output]}/#{fetch_filename}.sai"
15
+ @job_name = "#{prefix_name}_#{sample_name}_Aln"
16
+ end
17
+
18
+ def fetch_filename
19
+ filename = File.basename(@input)
20
+ if filename.match '_R1_'
21
+ filename.gsub '_R1_', '_R2_'
22
+ elsif filename.match '_1_'
23
+ filename.gsub '_1_', '_2_'
24
+ end
25
+ end
26
+
27
+ def run previous_job_id = nil
28
+ puts "Submitting bwaAln #{sample_name}"
29
+ script_name = generate_script
30
+ ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
31
+ @job_name
32
+ end
33
+
34
+ def queuing_options previous_job_id = nil
35
+ threads = 2
36
+ {
37
+ previous_job_id: previous_job_id,
38
+ var_env: [ref_fasta],
39
+ adjust_memory: ["h_vmem=adjustWorkingMem 5G #{threads}"],
40
+ parallel_env: [threads],
41
+ tools: ['bwa', 'aln']
42
+ }
43
+ end
44
+
45
+ def aln_params_for_r1
46
+ array = aln_params.dup
47
+ array.insert 1, 'aln'
48
+ array << "#{ref_fasta}"
49
+ array << "-f #{@output}"
50
+ array << "#{@input}"
51
+ array.uniq
52
+ end
53
+
54
+ def aln_params_for_r2
55
+ array = aln_params.dup
56
+ array.insert 1, 'aln'
57
+ array << "#{ref_fasta}"
58
+ array << "-f #{@output2}"
59
+ array << "#{@input2}"
60
+ array.uniq
61
+ end
62
+
63
+ def generate_script
64
+ script_name = "#{@order}_bwa_aln"
65
+ file = File.join jobs_dir, "#{script_name}.sh"
66
+ template = Tilt.new(File.expand_path '../templates/aln.sh', __FILE__)
67
+ File.open(file, 'w') do |f|
68
+ f.puts template.render Object.new, job_params(script_name)
69
+ end
70
+ File.chmod(0700, file)
71
+ script_name
72
+ end
73
+
74
+ def job_params script_name
75
+ {
76
+ jobs_dir: jobs_dir,
77
+ script_name: script_name,
78
+ input: @input,
79
+ input2: @input2,
80
+ output: @output,
81
+ output2: @output2,
82
+ aln_params_for_r1: aln_params_for_r1,
83
+ aln_params_for_r2: aln_params_for_r2,
84
+ run_local: ::Cagnut::JobManage.run_local
85
+ }
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,94 @@
1
+ module CagnutBwa
2
+ class AlnOneFastq
3
+ extend Forwardable
4
+
5
+ def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name, :dodebug,
6
+ :ref_fasta, :jobs_dir, :data_type
7
+ def_delegators :'CagnutBwa.config', :aln_params
8
+
9
+ def initialize opts = {}
10
+ @order = sprintf '%02i', opts[:order]
11
+ @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
12
+ abort('Cant recognized sequence files') if @input.nil?
13
+ @input2 = File.expand_path fetch_filename(@input), File.dirname(@input) if @input.match '_1_'
14
+ @output = "#{opts[:dirs][:output]}/#{File.basename(@input).gsub('.gz', '').gsub('.txt','.sai')}"
15
+ @output2 = "#{opts[:dirs][:output]}/#{fetch_filename(@output)}" if @input.match '_1_'
16
+ @job_name = "#{prefix_name}_#{sample_name}_Aln_one_fastq"
17
+ end
18
+
19
+ def fetch_filename file
20
+ filename = File.basename(file)
21
+ if filename.match '_R1_'
22
+ filename.gsub '_R1_', '_R2_'
23
+ elsif filename.match '_1_'
24
+ filename.gsub '_1_', '_2_'
25
+ end
26
+ end
27
+
28
+ def run previous_job_id = nil
29
+ puts "Submitting bwa_aln_one_fastq #{sample_name}"
30
+ script_name = generate_script
31
+ ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
32
+ @job_name
33
+ end
34
+
35
+ def queuing_options previous_job_id = nil # options hash handed to Cagnut::JobManage.submit by #run
36
+ threads = 2
37
+ {
38
+ previous_job_id: previous_job_id,
39
+ var_env: [fastq_dir, sai_dir, threads], # FIXME(review): fastq_dir and sai_dir are neither defined nor delegated in this class — confirm intended values
40
+ adjust_memory: ['h_vmem=3.4G'],
41
+ parallel_env: [threads],
42
+ tools: ['bwa', 'aln']
43
+ }
44
+ end
45
+
46
+ def aln_params_for_r1
47
+ array = aln_params.dup
48
+ array.insert 1, 'aln'
49
+ array << "#{ref_fasta}"
50
+ array << "-f #{@output}"
51
+ array << "#{@input}"
52
+ array.uniq
53
+ end
54
+
55
+ def aln_params_for_r2
56
+ array = aln_params.dup
57
+ array.insert 1, 'aln'
58
+ array << "#{ref_fasta}"
59
+ array << "-f #{@output2}"
60
+ array << "#{@input2}"
61
+ array.uniq
62
+ end
63
+
64
+ def generate_script
65
+ script_name = "#{@order}_bwa_aln_one_fastq"
66
+ file = File.join jobs_dir, "#{script_name}.sh"
67
+ File.open(file, 'w') do |f|
68
+ f.puts <<-BASH.strip_heredoc
69
+ #!/bin/bash
70
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
71
+ if [[ #{@input} =~ _1_ ]]
72
+ then
73
+ #{aln_params_for_r2.join(" \\\n ")} \\
74
+ #{::Cagnut::JobManage.run_local}
75
+ else
76
+ fi
77
+
78
+ #{aln_params_for_r1.join(" \\\n ")} \\
79
+ #{::Cagnut::JobManage.run_local}
80
+
81
+ if [ ! -s "#{@output}" ]
82
+ then
83
+ echo "Missing SAI:#{@output} file!"
84
+ exit 100
85
+ fi
86
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
87
+
88
+ BASH
89
+ end
90
+ File.chmod(0700, file)
91
+ script_name
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,74 @@
1
+ module CagnutBwa
2
+ class Mem
3
+ extend Forwardable
4
+
5
+ def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
6
+ :ref_fasta, :jobs_dir, :prefix_name
7
+ def_delegators :'CagnutBwa.config', :rg_str, :mem_params
8
+
9
+ def initialize opts = {}
10
+ @order = sprintf '%02i', opts[:order]
11
+ @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
12
+ @input2 = File.expand_path fetch_filename, File.dirname(@input)
13
+ abort('Cant recognized sequence files') if @input2.nil?
14
+ @output = "#{opts[:dirs][:output]}/#{sample_name}_mem.sam"
15
+ @job_name = "#{prefix_name}_#{sample_name}_mem*"
16
+ end
17
+
18
+ def fetch_filename
19
+ filename = File.basename(@input)
20
+ if filename.match '_R1_'
21
+ filename.gsub '_R1_', '_R2_'
22
+ elsif filename.match '_1_'
23
+ filename.gsub '_1_', '_2_'
24
+ end
25
+ end
26
+
27
+ def run previous_job_id = nil
28
+ puts "Submitting bwaMem #{sample_name}"
29
+ script_name = generate_script
30
+ ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
31
+ [@job_name, @output]
32
+ end
33
+
34
+ def queuing_options previous_job_id = nil
35
+ {
36
+ previous_job_id: previous_job_id,
37
+ tools: ['bwa', 'mem']
38
+ }
39
+ end
40
+
41
+ def mem_options
42
+ array = mem_params.dup
43
+ array.insert 1, 'mem'
44
+ array << "-M"
45
+ array << "-R \"#{rg_str}\""
46
+ array << "#{ref_fasta}"
47
+ array << "#{@input}"
48
+ array << "#{@input2}"
49
+ array << "> #{@output}"
50
+ array.uniq
51
+ end
52
+
53
+ def generate_script
54
+ script_name = "#{@order}_bwa_mem"
55
+ file = File.join jobs_dir, "#{script_name}.sh"
56
+ template = Tilt.new(File.expand_path '../templates/mem.sh', __FILE__)
57
+ File.open(file, 'w') do |f|
58
+ f.puts template.render Object.new, job_params(script_name)
59
+ end
60
+ File.chmod(0700, file)
61
+ script_name
62
+ end
63
+
64
+ def job_params script_name
65
+ {
66
+ jobs_dir: jobs_dir,
67
+ script_name: script_name,
68
+ output: @output,
69
+ mem_params: mem_options,
70
+ run_local: ::Cagnut::JobManage.run_local
71
+ }
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,81 @@
1
+ module CagnutBwa
2
+ class Samp
3
+ extend Forwardable
4
+
5
+ def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
6
+ :ref_fasta, :jobs_dir, :dodebug, :prefix_name
7
+ def_delegators :'CagnutBwa.config', :rg_str,:samp_params
8
+
9
+ def initialize opts = {}
10
+ @order = sprintf '%02i', opts[:order]
11
+ @fastq = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
12
+ @fastq2 = File.expand_path fetch_filename(@fastq), File.dirname(@fastq)
13
+ @input = "#{opts[:dirs][:input]}/#{File.basename(@fastq)}.sai"
14
+ @input2 = File.expand_path fetch_filename(@input), File.dirname(@input)
15
+ abort('Cant recognized sequence files') if @input2.nil?
16
+ @output = "#{opts[:dirs][:output]}/#{sample_name}_aligned.sam.gz"
17
+ @job_name = "#{prefix_name}_#{sample_name}_Samp"
18
+ end
19
+
20
+ def fetch_filename file
21
+ filename = File.basename(file)
22
+ if filename.match '_R1_'
23
+ filename.gsub '_R1_', '_R2_'
24
+ elsif filename.match '_1_'
25
+ filename.gsub '_1_', '_2_'
26
+ end
27
+ end
28
+
29
+ def run previous_job_id = nil
30
+ puts "Submitting bwaSamp #{sample_name} RG_STR= #{rg_str}"
31
+ script_name = generate_script
32
+ ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
33
+ [@job_name, @output]
34
+ end
35
+
36
+ def queuing_options previous_job_id = nil
37
+ {
38
+ previous_job_id: previous_job_id,
39
+ adjust_memory: ['h_vmem=5G'],
40
+ tools: ['bwa', 'samp']
41
+ }
42
+ end
43
+
44
+ def samp_options
45
+ array = samp_params.dup
46
+ array.insert 1, 'sampe'
47
+ array << "-r \"#{rg_str}\""
48
+ array << "#{ref_fasta}"
49
+ array << "#{@input}"
50
+ array << "#{@input2}"
51
+ array << "#{@fastq}"
52
+ array << "#{@fastq2} | gzip > #{@output}"
53
+ array.uniq.compact
54
+ end
55
+
56
+ def generate_script
57
+ script_name = "#{@order}_bwa_samp"
58
+ file = File.join jobs_dir, "#{script_name}.sh"
59
+ template = Tilt.new(File.expand_path '../templates/samp.sh', __FILE__)
60
+ File.open(file, 'w') do |f|
61
+ f.puts template.render Object.new, job_params(script_name)
62
+ end
63
+ File.chmod(0700, file)
64
+ script_name
65
+ end
66
+
67
+ def job_params script_name
68
+ {
69
+ jobs_dir: jobs_dir,
70
+ script_name: script_name,
71
+ input: @input,
72
+ input2: @input2,
73
+ fastq: @fastq,
74
+ fastq2: @fastq2,
75
+ output: @output,
76
+ samp_options: samp_options,
77
+ run_local: ::Cagnut::JobManage.run_local
78
+ }
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,95 @@
1
+ module CagnutBwa
2
+ class SampOneFastq
3
+ extend Forwardable
4
+
5
+ def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name,
6
+ :ref_fasta, :jobs_dir, :data_type, :dodebug
7
+ def_delegators :'CagnutBwa.config', :rg_str, :samp_params
8
+
9
+ def initizaline opts = {}
10
+ @order = sprintf '%02i', opts[:order]
11
+ @job_name = "#{prefix_name}_#{sample_name}_Samp"
12
+ @seq = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
13
+ abort('Cant recognized sequence files') if @seq.nil?
14
+ @sai = "#{opts[:dirs][:input]}/#{File.basename(@seq).gsub('.gz', '').gsub('.txt','.sai')}"
15
+ @seq2 = @seq.match('_1_') ? "#{File.expand_path(fetch_filename(@seq), File.dirname(@seq))}" : ''
16
+ @sai2 = @sai.match('_1_') ? "#{opts[:dirs][:input]}/#{fetch_filename(@sai)}" : ''
17
+ @output = "#{opts[:dirs][:output]}/#{sample_name}_sequence.aligned.sam.gz"
18
+ end
19
+
20
+ def fetch_filename file
21
+ filename = File.basename(file)
22
+ if filename.match '_R1_'
23
+ filename.gsub '_R1_', '_R2_'
24
+ elsif filename.match '_1_'
25
+ filename.gsub '_1_', '_2_'
26
+ end
27
+ end
28
+
29
+ def run previous_job_id = nil
30
+ puts "Submitting bwaSampOneFastq #{sample_name} RG_STR= #{rg_str}"
31
+ script_name = generate_script
32
+ ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
33
+ [@job_name, @output]
34
+ end
35
+
36
+ def queuing_options previous_job_id = nil
37
+ {
38
+ previous_job_id: previous_job_id,
39
+ adjust_memory: ['h_vmem=5G'],
40
+ parallel_env: ['30'],
41
+ tools: ['bwa', 'samp']
42
+ }
43
+ end
44
+
45
+ def generate_script
46
+ script_name = data_type == 'ONEFASTQ' ? 'bwa_samp_one_fastq' : 'bwa_samse_one_fastq'
47
+ bwa_samp_one_fastq script_name
48
+ script_name
49
+ end
50
+
51
+ def samp_one_fastq_options
52
+ array = samp_params.dup
53
+ array.insert 1, 'sampe'
54
+ array << "-r \"#{rg_str}\""
55
+ array << "#{ref_fasta}"
56
+ array << "#{@sai}"
57
+ array << "#{@sai2}"
58
+ array << "#{@seq}"
59
+ array << "#{@seq2} | gzip > #{@output}"
60
+ array.uniq.compact
61
+ end
62
+
63
+ def samse_one_fastq_options
64
+ array = samp_params.dup
65
+ array.insert 1, 'sampe'
66
+ array << "-r \"#{rg_str}\""
67
+ array << "#{ref_fasta}"
68
+ array << "#{@sai}"
69
+ array << "#{@seq} | gzip > #{@output}"
70
+ array.uniq.compact
71
+ end
72
+
73
+ def bwa_samp_one_fastq script_name
74
+ file = File.join jobs_dir, "#{@order}_#{script_name}.sh"
75
+ path = File.expand_path "../templates/#{script_name}.sh", __FILE__
76
+ template = Tilt.new path
77
+ File.open(file, 'w') do |f|
78
+ f.puts template.render Object.new, job_params(script_name)
79
+ end
80
+ File.chmod(0700, file)
81
+ end
82
+
83
+ def job_params script_name
84
+ {
85
+ jobs_dir: jobs_dir,
86
+ script_name: script_name,
87
+ output: @output,
88
+ seq: @seq,
89
+ seq2: @seq2,
90
+ samp_options: (data_type == 'ONEFASTQ' ? 'samp_one_fastq_options' : 'samse_one_fastq_options'),
91
+ run_local: ::Cagnut::JobManage.run_local
92
+ }
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,104 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ echo #{aln_params_for_r1.join("\s")}
6
+ #{aln_params_for_r1.join(" \\\n ")} \\
7
+ #{run_local}
8
+
9
+ #force error when missing/empty sai . Would prevent continutation of pipeline
10
+ if [ ! -s "#{output}" ]
11
+ then
12
+ echo "Missing SAI:#{output} file!"
13
+ exit 100
14
+ fi
15
+
16
+ # check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ OK=1
22
+ else
23
+ #echo " empty variable"
24
+ echo "Improper stdout termination"
25
+ exit 100
26
+ fi
27
+
28
+ #check for correct number of sequences processed, based on fastq records
29
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
30
+
31
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
32
+ echo "bwa processed" $PROCESSED
33
+
34
+ if [[ "#{input}" =~ gz$ ]]
35
+ then
36
+ LINESFASTQ1=$(zcat "#{input}" | wc -l)
37
+ else
38
+ # non gz files
39
+ LINESFASTQ1=$(wc -l "#{input}" | cut -d" " -f1 )
40
+ fi
41
+ echo "Fastq1 number lines:= " $LINESFASTQ1
42
+ SEQLINES=$[ $LINESFASTQ1 / 4 ]
43
+ echo "Estimated Minimum Sequences:= " $SEQLINES
44
+ if (( "$PROCESSED" >= "$SEQLINES" ))
45
+ then
46
+ echo "Complete."
47
+ else
48
+ echo "Error, incorrect number of processed sequences"
49
+ exit 100
50
+ fi
51
+
52
+ ####################################################################
53
+ # PAIR _2_
54
+ # run and check pair _2_
55
+ #
56
+ #
57
+ ####################################################################
58
+
59
+ #{aln_params_for_r2.join(" \\\n ")} \\
60
+ #{run_local}
61
+
62
+ #force error when missing/empty sai . Would prevent continutation of pipeline
63
+ if [ ! -s "#{output2}" ]
64
+ then
65
+ echo "Missing SAI:#{output2} file!"
66
+ exit 100
67
+ fi
68
+
69
+ # check STDOUT has correct termination string
70
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
71
+
72
+ if [ -n "$HASENDING" ]
73
+ then
74
+ OK=1
75
+ else
76
+ #echo " empty variable"
77
+ echo "Improper stdout termination"
78
+ exit 100
79
+ fi
80
+
81
+ #check for correct number of sequences processed, based on fastq records
82
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
83
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
84
+ echo "bwa processed" $PROCESSED
85
+
86
+ if [[ "#{input2}" =~ gz$ ]]
87
+ then
88
+ LINESFASTQ2=$(zcat "#{input2}" | wc -l)
89
+ else
90
+ # non gz files
91
+ LINESFASTQ2=$(wc -l "#{input2}" | cut -d" " -f1 )
92
+ fi
93
+ echo "Fastq2 number lines:= " $LINESFASTQ2
94
+ SEQLINES=$[ $LINESFASTQ2 / 4 ]
95
+ echo "Estimated Minimum Sequences:= " $SEQLINES
96
+ if (( "$PROCESSED" >= "$SEQLINES" ))
97
+ then
98
+ echo "Complete."
99
+ else
100
+ echo "Error, incorrect number of processed sequences"
101
+ exit 100
102
+ fi
103
+
104
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
@@ -0,0 +1,67 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ #{samp_options.join(" \\\n ")} \\
6
+ #{run_local}
7
+
8
+ # check file size less than 1MB
9
+ #
10
+ # if [ $(stat --printf="%s" "#{sam_dir}/#{line}_sequence.aligned.sam.gz") -le 1024000 ]
11
+ # then
12
+ # echo "Error with output."
13
+ # exit 100
14
+ # fi
15
+
16
+ #check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ echo "OUTPUT ok."
22
+ else
23
+ #echo " empty variable"
24
+ echo "Improper stdout termination"
25
+ exit 100
26
+ fi
27
+
28
+ #check for correct number of sequences processed, based on fastq records
29
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
30
+
31
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
32
+ echo "bwa processed" $PROCESSED
33
+
34
+ if [[ "#{seq}" =~ gz$ ]]
35
+ then
36
+
37
+ LINESFASTQ1=$(zcat "#{seq}" | wc -l)
38
+ LINESFASTQ2=$(zcat "#{seq2}" | wc -l)
39
+
40
+ else
41
+ # non gz files: keep only the count, "wc -l FILE" also prints the file name
42
+ LINESFASTQ1=$(wc -l "#{seq}" | cut -d" " -f1 )
43
+ LINESFASTQ2=$(wc -l "#{seq2}" | cut -d" " -f1 )
44
+ fi
45
+
46
+ echo "Fastq1 number lines:= " $LINESFASTQ1
47
+ echo "Fastq2 number lines:= " $LINESFASTQ2
48
+
49
+ if (( "$LINESFASTQ1" >= "$LINESFASTQ2" ))
50
+ then
51
+ SEQLINES=$[ $LINESFASTQ2 / 4 ]
52
+ else
53
+ SEQLINES=$[ $LINESFASTQ1 / 4 ]
54
+ fi
55
+
56
+ echo "Estimated Minimum Sequences:= " $SEQLINES
57
+
58
+ if (( "$PROCESSED" >= "$SEQLINES" ))
59
+ then
60
+ echo "Complete."
61
+ else
62
+ echo "Error, incorrect number of processed sequences"
63
+ exit 100
64
+ fi
65
+
66
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
67
+
@@ -0,0 +1,34 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ #{samp_options.join(" \\\n ")} \\
6
+ #{run_local}
7
+
8
+ # check file size less than 1MB
9
+
10
+ if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
11
+ then
12
+ echo "Error with output."
13
+ exit 100
14
+ fi
15
+
16
+ #check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ echo "OUTPUT ok."
22
+ else
23
+ #echo " empty variable"
24
+ echo "Improper stdout termination"
25
+ exit 100
26
+ fi
27
+
28
+ #check for correct number of sequences processed, based on fastq records
29
+
30
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
31
+
32
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
33
+ echo "bwa processed" $PROCESSED
34
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
@@ -0,0 +1,29 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ echo #{mem_params.join("\s")}
6
+ #{mem_params.join(" \\\n ")} \\
7
+ #{run_local}
8
+
9
+ #force error when missing/empty sam . Would prevent continutation of pipeline
10
+ if [ ! -s #{output} ]
11
+ then
12
+ echo "Missing SAM:#{output} file!"
13
+ exit 100
14
+ fi
15
+
16
+ # check STDOUT has correct termination string
17
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " Processed")
18
+
19
+ if [ -n "$HASENDING" ]
20
+ then
21
+ OK=1
22
+ else
23
+ #echo " empty variable"
24
+ echo "Error: Improper stdout termination"
25
+ echo $EXITSTATUS
26
+ echo "bwa (mem) has likely crashed. Exiting"
27
+ exit 100
28
+ fi
29
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
@@ -0,0 +1,73 @@
1
+ #!/bin/bash
2
+
3
+ cd "#{jobs_dir}/../"
4
+ echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
5
+ # File Checking
6
+ if [ ! -s "#{input}" ];then
7
+ echo "Error: Missing " #{input}
8
+ exit 100
9
+ fi
10
+ if [ ! -s "#{input2}" ];then
11
+ echo "Error: Missing " #{input2}
12
+ exit 100
13
+ fi
14
+
15
+ #{samp_options.join(" \\\n ")} \\
16
+ #{run_local}
17
+
18
+ # check if file size less than 1MB
19
+ if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
20
+ then
21
+ echo "Error with output."
22
+ exit 100
23
+ fi
24
+
25
+ # check STDOUT has correct termination string
26
+ HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
27
+
28
+ if [ -n "$HASENDING" ]
29
+ then
30
+ OK=1
31
+ else
32
+ #echo " empty variable"
33
+ echo "Improper stdout termination"
34
+ exit 100
35
+ fi
36
+
37
+ #check for correct number of sequences processed, based on fastq records
38
+ PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
39
+
40
+ echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
41
+ echo "bwa processed" $PROCESSED
42
+
43
+ if [[ "#{fastq}" =~ gz$ ]]
44
+ then
45
+ LINESFASTQ1=$(zcat "#{fastq}" | wc -l)
46
+ LINESFASTQ2=$(zcat "#{fastq2}" | wc -l) # fastq2 is derived from fastq's file name, so it already ends in gz here
47
+ else
48
+ # non gz files
49
+ LINESFASTQ1=$(wc -l "#{fastq}" | cut -d" " -f1 )
50
+ LINESFASTQ2=$(wc -l "#{fastq2}" | cut -d" " -f1 )
51
+ fi
52
+
53
+ echo "Fastq1 number lines:= " $LINESFASTQ1
54
+ echo "Fastq2 number lines:= " $LINESFASTQ2
55
+
56
+ if (( "$LINESFASTQ1" >= "$LINESFASTQ2" ))
57
+ then
58
+ SEQLINES=$[ $LINESFASTQ2 / 4 ]
59
+ else
60
+ SEQLINES=$[ $LINESFASTQ1 / 4 ]
61
+ fi
62
+
63
+ echo "Estimated Minimum Sequences:= " $SEQLINES
64
+
65
+ if (( "$PROCESSED" >= "$SEQLINES" ))
66
+ then
67
+ echo "Complete."
68
+ else
69
+ echo "Error, incorrect number of processed sequences"
70
+ exit 100
71
+ fi
72
+ echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
73
+
@@ -0,0 +1,35 @@
1
+ module CagnutBwa
2
+ class Util
3
+ attr_accessor :bwa, :config
4
+
5
+ def initialize config
6
+ @config = config
7
+ @bwa = CagnutBwa::Base.new
8
+ end
9
+
10
+ def aln_one_fastq dirs, order=1, filename=nil
11
+ job_name = bwa.aln_one_fastq dirs, order, filename
12
+ [job_name, order+1]
13
+ end
14
+
15
+ def samp_one_fastq dirs, order=1, previous_job_id=nil, filename=nil
16
+ job_name, filename = bwa.samp_one_fastq dirs, order, previous_job_id, filename
17
+ [job_name, filename, order+1]
18
+ end
19
+
20
+ def aln dirs, order=1, previous_job_id = nil, filename=nil
21
+ job_name = bwa.aln dirs, order, previous_job_id, filename
22
+ [job_name, order+1]
23
+ end
24
+
25
+ def samp dirs, order=1, previous_job_id=nil, filename=nil
26
+ job_name, filename = bwa.samp dirs, order, previous_job_id, filename
27
+ [job_name, filename, order+1]
28
+ end
29
+
30
+ def mem dirs, order=1, filename = nil
31
+ job_name, filename = bwa.mem dirs, order, filename
32
+ [job_name, filename, order+1]
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,3 @@
1
module CagnutBwa
  # Gem version string. Frozen so that no caller can mutate the reported
  # version in place (e.g. via <<, gsub!, or replace).
  VERSION = "0.3.0".freeze
end
data/lib/cagnut_bwa.rb ADDED
@@ -0,0 +1,16 @@
1
+ require "cagnut_bwa/version"
2
+
3
module CagnutBwa
  class << self
    # Returns the memoized CagnutBwa::Configuration instance.
    #
    # On the first call (or whenever the cached value is nil/false) the
    # configuration is loaded before the instance is fetched and cached.
    def config
      @config ||= load_configuration
    end

    private

    # Feeds Cagnut's configuration and its 'bwa' params into
    # CagnutBwa::Configuration.load, then returns
    # CagnutBwa::Configuration.instance.
    def load_configuration
      core_config = Cagnut::Configuration.config
      bwa_params = Cagnut::Configuration.params['bwa']
      CagnutBwa::Configuration.load(core_config, bwa_params)
      CagnutBwa::Configuration.instance
    end
  end
end
13
+
14
+ require 'cagnut_bwa/configuration'
15
+ require 'cagnut_bwa/base'
16
+ require 'cagnut_bwa/util'
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cagnut_bwa
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Shi-Gang Wang
8
+ - Tse-Ching Ho
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2016-11-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: cagnut_core
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: bundler
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1.12'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.12'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rake
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '10.0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '10.0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rspec
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '3.0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ description: Cagnut BWA tools
71
+ email:
72
+ - seanwang@goldenio.com
73
+ - tsechingho@goldenio.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - ".ruby-version"
81
+ - ".travis.yml"
82
+ - Gemfile
83
+ - LICENSE.txt
84
+ - README.md
85
+ - Rakefile
86
+ - bin/console
87
+ - bin/setup
88
+ - cagnut_bwa.gemspec
89
+ - lib/cagnut_bwa.rb
90
+ - lib/cagnut_bwa/base.rb
91
+ - lib/cagnut_bwa/check_tools.rb
92
+ - lib/cagnut_bwa/configuration.rb
93
+ - lib/cagnut_bwa/functions/aln.rb
94
+ - lib/cagnut_bwa/functions/aln_one_fastq.rb
95
+ - lib/cagnut_bwa/functions/mem.rb
96
+ - lib/cagnut_bwa/functions/samp.rb
97
+ - lib/cagnut_bwa/functions/samp_one_fastq.rb
98
+ - lib/cagnut_bwa/functions/templates/aln.sh
99
+ - lib/cagnut_bwa/functions/templates/bwa_samp_one_fastq.sh
100
+ - lib/cagnut_bwa/functions/templates/bwa_samse_one_fastq.sh
101
+ - lib/cagnut_bwa/functions/templates/mem.sh
102
+ - lib/cagnut_bwa/functions/templates/samp.sh
103
+ - lib/cagnut_bwa/util.rb
104
+ - lib/cagnut_bwa/version.rb
105
+ homepage: https://github.com/CAGNUT/cagnut_bwa
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.5.1
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Cagnut BWA tools
129
+ test_files: []