cagnut_bwa 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cagnut_bwa.gemspec +27 -0
- data/lib/cagnut_bwa/base.rb +34 -0
- data/lib/cagnut_bwa/check_tools.rb +37 -0
- data/lib/cagnut_bwa/configuration.rb +56 -0
- data/lib/cagnut_bwa/functions/aln.rb +88 -0
- data/lib/cagnut_bwa/functions/aln_one_fastq.rb +94 -0
- data/lib/cagnut_bwa/functions/mem.rb +74 -0
- data/lib/cagnut_bwa/functions/samp.rb +81 -0
- data/lib/cagnut_bwa/functions/samp_one_fastq.rb +95 -0
- data/lib/cagnut_bwa/functions/templates/aln.sh +104 -0
- data/lib/cagnut_bwa/functions/templates/bwa_samp_one_fastq.sh +67 -0
- data/lib/cagnut_bwa/functions/templates/bwa_samse_one_fastq.sh +34 -0
- data/lib/cagnut_bwa/functions/templates/mem.sh +29 -0
- data/lib/cagnut_bwa/functions/templates/samp.sh +73 -0
- data/lib/cagnut_bwa/util.rb +35 -0
- data/lib/cagnut_bwa/version.rb +3 -0
- data/lib/cagnut_bwa.rb +16 -0
- metadata +129 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 212201476cdd487c7d175275b4b2dda5064794a8
|
4
|
+
data.tar.gz: e1b5fb5113d105b8ba7782c3b34e100fb05f6820
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4432d3082474d04bed19517a8cb42c2c350706c7d06ecd67e888b1997c6137e601a56bdb00fc86979b8b600dcaad6a3386b305fa0922024fb79b3dfcda233ad9
|
7
|
+
data.tar.gz: 002602c3c99c28f7eb7eef0504038c1c07e3df88fafecf8b15c53c202c96bd32040032abd2fda8d67f60edd984f6da195c0e36473a04f55179f27d9668b586d9
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.3.1
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Goldenio Technology
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# CagnutBwa
|
2
|
+
|
3
|
+
## Installation
|
4
|
+
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
gem 'cagnut_bwa'
|
9
|
+
```
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install cagnut_bwa
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
## Development
|
22
|
+
|
23
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
24
|
+
|
25
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
26
|
+
|
27
|
+
## Contributing
|
28
|
+
|
29
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/CAGNUT/cagnut_bwa.
|
30
|
+
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
35
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "cagnut_bwa"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/cagnut_bwa.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'cagnut_bwa/version'
|
5
|
+
|
6
|
+
# Gem specification for cagnut_bwa: metadata, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name    = "cagnut_bwa"
  spec.version = CagnutBwa::VERSION
  spec.authors = ['Shi-Gang Wang', 'Tse-Ching Ho']
  spec.email   = ['seanwang@goldenio.com', 'tsechingho@goldenio.com']

  spec.summary     = "Cagnut BWA tools"
  spec.description = "Cagnut BWA tools"
  spec.homepage    = "https://github.com/CAGNUT/cagnut_bwa"
  spec.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the base names of the packaged files under exe/.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency 'cagnut_core'

  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'cagnut_bwa/functions/aln'
|
2
|
+
require 'cagnut_bwa/functions/samp'
|
3
|
+
require 'cagnut_bwa/functions/aln_one_fastq'
|
4
|
+
require 'cagnut_bwa/functions/samp_one_fastq'
|
5
|
+
require 'cagnut_bwa/functions/mem'
|
6
|
+
|
7
|
+
module CagnutBwa
  # Facade with one method per BWA step. Each method builds the option hash
  # ({ input:, dirs:, order: }) expected by the step class, instantiates that
  # class and calls its #run, returning whatever #run returns.
  class Base
    # Runs the paired `bwa aln` step.
    # previous_job_id defaults to nil, matching #samp, #samp_one_fastq and
    # Aln#run; existing three- and four-argument callers are unaffected.
    def aln dirs, order, previous_job_id = nil, input = nil
      CagnutBwa::Aln.new(step_options(dirs, order, input)).run previous_job_id
    end

    # Runs the paired `bwa sampe` step.
    def samp dirs, order, previous_job_id = nil, input = nil
      CagnutBwa::Samp.new(step_options(dirs, order, input)).run previous_job_id
    end

    # Runs the one-fastq `bwa aln` step (no job dependency is passed).
    def aln_one_fastq dirs, order, input = nil
      CagnutBwa::AlnOneFastq.new(step_options(dirs, order, input)).run
    end

    # Runs the one-fastq sampe/samse step.
    def samp_one_fastq dirs, order, previous_job_id = nil, input = nil
      CagnutBwa::SampOneFastq.new(step_options(dirs, order, input)).run previous_job_id
    end

    # Runs the `bwa mem` step (no job dependency is passed).
    def mem dirs, order, input = nil
      CagnutBwa::Mem.new(step_options(dirs, order, input)).run
    end

    private

    # Option hash shared by every step class.
    def step_options dirs, order, input
      { input: input, dirs: dirs, order: order }
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module CagnutBwa
  # BWA-specific checks meant to be mixed into a host checker object.
  # The host is expected to provide `refs`, `check_tool_ver` and `@config`
  # (none of them are defined in this module).
  module CheckTools
    # Runs any inherited tool checks, then the BWA binary and index checks.
    def check_tool tools_path
      super if defined?(super)
      check_bwa tools_path['bwa']
      check_bwa_index refs['ref_fasta']
    end

    # Hands the host's `check_tool_ver` a block that yields the BWA version
    # string (nil when no path is configured). The block must return only the
    # version: the index check needs a reference path and is run separately
    # by #check_tool, so it is not invoked from here.
    def check_bwa path
      check_tool_ver 'BWA' do
        `#{path} 2>&1 | grep Version | cut -f2 -d ' '` if path
      end
    end

    # Checks that the BWA index file "<ref_path>.ann" exists and, if not,
    # prints the `bwa index` command needed to create it.
    def check_bwa_index ref_path
      tool = 'Bwa Index'
      file = "#{ref_path}.ann"
      command = "#{@config['tools']['bwa']} index #{ref_path}"
      check_ref_related file, tool, command
    end

    # Reports whether +file+ exists. When it is missing, prints the command
    # to run and marks the overall check as failed via @check_completed.
    def check_ref_related file, tool, command
      if File.exist?(file)
        puts "\t#{tool}: Done"
      else
        puts "\t#{tool}: Not Found!"
        puts "\tPlease execute command:"
        puts "\t\t#{command}"
        @check_completed = false
      end
    end
  end
end
|
36
|
+
|
37
|
+
Cagnut::Configuration::Checks::Tools.prepend CagnutBwa::CheckTools
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module CagnutBwa
  # Singleton holding the BWA settings derived from the pipeline config and
  # the per-step parameter hash.
  class Configuration
    include Singleton
    attr_accessor :rg_str, :mem_params, :aln_params, :samp_params

    class << self
      # Convenience wrapper: loads the settings into the singleton instance.
      def load config, params
        instance.load config, params
      end
    end

    # Stores config/params, writes an 'rg_str' entry into every sample and
    # assigns each computed attribute through its writer when one exists.
    def load config, params
      @config = config
      @params = params
      generate_rg_str
      attributes.each do |name, value|
        writer = "#{name}="
        send writer, value if respond_to? writer
      end
    end

    # Attribute name => value pairs assigned by #load.
    def attributes
      settings = { rg_str: @config['sample']['rg_str'] }
      %w(mem aln samp).each do |step|
        settings[:"#{step}_params"] = add_bwa_path_in_params(@params[step])
      end
      settings
    end

    # Returns a copy of the step's 'params' array with the configured bwa
    # path prepended; returns nil when the step has no parameters.
    def add_bwa_path_in_params method_params
      return if method_params.blank?
      with_path = method_params['params'].dup
      with_path.unshift "#{@config['tools']['bwa']}"
    end

    # Stores a read-group string under 'rg_str' in each sample hash. Fields
    # are joined with a literal backslash-t (two characters), not a tab.
    def generate_rg_str
      @config['samples'].each do |sample|
        fields = [
          "@RG",
          "ID:#{sample['rgid']}",
          "SM:#{sample['name']}",
          "PL:#{@config['info']['pl']}",
          "PU:#{sample['pu']}",
          "LB:#{@config['info']['lb']}",
          "DS:#{@config['info']['ds']}",
          "CN:#{@config['info']['cn']}",
          "DT:#{@config['info']['dt']}"
        ]
        sample.merge! 'rg_str' => fields.join('\t')
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that runs `bwa aln` on both reads of
  # a pair (R1 is the given input, R2 is derived from its file name).
  class Aln
    extend Forwardable
    def_delegators :'Cagnut::Configuration.base', :sample_name, :dodebug, :seqs_path,
                   :ref_fasta, :jobs_dir, :prefix_name, :pipeline_name
    def_delegators :'CagnutBwa.config', :aln_params

    # opts: :order (step number), :input (R1 fastq, defaults to seqs_path),
    # :dirs (hash with an :output directory).
    # Aborts when the R2 file name cannot be derived from the input name.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      # Check the derived name before expanding it: File.expand_path(nil)
      # raises TypeError, which would bypass the abort message.
      mate_name = fetch_filename
      abort('Cant recognized sequence files') if mate_name.nil?
      @input2 = File.expand_path mate_name, File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{File.basename(@input)}.sai"
      @output2 = "#{opts[:dirs][:output]}/#{mate_name}.sai"
      @job_name = "#{prefix_name}_#{sample_name}_Aln"
    end

    # Base name of the R2 file (_R1_ -> _R2_, otherwise _1_ -> _2_), or nil
    # when the input name contains neither marker.
    def fetch_filename
      filename = File.basename(@input)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns the job name.
    def run previous_job_id = nil
      puts "Submitting bwaAln #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      @job_name
    end

    # Options handed to the job manager on submission.
    def queuing_options previous_job_id = nil
      threads = 2
      {
        previous_job_id: previous_job_id,
        var_env: [ref_fasta],
        adjust_memory: ["h_vmem=adjustWorkingMem 5G #{threads}"],
        parallel_env: [threads],
        tools: ['bwa', 'aln']
      }
    end

    # Command words for aligning R1.
    def aln_params_for_r1
      aln_command @output, @input
    end

    # Command words for aligning R2.
    def aln_params_for_r2
      aln_command @output2, @input2
    end

    # Renders templates/aln.sh into "<order>_bwa_aln.sh" inside jobs_dir,
    # makes it owner-executable and returns the script name (no extension).
    def generate_script
      script_name = "#{@order}_bwa_aln"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/aln.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        input: @input,
        input2: @input2,
        output: @output,
        output2: @output2,
        aln_params_for_r1: aln_params_for_r1,
        aln_params_for_r2: aln_params_for_r2,
        run_local: ::Cagnut::JobManage.run_local
      }
    end

    private

    # Configured params with the 'aln' subcommand inserted after the first
    # element, followed by reference, "-f <sai>" and the fastq; duplicates
    # are removed.
    def aln_command sai, fastq
      array = aln_params.dup
      array.insert 1, 'aln'
      array << "#{ref_fasta}"
      array << "-f #{sai}"
      array << "#{fastq}"
      array.uniq
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits a `bwa aln` job script for the one-fastq layout:
  # the given file is always aligned, and its "_2_" mate is aligned as well
  # when the input path contains "_1_".
  class AlnOneFastq
    extend Forwardable

    # seqs_path is delegated as well because #initialize falls back to it
    # when no :input is given.
    def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name, :dodebug,
                   :ref_fasta, :jobs_dir, :data_type, :seqs_path
    def_delegators :'CagnutBwa.config', :aln_params

    # opts: :order (step number), :input (fastq, defaults to seqs_path),
    # :dirs (hash with an :output directory).
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      # The fallback is an interpolated string, so test for empty, not nil.
      abort('Cant recognized sequence files') if @input.to_s.empty?
      out_dir = opts[:dirs][:output]
      @output = "#{out_dir}/#{File.basename(@input).gsub('.gz', '').gsub('.txt','.sai')}"
      # Mate files are only set up when a mate name can actually be derived;
      # expanding a nil name would raise TypeError.
      mate_name = fetch_filename(@input) if @input.match '_1_'
      if mate_name
        @input2 = File.expand_path mate_name, File.dirname(@input)
        @output2 = "#{out_dir}/#{fetch_filename(@output)}"
      end
      @job_name = "#{prefix_name}_#{sample_name}_Aln_one_fastq"
    end

    # Base name of the mate of +file+ (_R1_ -> _R2_, otherwise _1_ -> _2_),
    # or nil when the name contains neither marker.
    def fetch_filename file
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns the job name.
    def run previous_job_id = nil
      puts "Submitting bwa_aln_one_fastq #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      @job_name
    end

    # Options handed to the job manager on submission.
    def queuing_options previous_job_id = nil
      threads = 2
      {
        previous_job_id: previous_job_id,
        var_env: [fastq_dir, sai_dir, threads],
        adjust_memory: ['h_vmem=3.4G'],
        parallel_env: [threads],
        tools: ['bwa', 'aln']
      }
    end

    # Command words for aligning the given fastq.
    def aln_params_for_r1
      aln_command @output, @input
    end

    # Command words for aligning the mate fastq.
    def aln_params_for_r2
      aln_command @output2, @input2
    end

    # Writes "<order>_bwa_aln_one_fastq.sh" into jobs_dir, makes it
    # owner-executable and returns the script name (no extension).
    def generate_script
      script_name = "#{@order}_bwa_aln_one_fastq"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts script_body(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    private

    # NOTE(review): fastq_dir and sai_dir were referenced by queuing_options
    # but defined nowhere in this class; they are taken here to be the
    # directories of the input fastq and of the output sai. Confirm.
    def fastq_dir
      File.dirname(@input)
    end

    def sai_dir
      File.dirname(@output)
    end

    # Configured params with the 'aln' subcommand inserted after the first
    # element, followed by reference, "-f <sai>" and the fastq; duplicates
    # are removed.
    def aln_command sai, fastq
      array = aln_params.dup
      array.insert 1, 'aln'
      array << "#{ref_fasta}"
      array << "-f #{sai}"
      array << "#{fastq}"
      array.uniq
    end

    # One shell command spread over continuation lines, ending with the job
    # manager's run_local suffix.
    def shell_command words, run_local
      "#{words.join(" \\\n  ")} \\\n  #{run_local}"
    end

    # Text of the job script. The mate command is emitted only when a mate
    # exists, so no empty shell branch (a bash syntax error) is produced.
    def script_body script_name
      run_local = ::Cagnut::JobManage.run_local
      mate_command = @input2 ? shell_command(aln_params_for_r2, run_local) : ''
      <<~BASH
        #!/bin/bash
        echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
        #{mate_command}

        #{shell_command(aln_params_for_r1, run_local)}

        if [ ! -s "#{@output}" ]
        then
          echo "Missing SAI:#{@output} file!"
          exit 100
        fi
        echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
      BASH
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that runs `bwa mem` on a read pair
  # (R1 is the given input, R2 is derived from its file name).
  class Mem
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
                   :ref_fasta, :jobs_dir, :prefix_name
    def_delegators :'CagnutBwa.config', :rg_str, :mem_params

    # opts: :order (step number), :input (R1 fastq, defaults to seqs_path),
    # :dirs (hash with an :output directory).
    # Aborts when the R2 file name cannot be derived from the input name.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      # Check the derived name before expanding it: File.expand_path(nil)
      # raises TypeError, which would bypass the abort message.
      mate_name = fetch_filename
      abort('Cant recognized sequence files') if mate_name.nil?
      @input2 = File.expand_path mate_name, File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{sample_name}_mem.sam"
      @job_name = "#{prefix_name}_#{sample_name}_mem*"
    end

    # Base name of the R2 file (_R1_ -> _R2_, otherwise _1_ -> _2_), or nil
    # when the input name contains neither marker.
    def fetch_filename
      filename = File.basename(@input)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job_name, output_path].
    def run previous_job_id = nil
      puts "Submitting bwaMem #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options handed to the job manager on submission.
    def queuing_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        tools: ['bwa', 'mem']
      }
    end

    # Command words: configured params with 'mem' inserted after the first
    # element, then -M, the read group, reference, both fastqs and the
    # output redirection; duplicates are removed.
    def mem_options
      array = mem_params.dup
      array.insert 1, 'mem'
      array << "-M"
      array << "-R \"#{rg_str}\""
      array << "#{ref_fasta}"
      array << "#{@input}"
      array << "#{@input2}"
      array << "> #{@output}"
      array.uniq
    end

    # Renders templates/mem.sh into "<order>_bwa_mem.sh" inside jobs_dir,
    # makes it owner-executable and returns the script name (no extension).
    def generate_script
      script_name = "#{@order}_bwa_mem"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/mem.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        output: @output,
        mem_params: mem_options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that runs `bwa sampe` on the two
  # .sai files and two fastq files of a read pair, gzipping the SAM output.
  class Samp
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
                   :ref_fasta, :jobs_dir, :dodebug, :prefix_name
    def_delegators :'CagnutBwa.config', :rg_str, :samp_params

    # opts: :order (step number), :input (R1 fastq, defaults to seqs_path),
    # :dirs (hash with :input, the .sai directory, and :output).
    # Aborts when the R2 file name cannot be derived from the fastq name.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @fastq = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      # Check the derived name before expanding it: File.expand_path(nil)
      # raises TypeError, which would bypass the abort message.
      fastq_mate = fetch_filename(@fastq)
      abort('Cant recognized sequence files') if fastq_mate.nil?
      @fastq2 = File.expand_path fastq_mate, File.dirname(@fastq)
      @input = "#{opts[:dirs][:input]}/#{File.basename(@fastq)}.sai"
      # The .sai name embeds the fastq name, so it carries the same marker.
      @input2 = File.expand_path fetch_filename(@input), File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{sample_name}_aligned.sam.gz"
      @job_name = "#{prefix_name}_#{sample_name}_Samp"
    end

    # Base name of the mate of +file+ (_R1_ -> _R2_, otherwise _1_ -> _2_),
    # or nil when the name contains neither marker.
    def fetch_filename file
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job_name, output_path].
    def run previous_job_id = nil
      puts "Submitting bwaSamp #{sample_name} RG_STR= #{rg_str}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options handed to the job manager on submission.
    def queuing_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=5G'],
        tools: ['bwa', 'samp']
      }
    end

    # Command words: configured params with 'sampe' inserted after the first
    # element, then read group, reference, both .sai files, both fastqs and
    # the gzip pipeline; duplicates and nils are removed.
    def samp_options
      array = samp_params.dup
      array.insert 1, 'sampe'
      array << "-r \"#{rg_str}\""
      array << "#{ref_fasta}"
      array << "#{@input}"
      array << "#{@input2}"
      array << "#{@fastq}"
      array << "#{@fastq2} | gzip > #{@output}"
      array.uniq.compact
    end

    # Renders templates/samp.sh into "<order>_bwa_samp.sh" inside jobs_dir,
    # makes it owner-executable and returns the script name (no extension).
    def generate_script
      script_name = "#{@order}_bwa_samp"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/samp.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        input: @input,
        input2: @input2,
        fastq: @fastq,
        fastq2: @fastq2,
        output: @output,
        samp_options: samp_options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the SAM-generation job for the one-fastq layout.
  # When data_type is 'ONEFASTQ' the sampe template and options are used;
  # otherwise the samse ones.
  class SampOneFastq
    extend Forwardable

    # seqs_path is delegated as well because #initialize falls back to it
    # when no :input is given.
    def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name,
                   :ref_fasta, :jobs_dir, :data_type, :dodebug, :seqs_path
    def_delegators :'CagnutBwa.config', :rg_str, :samp_params

    # opts: :order (step number), :input (fastq, defaults to seqs_path),
    # :dirs (hash with :input, the .sai directory, and :output).
    # (The method was previously misspelled, so this setup never ran.)
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_#{sample_name}_Samp"
      @seq = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      # The fallback is an interpolated string, so test for empty, not nil.
      abort('Cant recognized sequence files') if @seq.to_s.empty?
      sai_dir = opts[:dirs][:input]
      @sai = "#{sai_dir}/#{File.basename(@seq).gsub('.gz', '').gsub('.txt','.sai')}"
      # Mate paths stay empty strings when no mate name can be derived.
      seq_mate = fetch_filename(@seq) if @seq.match('_1_')
      sai_mate = fetch_filename(@sai) if @sai.match('_1_')
      @seq2 = seq_mate ? File.expand_path(seq_mate, File.dirname(@seq)) : ''
      @sai2 = sai_mate ? "#{sai_dir}/#{sai_mate}" : ''
      @output = "#{opts[:dirs][:output]}/#{sample_name}_sequence.aligned.sam.gz"
    end

    # Base name of the mate of +file+ (_R1_ -> _R2_, otherwise _1_ -> _2_),
    # or nil when the name contains neither marker.
    def fetch_filename file
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job_name, output_path].
    def run previous_job_id = nil
      puts "Submitting bwaSampOneFastq #{sample_name} RG_STR= #{rg_str}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options handed to the job manager on submission.
    def queuing_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=5G'],
        parallel_env: ['30'],
        tools: ['bwa', 'samp']
      }
    end

    # Renders the template matching data_type and returns the name of the
    # written script without extension. The name carries the order prefix,
    # like the other step classes, so it matches the file actually written.
    def generate_script
      template_name = data_type == 'ONEFASTQ' ? 'bwa_samp_one_fastq' : 'bwa_samse_one_fastq'
      bwa_samp_one_fastq template_name
      "#{@order}_#{template_name}"
    end

    # `bwa sampe` command words: both .sai files, both fastqs, gzip pipeline.
    def samp_one_fastq_options
      array = samp_params.dup
      array.insert 1, 'sampe'
      array << "-r \"#{rg_str}\""
      array << "#{ref_fasta}"
      array << "#{@sai}"
      array << "#{@sai2}"
      array << "#{@seq}"
      array << "#{@seq2} | gzip > #{@output}"
      array.uniq.compact
    end

    # `bwa samse` command words: a single .sai and fastq, gzip pipeline.
    def samse_one_fastq_options
      array = samp_params.dup
      array.insert 1, 'samse'
      array << "-r \"#{rg_str}\""
      array << "#{ref_fasta}"
      array << "#{@sai}"
      array << "#{@seq} | gzip > #{@output}"
      array.uniq.compact
    end

    # Renders templates/<script_name>.sh into "<order>_<script_name>.sh"
    # inside jobs_dir and makes it owner-executable.
    # NOTE(review): the template receives the prefixed name as script_name,
    # mirroring the sibling step classes; confirm that the job manager names
    # the .err log after the submitted script.
    def bwa_samp_one_fastq script_name
      prefixed_name = "#{@order}_#{script_name}"
      file = File.join jobs_dir, "#{prefixed_name}.sh"
      path = File.expand_path "../templates/#{script_name}.sh", __FILE__
      template = Tilt.new path
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(prefixed_name)
      end
      File.chmod(0700, file)
    end

    # Locals made available to the template. samp_options is the array of
    # command words (the templates call #join on it), not a method name.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        output: @output,
        seq: @seq,
        seq2: @seq2,
        samp_options: (data_type == 'ONEFASTQ' ? samp_one_fastq_options : samse_one_fastq_options),
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/bin/bash
# Job template for paired `bwa aln`: aligns R1, validates the result, then
# does the same for R2. The #{...} placeholders are filled in from the
# locals passed by the generating Ruby class before the script is written.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

# Validates one alignment run; exits the whole script with status 100 on any
# failure so the pipeline does not continue.
#   $1 - fastq file that was aligned
#   $2 - expected .sai output file
#   $3 - label used in the line-count message (Fastq1 / Fastq2)
check_aln_result() {
  local fastq="$1" sai="$2" label="$3"
  local log="#{jobs_dir}/#{script_name}.err"
  local summary processed lines min_seqs

  # Force an error when the sai is missing or empty.
  if [ ! -s "$sai" ]
  then
    echo "Missing SAI:$sai file!"
    exit 100
  fi

  # The tail of the job's .err log must contain the termination string.
  summary=$(tail -5 "$log" | grep " sequences have been processed.")
  if [ -z "$summary" ]
  then
    echo "Improper stdout termination"
    exit 100
  fi

  # Compare the number of processed sequences with the fastq record count.
  processed=$(printf '%s\n' "$summary" | grep -o -P " \\d+ ")
  echo "checking stdout file: " "$log"
  echo "bwa processed" $processed

  if [[ "$fastq" =~ gz$ ]]
  then
    lines=$(zcat "$fastq" | wc -l)
  else
    # Read from stdin so wc prints only the count, without the file name.
    lines=$(wc -l < "$fastq")
  fi
  echo "$label number lines:= " $lines

  # Four fastq lines per sequence record.
  min_seqs=$(( lines / 4 ))
  echo "Estimated Minimum Sequences:= " $min_seqs
  if (( processed >= min_seqs ))
  then
    echo "Complete."
  else
    echo "Error, incorrect number of processed sequences"
    exit 100
  fi
}

####################################################################
# PAIR _1_
####################################################################

echo #{aln_params_for_r1.join("\s")}
#{aln_params_for_r1.join(" \\\n ")} \\
#{run_local}

check_aln_result "#{input}" "#{output}" "Fastq1"

####################################################################
# PAIR _2_
# run and check pair _2_
####################################################################

#{aln_params_for_r2.join(" \\\n ")} \\
#{run_local}

check_aln_result "#{input2}" "#{output2}" "Fastq2"

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/bin/bash
# Job template for `bwa sampe` in the one-fastq layout. The #{...}
# placeholders are filled in from the locals passed by the generating Ruby
# class (jobs_dir, script_name, output, seq, seq2, samp_options, run_local).

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
#{samp_options.join(" \\\n ")} \\
#{run_local}

# check file size less than 1MB (currently disabled)
# Only locals that the generator supplies are interpolated here; the
# disabled block therefore refers to the output file.
#
# if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
# then
#   echo "Error with output."
#   exit 100
# fi

# Check that the tail of the job's .err log has the termination string.
HASENDING=$(tail -5 "#{jobs_dir}/#{script_name}.err" | grep " sequences have been processed.")

if [ -n "$HASENDING" ]
then
  echo "OUTPUT ok."
else
  echo "Improper stdout termination"
  exit 100
fi

# Check for the correct number of sequences processed, based on fastq records.
PROCESSED=$(tail -5 "#{jobs_dir}/#{script_name}.err" | grep " sequences have been processed." | grep -o -P " \\d+ ")

echo "checking stdout file: " "#{jobs_dir}/#{script_name}.err"
echo "bwa processed" $PROCESSED

if [[ "#{seq}" =~ gz$ ]]
then
  LINESFASTQ1=$(zcat "#{seq}" | wc -l)
  LINESFASTQ2=$(zcat "#{seq2}" | wc -l)
else
  # Non-gz files: read from stdin so wc prints only the count. With a file
  # argument the output also contains the file name, which breaks the
  # arithmetic comparisons below.
  LINESFASTQ1=$(wc -l < "#{seq}")
  LINESFASTQ2=$(wc -l < "#{seq2}")
fi

echo "Fastq1 number lines:= " $LINESFASTQ1
echo "Fastq2 number lines:= " $LINESFASTQ2

# Use the smaller of the two files; four fastq lines per sequence record.
if (( LINESFASTQ1 >= LINESFASTQ2 ))
then
  SEQLINES=$(( LINESFASTQ2 / 4 ))
else
  SEQLINES=$(( LINESFASTQ1 / 4 ))
fi

echo "Estimated Minimum Sequences:= " $SEQLINES

if (( PROCESSED >= SEQLINES ))
then
  echo "Complete."
else
  echo "Error, incorrect number of processed sequences"
  exit 100
fi

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
67
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
cd "#{jobs_dir}/../"
|
4
|
+
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
5
|
+
#{samp_options.join(" \\\n ")} \\
|
6
|
+
#{run_local}
|
7
|
+
|
8
|
+
# check file size less than 1MB
|
9
|
+
|
10
|
+
if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
|
11
|
+
then
|
12
|
+
echo "Error with output."
|
13
|
+
exit 100
|
14
|
+
fi
|
15
|
+
|
16
|
+
#check STDOUT has correct termination string
|
17
|
+
HASENDING=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed.")
|
18
|
+
|
19
|
+
if [ -n "$HASENDING" ]
|
20
|
+
then
|
21
|
+
echo "OUTPUT ok."
|
22
|
+
else
|
23
|
+
#echo " empty variable"
|
24
|
+
echo "Improper stdout termination"
|
25
|
+
exit 100
|
26
|
+
fi
|
27
|
+
|
28
|
+
#check for correct number of sequences processed, based on fastq records
|
29
|
+
|
30
|
+
PROCESSED=$(tail -5 #{jobs_dir}/#{script_name}.err | grep " sequences have been processed." | grep -o -P " \\d+ ")
|
31
|
+
|
32
|
+
echo "checking stdout file: " #{jobs_dir}/#{script_name}.err
|
33
|
+
echo "bwa processed" $PROCESSED
|
34
|
+
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/bin/bash
# Job template for the bwa mem step.
# The jobs_dir, script_name, mem_params, run_local and output placeholders are
# interpolated before the script runs (presumably by the gem's Ruby code).
# Exits with status 100 when the SAM output or the log looks wrong.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
echo #{mem_params.join("\s")}
#{mem_params.join(" \\\n ")} \\
  #{run_local}

# Remember the exit status of the command above so it can be reported below
# (it used to be echoed without ever being assigned).
EXITSTATUS=$?

# Force an error when the SAM is missing or empty; this prevents continuation
# of the pipeline.
if [ ! -s "#{output}" ]
then
  echo "Missing SAM:#{output} file!"
  exit 100
fi

# The last 5 lines of the job's .err log must contain the termination string.
HASENDING=$(tail -5 "#{jobs_dir}/#{script_name}.err" | grep " Processed")

if [ -z "$HASENDING" ]
then
  echo "Error: Improper stdout termination"
  echo $EXITSTATUS
  echo "bwa (mem) has likely crashed. Exiting"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#!/bin/bash
# Job template for the paired-input bwa SAM step (samp).
# The jobs_dir, script_name, input, input2, samp_options, run_local, output,
# fastq and fastq2 placeholders are interpolated before the script runs
# (presumably by the gem's Ruby code).
# Exits with status 100 as soon as one of the sanity checks below fails.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

# File checking: both inputs must exist and be non-empty.
if [ ! -s "#{input}" ];then
  echo "Error: Missing " "#{input}"
  exit 100
fi
if [ ! -s "#{input2}" ];then
  echo "Error: Missing " "#{input2}"
  exit 100
fi

#{samp_options.join(" \\\n ")} \\
  #{run_local}

# Fail when the output is missing, empty, or not larger than 1024000 bytes.
# The -s test has to come first: for a missing file stat prints nothing, the
# numeric comparison then errors out as "false" and the failure goes unnoticed.
if [ ! -s "#{output}" ] || [ "$(stat --printf="%s" "#{output}")" -le 1024000 ]
then
  echo "Error with output."
  exit 100
fi

# The last 5 lines of the job's .err log must contain the termination message.
ERRLOG="#{jobs_dir}/#{script_name}.err"
HASENDING=$(tail -5 "$ERRLOG" | grep " sequences have been processed.")

if [ -z "$HASENDING" ]
then
  echo "Improper stdout termination"
  exit 100
fi

# Check for the correct number of processed sequences, based on fastq records.
# Only the last matching line is used: several progress lines in the tail would
# otherwise yield several numbers and break the arithmetic comparison below.
PROCESSED=$(tail -5 "$ERRLOG" | grep " sequences have been processed." | tail -1 | grep -o -P " \\d+ ")

echo "checking stdout file: " "$ERRLOG"
echo "bwa processed" $PROCESSED

if [[ "#{fastq}" =~ gz$ ]]
then
  LINESFASTQ1=$(zcat "#{fastq}" | wc -l)
  # NOTE(review): ".gz" is appended to fastq2 but not to fastq; confirm against
  # the caller that fastq2 is really passed without its .gz suffix.
  LINESFASTQ2=$(zcat "#{fastq2}.gz" | wc -l)
else
  # non gz files; reading from stdin makes wc print the bare count
  LINESFASTQ1=$(wc -l < "#{fastq}")
  LINESFASTQ2=$(wc -l < "#{fastq2}")
fi

echo "Fastq1 number lines:= " $LINESFASTQ1
echo "Fastq2 number lines:= " $LINESFASTQ2

# A fastq record spans 4 lines; the shorter file bounds the expected count.
if (( LINESFASTQ1 >= LINESFASTQ2 ))
then
  SEQLINES=$(( LINESFASTQ2 / 4 ))
else
  SEQLINES=$(( LINESFASTQ1 / 4 ))
fi

echo "Estimated Minimum Sequences:= " $SEQLINES

if (( "$PROCESSED" >= "$SEQLINES" ))
then
  echo "Complete."
else
  echo "Error, incorrect number of processed sequences"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
73
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module CagnutBwa
  # Thin facade over CagnutBwa::Base: every public method forwards its
  # arguments to the same-named Base method and appends the next order
  # number (order + 1) to what it hands back.
  class Util
    attr_accessor :bwa, :config

    # Keeps the given config and builds the Base instance used for delegation.
    def initialize(config)
      @config = config
      @bwa = CagnutBwa::Base.new
    end

    # @return [Array] [job_name, next_order]
    def aln_one_fastq(dirs, order = 1, filename = nil)
      job = bwa.aln_one_fastq(dirs, order, filename)
      [job, order + 1]
    end

    # @return [Array] [job_name, filename, next_order]
    def samp_one_fastq(dirs, order = 1, previous_job_id = nil, filename = nil)
      job, produced = bwa.samp_one_fastq(dirs, order, previous_job_id, filename)
      [job, produced, order + 1]
    end

    # @return [Array] [job_name, next_order]
    def aln(dirs, order = 1, previous_job_id = nil, filename = nil)
      job = bwa.aln(dirs, order, previous_job_id, filename)
      [job, order + 1]
    end

    # @return [Array] [job_name, filename, next_order]
    def samp(dirs, order = 1, previous_job_id = nil, filename = nil)
      job, produced = bwa.samp(dirs, order, previous_job_id, filename)
      [job, produced, order + 1]
    end

    # @return [Array] [job_name, filename, next_order]
    def mem(dirs, order = 1, filename = nil)
      job, produced = bwa.mem(dirs, order, filename)
      [job, produced, order + 1]
    end
  end
end
|
data/lib/cagnut_bwa.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "cagnut_bwa/version"
|
2
|
+
|
3
|
+
module CagnutBwa
  # Returns the memoized CagnutBwa::Configuration instance. On the first call
  # the configuration is loaded from Cagnut's config and its 'bwa' params;
  # later calls hand back the cached object without loading again.
  def self.config
    return @config if @config

    CagnutBwa::Configuration.load(Cagnut::Configuration.config, Cagnut::Configuration.params['bwa'])
    @config = CagnutBwa::Configuration.instance
  end
end
|
13
|
+
|
14
|
+
require 'cagnut_bwa/configuration'
|
15
|
+
require 'cagnut_bwa/base'
|
16
|
+
require 'cagnut_bwa/util'
|
metadata
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cagnut_bwa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shi-Gang Wang
|
8
|
+
- Tse-Ching Ho
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-11-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: cagnut_core
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bundler
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.12'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.12'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rake
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '10.0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '10.0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rspec
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '3.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.0'
|
70
|
+
description: Cagnut BWA tools
|
71
|
+
email:
|
72
|
+
- seanwang@goldenio.com
|
73
|
+
- tsechingho@goldenio.com
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".ruby-version"
|
81
|
+
- ".travis.yml"
|
82
|
+
- Gemfile
|
83
|
+
- LICENSE.txt
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- bin/console
|
87
|
+
- bin/setup
|
88
|
+
- cagnut_bwa.gemspec
|
89
|
+
- lib/cagnut_bwa.rb
|
90
|
+
- lib/cagnut_bwa/base.rb
|
91
|
+
- lib/cagnut_bwa/check_tools.rb
|
92
|
+
- lib/cagnut_bwa/configuration.rb
|
93
|
+
- lib/cagnut_bwa/functions/aln.rb
|
94
|
+
- lib/cagnut_bwa/functions/aln_one_fastq.rb
|
95
|
+
- lib/cagnut_bwa/functions/mem.rb
|
96
|
+
- lib/cagnut_bwa/functions/samp.rb
|
97
|
+
- lib/cagnut_bwa/functions/samp_one_fastq.rb
|
98
|
+
- lib/cagnut_bwa/functions/templates/aln.sh
|
99
|
+
- lib/cagnut_bwa/functions/templates/bwa_samp_one_fastq.sh
|
100
|
+
- lib/cagnut_bwa/functions/templates/bwa_samse_one_fastq.sh
|
101
|
+
- lib/cagnut_bwa/functions/templates/mem.sh
|
102
|
+
- lib/cagnut_bwa/functions/templates/samp.sh
|
103
|
+
- lib/cagnut_bwa/util.rb
|
104
|
+
- lib/cagnut_bwa/version.rb
|
105
|
+
homepage: https://github.com/CAGNUT/cagnut_bwa
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.5.1
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: Cagnut BWA tools
|
129
|
+
test_files: []
|