cagnut_bwa 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cagnut_bwa.gemspec +27 -0
- data/lib/cagnut_bwa/base.rb +34 -0
- data/lib/cagnut_bwa/check_tools.rb +37 -0
- data/lib/cagnut_bwa/configuration.rb +56 -0
- data/lib/cagnut_bwa/functions/aln.rb +88 -0
- data/lib/cagnut_bwa/functions/aln_one_fastq.rb +94 -0
- data/lib/cagnut_bwa/functions/mem.rb +74 -0
- data/lib/cagnut_bwa/functions/samp.rb +81 -0
- data/lib/cagnut_bwa/functions/samp_one_fastq.rb +95 -0
- data/lib/cagnut_bwa/functions/templates/aln.sh +104 -0
- data/lib/cagnut_bwa/functions/templates/bwa_samp_one_fastq.sh +67 -0
- data/lib/cagnut_bwa/functions/templates/bwa_samse_one_fastq.sh +34 -0
- data/lib/cagnut_bwa/functions/templates/mem.sh +29 -0
- data/lib/cagnut_bwa/functions/templates/samp.sh +73 -0
- data/lib/cagnut_bwa/util.rb +35 -0
- data/lib/cagnut_bwa/version.rb +3 -0
- data/lib/cagnut_bwa.rb +16 -0
- metadata +129 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 212201476cdd487c7d175275b4b2dda5064794a8
|
4
|
+
data.tar.gz: e1b5fb5113d105b8ba7782c3b34e100fb05f6820
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4432d3082474d04bed19517a8cb42c2c350706c7d06ecd67e888b1997c6137e601a56bdb00fc86979b8b600dcaad6a3386b305fa0922024fb79b3dfcda233ad9
|
7
|
+
data.tar.gz: 002602c3c99c28f7eb7eef0504038c1c07e3df88fafecf8b15c53c202c96bd32040032abd2fda8d67f60edd984f6da195c0e36473a04f55179f27d9668b586d9
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.3.1
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Goldenio Technology
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# CagnutBwa
|
2
|
+
|
3
|
+
## Installation
|
4
|
+
|
5
|
+
Add this line to your application's Gemfile:
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
gem 'cagnut_bwa'
|
9
|
+
```
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install cagnut_bwa
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
## Development
|
22
|
+
|
23
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
24
|
+
|
25
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
26
|
+
|
27
|
+
## Contributing
|
28
|
+
|
29
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/CAGNUT/cagnut_bwa.
|
30
|
+
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
35
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "cagnut_bwa"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/cagnut_bwa.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'cagnut_bwa/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "cagnut_bwa"
|
8
|
+
spec.version = CagnutBwa::VERSION
|
9
|
+
spec.authors = ['Shi-Gang Wang', 'Tse-Ching Ho']
|
10
|
+
spec.email = ['seanwang@goldenio.com', 'tsechingho@goldenio.com']
|
11
|
+
|
12
|
+
spec.summary = %q{Cagnut BWA tools}
|
13
|
+
spec.description = %q{Cagnut BWA tools}
|
14
|
+
spec.homepage = "https://github.com/CAGNUT/cagnut_bwa"
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
+
spec.bindir = "exe"
|
19
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_dependency 'cagnut_core'
|
23
|
+
|
24
|
+
spec.add_development_dependency "bundler", "~> 1.12"
|
25
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
26
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
27
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'cagnut_bwa/functions/aln'
|
2
|
+
require 'cagnut_bwa/functions/samp'
|
3
|
+
require 'cagnut_bwa/functions/aln_one_fastq'
|
4
|
+
require 'cagnut_bwa/functions/samp_one_fastq'
|
5
|
+
require 'cagnut_bwa/functions/mem'
|
6
|
+
|
7
|
+
module CagnutBwa
  # Thin facade over the individual BWA job classes: every method builds the
  # option hash expected by the job class, instantiates it and calls +run+.
  #
  # Common parameters:
  #   dirs            - hash of directories handed to the job (the job classes
  #                     read +dirs[:input]+ and/or +dirs[:output]+)
  #   order           - step number; the job classes format it with '%02i'
  #   previous_job_id - passed through to the job's +run+ (may be nil)
  #   input           - optional input file; nil lets the job pick its default
  class Base
    # Runs CagnutBwa::Aln and returns whatever its +run+ returns.
    # +previous_job_id+ now defaults to nil like the other entry points.
    def aln dirs, order, previous_job_id = nil, input = nil
      CagnutBwa::Aln.new(job_opts(dirs, order, input)).run previous_job_id
    end

    # Runs CagnutBwa::Samp and returns whatever its +run+ returns.
    def samp dirs, order, previous_job_id = nil, input = nil
      CagnutBwa::Samp.new(job_opts(dirs, order, input)).run previous_job_id
    end

    # Runs CagnutBwa::AlnOneFastq (no previous job is passed on).
    def aln_one_fastq dirs, order, input = nil
      CagnutBwa::AlnOneFastq.new(job_opts(dirs, order, input)).run
    end

    # Runs CagnutBwa::SampOneFastq and returns whatever its +run+ returns.
    def samp_one_fastq dirs, order, previous_job_id = nil, input = nil
      CagnutBwa::SampOneFastq.new(job_opts(dirs, order, input)).run previous_job_id
    end

    # Runs CagnutBwa::Mem (no previous job is passed on).
    def mem dirs, order, input = nil
      CagnutBwa::Mem.new(job_opts(dirs, order, input)).run
    end

    private

    # Option hash shared by all job constructors.
    def job_opts dirs, order, input
      { input: input, dirs: dirs, order: order }
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module CagnutBwa
  # Tool checks for BWA. The module expects its host to provide
  # +check_tool_ver+, +refs+ and an +@config+ hash (presumably
  # Cagnut::Configuration::Checks::Tools, which this file prepends it to).
  module CheckTools
    # Runs the host's own checks first (when a +super+ exists), then checks
    # the BWA binary and the BWA index of the reference fasta.
    #
    # tools_path - hash of tool paths; only the 'bwa' entry is read here.
    def check_tool tools_path
      super if defined?(super)
      check_bwa tools_path['bwa']
      check_bwa_index refs['ref_fasta']
    end

    # Reports the BWA version through +check_tool_ver+.
    #
    # The block yields the version string parsed from the tool's usage output,
    # or nil when no path is configured. The index check used to be invoked
    # here without its required argument, which raised ArgumentError and hid
    # the version; the index is checked by +check_tool+ instead.
    def check_bwa path
      check_tool_ver 'BWA' do
        `#{path} 2>&1 | grep Version | cut -f2 -d ' '` if path
      end
    end

    # Checks that the index file "<ref_path>.ann" exists and, if not, prints
    # the `bwa index` command that would create it.
    def check_bwa_index ref_path
      tool = 'Bwa Index'
      file = "#{ref_path}.ann"
      command = "#{@config['tools']['bwa']} index #{ref_path}"
      check_ref_related file, tool, command
    end

    # Prints whether +file+ exists. When it is missing, prints the command to
    # run and sets @check_completed to false.
    def check_ref_related file, tool, command
      if File.exist?(file)
        puts "\t#{tool}: Done"
      else
        puts "\t#{tool}: Not Found!"
        puts "\tPlease execute command:"
        puts "\t\t#{command}"
        @check_completed = false
      end
    end
  end
end
|
36
|
+
|
37
|
+
Cagnut::Configuration::Checks::Tools.prepend CagnutBwa::CheckTools
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module CagnutBwa
  # Singleton holding the BWA-specific settings derived from the pipeline
  # configuration and the per-tool parameter hash.
  class Configuration
    include Singleton
    attr_accessor :rg_str, :mem_params, :aln_params, :samp_params

    class << self
      # Convenience wrapper: loads the settings into the singleton instance.
      def load config, params
        instance.load config, params
      end
    end

    # Stores +config+ and +params+, writes a read-group string into every
    # sample, then assigns each computed attribute through its writer.
    def load config, params
      @config = config
      @params = params
      generate_rg_str
      attributes.each do |name, value|
        writer = "#{name}="
        send writer, value if respond_to? writer
      end
    end

    # Attribute values computed from the loaded configuration.
    # NOTE(review): rg_str is read from @config['sample'] while
    # generate_rg_str fills @config['samples'] — presumably 'sample' refers to
    # one of those hashes; confirm against the caller.
    def attributes
      {
        rg_str: @config['sample']['rg_str'],
        mem_params: add_bwa_path_in_params(@params['mem']),
        aln_params: add_bwa_path_in_params(@params['aln']),
        samp_params: add_bwa_path_in_params(@params['samp'])
      }
    end

    # Returns a new array with the configured bwa path in front of the
    # entries of method_params['params'], or nil when +method_params+ is nil
    # or empty. A missing 'params' entry yields just the bwa path instead of
    # raising. The array stored in +method_params+ is never modified.
    def add_bwa_path_in_params method_params
      return if method_params.nil? || method_params.empty?
      [@config['tools']['bwa'].to_s, *Array(method_params['params'])]
    end

    # Adds an 'rg_str' entry (SAM @RG header fields) to every sample hash.
    def generate_rg_str
      info = @config['info']
      @config['samples'].each do |sample|
        fields = [
          '@RG',
          "ID:#{sample['rgid']}",
          "SM:#{sample['name']}",
          "PL:#{info['pl']}",
          "PU:#{sample['pu']}",
          "LB:#{info['lb']}",
          "DS:#{info['ds']}",
          "CN:#{info['cn']}",
          "DT:#{info['dt']}"
        ]
        # Single quotes on purpose: the fields are separated by a literal
        # backslash followed by "t", not by a tab character.
        sample.merge! 'rg_str' => fields.join('\t')
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that runs `bwa aln` on both mates of
  # a read pair. The second mate is located by swapping the "_R1_"/"_1_"
  # marker in the first mate's file name.
  class Aln
    extend Forwardable
    def_delegators :'Cagnut::Configuration.base', :sample_name, :dodebug, :seqs_path,
                   :ref_fasta, :jobs_dir, :prefix_name, :pipeline_name
    def_delegators :'CagnutBwa.config', :aln_params

    # opts - :order (step number), :input (first-mate fastq; defaults to
    #        seqs_path) and :dirs (only dirs[:output] is read).
    #
    # Aborts when the input name carries no "_R1_"/"_1_" marker. The mate
    # name is checked before it is expanded, because File.expand_path raises
    # on nil and never returns nil itself.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      mate_name = fetch_filename
      abort('Cant recognized sequence files') if mate_name.nil?
      @input2 = File.expand_path mate_name, File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{File.basename(@input)}.sai"
      @output2 = "#{opts[:dirs][:output]}/#{mate_name}.sai"
      @job_name = "#{prefix_name}_#{sample_name}_Aln"
    end

    # File name of the second mate derived from @input, or nil when the name
    # contains neither "_R1_" nor "_1_".
    def fetch_filename
      filename = File.basename(@input)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns the job name.
    def run previous_job_id = nil
      puts "Submitting bwaAln #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      @job_name
    end

    # Options handed to Cagnut::JobManage.submit.
    def queuing_options previous_job_id = nil
      threads = 2
      {
        previous_job_id: previous_job_id,
        var_env: [ref_fasta],
        adjust_memory: ["h_vmem=adjustWorkingMem 5G #{threads}"],
        parallel_env: [threads],
        tools: ['bwa', 'aln']
      }
    end

    # Command words of the `bwa aln` call for the first mate.
    def aln_params_for_r1
      build_aln_params @output, @input
    end

    # Command words of the `bwa aln` call for the second mate.
    def aln_params_for_r2
      build_aln_params @output2, @input2
    end

    # Renders templates/aln.sh into "<order>_bwa_aln.sh" inside jobs_dir,
    # makes it executable for the owner and returns the script name.
    def generate_script
      script_name = "#{@order}_bwa_aln"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/aln.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        input: @input,
        input2: @input2,
        output: @output,
        output2: @output2,
        aln_params_for_r1: aln_params_for_r1,
        aln_params_for_r2: aln_params_for_r2,
        run_local: ::Cagnut::JobManage.run_local
      }
    end

    private

    # Copy of the configured parameters with 'aln' inserted after the bwa
    # path, followed by reference, output flag and input; duplicates removed.
    def build_aln_params sai, fastq
      words = aln_params.dup
      words.insert 1, 'aln'
      words << "#{ref_fasta}" << "-f #{sai}" << "#{fastq}"
      words.uniq
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits a job script that runs `bwa aln` on one fastq file
  # and, when the input path contains "_1_" and a mate name can be derived,
  # on its "_2_" mate as well.
  class AlnOneFastq
    extend Forwardable

    # seqs_path is delegated too: the constructor falls back to it when no
    # :input is given.
    def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name, :dodebug,
                   :ref_fasta, :jobs_dir, :data_type, :seqs_path
    def_delegators :'CagnutBwa.config', :aln_params

    # opts - :order (step number), :input (fastq; defaults to seqs_path) and
    #        :dirs (only dirs[:output] is read).
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      abort('Cant recognized sequence files') if @input.nil?
      @output = "#{opts[:dirs][:output]}/#{File.basename(@input).gsub('.gz', '').gsub('.txt','.sai')}"
      # The "_1_" test looks at the whole path, so the derived mate name can
      # still be nil (marker only in a directory name); treat that as unpaired.
      mate_input = fetch_filename(@input) if @input.match '_1_'
      if mate_input
        @input2 = File.expand_path mate_input, File.dirname(@input)
        @output2 = "#{opts[:dirs][:output]}/#{fetch_filename(@output)}"
      end
      @job_name = "#{prefix_name}_#{sample_name}_Aln_one_fastq"
    end

    # Mate file name for +file+ ("_R1_" -> "_R2_", "_1_" -> "_2_"), or nil
    # when the base name carries neither marker.
    def fetch_filename file
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns the job name.
    def run previous_job_id = nil
      puts "Submitting bwa_aln_one_fastq #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      @job_name
    end

    # Options handed to Cagnut::JobManage.submit.
    # NOTE(review): var_env used to reference fastq_dir and sai_dir, which are
    # defined nowhere in this class; the directories of the input and output
    # files are used as the apparent intent — confirm against JobManage.
    def queuing_options previous_job_id = nil
      threads = 2
      {
        previous_job_id: previous_job_id,
        var_env: [File.dirname(@input), File.dirname(@output), threads],
        adjust_memory: ['h_vmem=3.4G'],
        parallel_env: [threads],
        tools: ['bwa', 'aln']
      }
    end

    # Command words of the `bwa aln` call for the given fastq.
    def aln_params_for_r1
      build_aln_params @output, @input
    end

    # Command words of the `bwa aln` call for the mate fastq.
    def aln_params_for_r2
      build_aln_params @output2, @input2
    end

    # Writes "<order>_bwa_aln_one_fastq.sh" into jobs_dir, makes it executable
    # for the owner and returns the script name.
    def generate_script
      script_name = "#{@order}_bwa_aln_one_fastq"
      file = File.join jobs_dir, "#{script_name}.sh"
      File.open(file, 'w') do |f|
        f.puts script_content(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    private

    # Copy of the configured parameters with 'aln' inserted after the bwa
    # path, followed by reference, output flag and input; duplicates removed.
    def build_aln_params sai, fastq
      words = aln_params.dup
      words.insert 1, 'aln'
      words << "#{ref_fasta}" << "-f #{sai}" << "#{fastq}"
      words.uniq
    end

    # Text of the job script. The mate command is emitted only when a mate
    # exists; the former shell-side test had an empty `else` branch, which
    # bash rejects as a syntax error.
    def script_content script_name
      run_local = ::Cagnut::JobManage.run_local
      lines = [
        '#!/bin/bash',
        %(echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs")
      ]
      lines.concat command_lines(aln_params_for_r2, run_local) if @input2
      lines.concat command_lines(aln_params_for_r1, run_local)
      lines.concat [
        %(if [ ! -s "#{@output}" ]),
        'then',
        %(  echo "Missing SAI:#{@output} file!"),
        '  exit 100',
        'fi',
        %(echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs")
      ]
      lines.join("\n")
    end

    # One command spread over continuation lines, followed by the run_local
    # suffix and a blank separator line.
    def command_lines words, run_local
      ["#{words.join(" \\\n ")} \\", "  #{run_local}", '']
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that runs `bwa mem` on a read pair
  # and redirects the alignment into "<sample_name>_mem.sam".
  class Mem
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
                   :ref_fasta, :jobs_dir, :prefix_name
    def_delegators :'CagnutBwa.config', :rg_str, :mem_params

    # opts - :order (step number), :input (first-mate fastq; defaults to
    #        seqs_path) and :dirs (only dirs[:output] is read).
    #
    # Aborts when the input name carries no "_R1_"/"_1_" marker. The mate
    # name is checked before it is expanded, because File.expand_path raises
    # on nil and never returns nil itself.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @input = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      mate_name = fetch_filename
      abort('Cant recognized sequence files') if mate_name.nil?
      @input2 = File.expand_path mate_name, File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{sample_name}_mem.sam"
      @job_name = "#{prefix_name}_#{sample_name}_mem*"
    end

    # File name of the second mate derived from @input, or nil when the name
    # contains neither "_R1_" nor "_1_".
    def fetch_filename
      filename = File.basename(@input)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job name, output path].
    def run previous_job_id = nil
      puts "Submitting bwaMem #{sample_name}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options handed to Cagnut::JobManage.submit.
    def queuing_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        tools: ['bwa', 'mem']
      }
    end

    # Command words of the `bwa mem` call: configured parameters with 'mem'
    # inserted after the bwa path, then -M, the read group, the reference,
    # both fastq files and the output redirection; duplicates removed.
    def mem_options
      words = mem_params.dup
      words.insert 1, 'mem'
      words << "-M" << "-R \"#{rg_str}\"" << "#{ref_fasta}"
      words << "#{@input}" << "#{@input2}" << "> #{@output}"
      words.uniq
    end

    # Renders templates/mem.sh into "<order>_bwa_mem.sh" inside jobs_dir,
    # makes it executable for the owner and returns the script name.
    def generate_script
      script_name = "#{@order}_bwa_mem"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/mem.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        output: @output,
        mem_params: mem_options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that runs `bwa sampe` on the two
  # .sai files and two fastq files of a read pair, gzipping the SAM output.
  class Samp
    extend Forwardable

    def_delegators :'Cagnut::Configuration.base', :sample_name, :seqs_path,
                   :ref_fasta, :jobs_dir, :dodebug, :prefix_name
    def_delegators :'CagnutBwa.config', :rg_str, :samp_params

    # opts - :order (step number), :input (first-mate fastq; defaults to
    #        seqs_path) and :dirs (dirs[:input] holds the .sai files,
    #        dirs[:output] receives the SAM).
    #
    # Aborts when the fastq name carries no "_R1_"/"_1_" marker. The mate
    # name is checked before it is expanded, because File.expand_path raises
    # on nil and never returns nil itself.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @fastq = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      mate_fastq = fetch_filename(@fastq)
      abort('Cant recognized sequence files') if mate_fastq.nil?
      @fastq2 = File.expand_path mate_fastq, File.dirname(@fastq)
      @input = "#{opts[:dirs][:input]}/#{File.basename(@fastq)}.sai"
      # The .sai name embeds the fastq name, so it carries the same marker.
      @input2 = File.expand_path fetch_filename(@input), File.dirname(@input)
      @output = "#{opts[:dirs][:output]}/#{sample_name}_aligned.sam.gz"
      @job_name = "#{prefix_name}_#{sample_name}_Samp"
    end

    # Mate file name for +file+ ("_R1_" -> "_R2_", "_1_" -> "_2_"), or nil
    # when the base name carries neither marker.
    def fetch_filename file
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job name, output path].
    def run previous_job_id = nil
      puts "Submitting bwaSamp #{sample_name} RG_STR= #{rg_str}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options handed to Cagnut::JobManage.submit.
    def queuing_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=5G'],
        tools: ['bwa', 'samp']
      }
    end

    # Command words of the `bwa sampe` call: configured parameters with
    # 'sampe' inserted after the bwa path, the read group, the reference,
    # both .sai files, both fastq files and the gzip pipe; duplicates and
    # nils removed.
    def samp_options
      words = samp_params.dup
      words.insert 1, 'sampe'
      words << "-r \"#{rg_str}\"" << "#{ref_fasta}"
      words << "#{@input}" << "#{@input2}" << "#{@fastq}"
      words << "#{@fastq2} | gzip > #{@output}"
      words.uniq.compact
    end

    # Renders templates/samp.sh into "<order>_bwa_samp.sh" inside jobs_dir,
    # makes it executable for the owner and returns the script name.
    def generate_script
      script_name = "#{@order}_bwa_samp"
      file = File.join jobs_dir, "#{script_name}.sh"
      template = Tilt.new(File.expand_path '../templates/samp.sh', __FILE__)
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(script_name)
      end
      File.chmod(0700, file)
      script_name
    end

    # Locals made available to the template.
    def job_params script_name
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        input: @input,
        input2: @input2,
        fastq: @fastq,
        fastq2: @fastq2,
        output: @output,
        samp_options: samp_options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module CagnutBwa
  # Generates and submits the job script that turns the .sai file(s) made from
  # one fastq (and its "_2_" mate, when present) into a gzipped SAM file.
  # data_type 'ONEFASTQ' selects the paired `bwa sampe` template; any other
  # value selects the single-end `bwa samse` template.
  class SampOneFastq
    extend Forwardable

    # seqs_path is delegated too: the constructor falls back to it when no
    # :input is given.
    def_delegators :'Cagnut::Configuration.base', :sample_name, :prefix_name,
                   :ref_fasta, :jobs_dir, :data_type, :dodebug, :seqs_path
    def_delegators :'CagnutBwa.config', :rg_str, :samp_params

    # opts - :order (step number), :input (fastq; defaults to seqs_path) and
    #        :dirs (dirs[:input] holds the .sai files, dirs[:output] receives
    #        the SAM).
    #
    # The method name was misspelled before, so it never ran as the
    # constructor and `new(opts)` raised ArgumentError.
    def initialize opts = {}
      @order = sprintf '%02i', opts[:order]
      @job_name = "#{prefix_name}_#{sample_name}_Samp"
      @seq = opts[:input].nil? ? "#{seqs_path}" : opts[:input]
      abort('Cant recognized sequence files') if @seq.nil?
      @sai = "#{opts[:dirs][:input]}/#{File.basename(@seq).gsub('.gz', '').gsub('.txt','.sai')}"
      # The "_1_" tests look at whole paths, so a derived mate name can still
      # be nil (marker only in a directory name); fall back to '' in that case.
      mate_seq = fetch_filename(@seq) if @seq.match('_1_')
      @seq2 = mate_seq ? File.expand_path(mate_seq, File.dirname(@seq)) : ''
      mate_sai = fetch_filename(@sai) if @sai.match('_1_')
      @sai2 = mate_sai ? "#{opts[:dirs][:input]}/#{mate_sai}" : ''
      @output = "#{opts[:dirs][:output]}/#{sample_name}_sequence.aligned.sam.gz"
    end

    # Mate file name for +file+ ("_R1_" -> "_R2_", "_1_" -> "_2_"), or nil
    # when the base name carries neither marker.
    def fetch_filename file
      filename = File.basename(file)
      if filename.match '_R1_'
        filename.gsub '_R1_', '_R2_'
      elsif filename.match '_1_'
        filename.gsub '_1_', '_2_'
      end
    end

    # Writes the job script, submits it and returns [job name, output path].
    def run previous_job_id = nil
      puts "Submitting bwaSampOneFastq #{sample_name} RG_STR= #{rg_str}"
      script_name = generate_script
      ::Cagnut::JobManage.submit script_name, @job_name, queuing_options(previous_job_id)
      [@job_name, @output]
    end

    # Options handed to Cagnut::JobManage.submit.
    def queuing_options previous_job_id = nil
      {
        previous_job_id: previous_job_id,
        adjust_memory: ['h_vmem=5G'],
        parallel_env: ['30'],
        tools: ['bwa', 'samp']
      }
    end

    # Writes the script for the current data_type and returns the name of the
    # written script, i.e. including the order prefix, as the other job
    # classes in this gem do.
    def generate_script
      template_name = data_type == 'ONEFASTQ' ? 'bwa_samp_one_fastq' : 'bwa_samse_one_fastq'
      bwa_samp_one_fastq template_name
      "#{@order}_#{template_name}"
    end

    # Command words of the paired-end `bwa sampe` call.
    def samp_one_fastq_options
      words = samp_params.dup
      words.insert 1, 'sampe'
      words << "-r \"#{rg_str}\"" << "#{ref_fasta}"
      words << "#{@sai}" << "#{@sai2}" << "#{@seq}"
      words << "#{@seq2} | gzip > #{@output}"
      words.uniq.compact
    end

    # Command words of the single-end call. It takes one .sai and one fastq,
    # which is the `bwa samse` form; `sampe` requires two of each.
    def samse_one_fastq_options
      words = samp_params.dup
      words.insert 1, 'samse'
      words << "-r \"#{rg_str}\"" << "#{ref_fasta}" << "#{@sai}"
      words << "#{@seq} | gzip > #{@output}"
      words.uniq.compact
    end

    # Renders templates/<script_name>.sh into "<order>_<script_name>.sh"
    # inside jobs_dir and makes it executable for the owner. The template
    # receives the order-prefixed name so that the paths it builds from
    # script_name match the script that was actually written.
    def bwa_samp_one_fastq script_name
      job_script = "#{@order}_#{script_name}"
      file = File.join jobs_dir, "#{job_script}.sh"
      path = File.expand_path "../templates/#{script_name}.sh", __FILE__
      template = Tilt.new path
      File.open(file, 'w') do |f|
        f.puts template.render Object.new, job_params(job_script)
      end
      File.chmod(0700, file)
    end

    # Locals made available to the template. samp_options is the array of
    # command words (the templates call +join+ on it), not a method name.
    def job_params script_name
      options = data_type == 'ONEFASTQ' ? samp_one_fastq_options : samse_one_fastq_options
      {
        jobs_dir: jobs_dir,
        script_name: script_name,
        output: @output,
        seq: @seq,
        seq2: @seq2,
        samp_options: options,
        run_local: ::Cagnut::JobManage.run_local
      }
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/bin/bash
# Job template: runs `bwa aln` for both mates of a read pair and verifies
# each run. Placeholders are filled in by the Ruby caller before execution;
# they are substituted inside comments as well, so comments must not contain
# placeholder syntax for names the caller does not supply.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

ERR_LOG="#{jobs_dir}/#{script_name}.err"

# Verify one finished `bwa aln` run; exits the script with status 100 on
# the first failed check.
#   $1 - fastq file that was aligned
#   $2 - .sai file the run must have produced
#   $3 - mate number used in the log messages (1 or 2)
verify_aln() {
  local fastq="$1" sai="$2" mate="$3"
  local has_ending processed lines seqlines

  # force error when missing/empty sai. Would prevent continuation of pipeline
  if [ ! -s "$sai" ]
  then
    echo "Missing SAI:$sai file!"
    exit 100
  fi

  # check STDOUT has correct termination string
  has_ending=$(tail -5 "$ERR_LOG" | grep " sequences have been processed.")
  if [ -z "$has_ending" ]
  then
    echo "Improper stdout termination"
    exit 100
  fi

  # check for correct number of sequences processed, based on fastq records.
  # Only the last matching line is used: several progress lines can fall
  # inside the tail window, and a multi-line value breaks the comparison.
  processed=$(tail -5 "$ERR_LOG" | grep " sequences have been processed." | grep -o -P " \\d+ " | tail -1)

  echo "checking stdout file: " "$ERR_LOG"
  echo "bwa processed" $processed

  if [[ "$fastq" =~ gz$ ]]
  then
    lines=$(zcat "$fastq" | wc -l)
  else
    # non gz files
    lines=$(wc -l < "$fastq")
  fi
  echo "Fastq${mate} number lines:= " $lines
  seqlines=$(( lines / 4 ))
  echo "Estimated Minimum Sequences:= " $seqlines
  if (( ${processed:-0} >= seqlines ))
  then
    echo "Complete."
  else
    echo "Error, incorrect number of processed sequences"
    exit 100
  fi
}

echo #{aln_params_for_r1.join("\s")}
#{aln_params_for_r1.join(" \\\n ")} \\
  #{run_local}

verify_aln "#{input}" "#{output}" 1

####################################################################
# PAIR _2_
# run and check pair _2_
####################################################################

#{aln_params_for_r2.join(" \\\n ")} \\
  #{run_local}

verify_aln "#{input2}" "#{output2}" 2

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/bin/bash
# Job template: runs the paired `bwa sampe` command built by the Ruby caller
# and verifies its log. Placeholders are substituted inside comments as well,
# so comments may only use names the caller supplies.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
#{samp_options.join(" \\\n ")} \\
  #{run_local}

# check file size less than 1MB (disabled).
# The disabled lines previously used two placeholders that the caller never
# supplies; they now use the output placeholder that it does supply.
#
# if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
# then
#   echo "Error with output."
#   exit 100
# fi

ERR_LOG="#{jobs_dir}/#{script_name}.err"

# check STDOUT has correct termination string
HASENDING=$(tail -5 "$ERR_LOG" | grep " sequences have been processed.")

if [ -n "$HASENDING" ]
then
  echo "OUTPUT ok."
else
  echo "Improper stdout termination"
  exit 100
fi

# check for correct number of sequences processed, based on fastq records.
# Only the last matching line is used so the value is a single number.
PROCESSED=$(tail -5 "$ERR_LOG" | grep " sequences have been processed." | grep -o -P " \\d+ " | tail -1)

echo "checking stdout file: " "$ERR_LOG"
echo "bwa processed" $PROCESSED

if [[ "#{seq}" =~ gz$ ]]
then
  LINESFASTQ1=$(zcat "#{seq}" | wc -l)
  LINESFASTQ2=$(zcat "#{seq2}" | wc -l)
else
  # non gz files: read through stdin so wc prints the count without the
  # file name, which would break the arithmetic below
  LINESFASTQ1=$(wc -l < "#{seq}")
  LINESFASTQ2=$(wc -l < "#{seq2}")
fi

echo "Fastq1 number lines:= " $LINESFASTQ1
echo "Fastq2 number lines:= " $LINESFASTQ2

# the smaller of the two files gives the minimum number of records
if (( LINESFASTQ1 >= LINESFASTQ2 ))
then
  SEQLINES=$(( LINESFASTQ2 / 4 ))
else
  SEQLINES=$(( LINESFASTQ1 / 4 ))
fi

echo "Estimated Minimum Sequences:= " $SEQLINES

if (( ${PROCESSED:-0} >= SEQLINES ))
then
  echo "Complete."
else
  echo "Error, incorrect number of processed sequences"
  exit 100
fi

echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
67
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/bin/bash
# Job template: runs the single-end command built by the Ruby caller and
# verifies its output file and log. Placeholders are filled in by the caller
# before execution.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
#{samp_options.join(" \\\n ")} \\
  #{run_local}

ERR_LOG="#{jobs_dir}/#{script_name}.err"

# fail when the output is not larger than 1024000 bytes
if [ $(stat --printf="%s" "#{output}") -le 1024000 ]
then
  echo "Error with output."
  exit 100
fi

# check STDOUT has correct termination string
HASENDING=$(tail -5 "$ERR_LOG" | grep " sequences have been processed.")

if [ -n "$HASENDING" ]
then
  echo "OUTPUT ok."
else
  echo "Improper stdout termination"
  exit 100
fi

# report the number of sequences processed (last matching log line only)
PROCESSED=$(tail -5 "$ERR_LOG" | grep " sequences have been processed." | grep -o -P " \\d+ " | tail -1)

echo "checking stdout file: " "$ERR_LOG"
echo "bwa processed" $PROCESSED
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/bin/bash
# Echoes and runs the command assembled from mem_params, then verifies that a
# non-empty output file exists and that the job's .err log ends as expected.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
echo #{mem_params.join("\s")}
#{mem_params.join(" \\\n ")} \\
#{run_local}

# Keep the command's exit status so the failure branch below can report it
# (the variable was previously echoed without ever being assigned).
EXITSTATUS=$?

# Force an error when the SAM file is missing or empty; this prevents the
# pipeline from continuing. The path is quoted so spaces do not break the test.
if [ ! -s "#{output}" ]
then
  echo "Missing SAM:#{output} file!"
  exit 100
fi

# The last five lines of the .err log must contain " Processed".
HASENDING=$(tail -5 "#{jobs_dir}/#{script_name}.err" | grep " Processed")

if [ -z "$HASENDING" ]
then
  echo "Error: Improper stdout termination"
  echo "$EXITSTATUS"
  echo "bwa (mem) has likely crashed. Exiting"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#!/bin/bash
# Checks both inputs, runs the command assembled from samp_options, then
# validates the output size, the .err log termination message and the number
# of processed sequences against the two FASTQ files.

cd "#{jobs_dir}/../"
echo "#{script_name} is starting at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"

# File Checking: both inputs must exist and be non-empty.
if [ ! -s "#{input}" ]; then
  echo "Error: Missing " "#{input}"
  exit 100
fi
if [ ! -s "#{input2}" ]; then
  echo "Error: Missing " "#{input2}"
  exit 100
fi

#{samp_options.join(" \\\n ")} \\
#{run_local}

# Fail when the output is missing or not larger than 1024000 bytes (about 1MB).
# The size is captured first: with a missing file stat prints nothing, and an
# empty operand would make the numeric test error out instead of failing the job.
OUTPUT_SIZE=$(stat --printf="%s" "#{output}")
if [ -z "$OUTPUT_SIZE" ] || [ "$OUTPUT_SIZE" -le 1024000 ]
then
  echo "Error with output."
  exit 100
fi

ERR_LOG="#{jobs_dir}/#{script_name}.err"

# The last five lines of the .err log must contain the termination message.
HASENDING=$(tail -5 "$ERR_LOG" | grep " sequences have been processed.")

if [ -z "$HASENDING" ]
then
  echo "Improper stdout termination"
  exit 100
fi

# Extract the processed-sequence count. Only the last matching line is used so
# that PROCESSED holds a single number even if several progress lines match.
PROCESSED=$(echo "$HASENDING" | tail -n 1 | grep -o -P " \\d+ ")

echo "checking stdout file: " "$ERR_LOG"
echo "bwa processed" $PROCESSED

# Count the lines of both FASTQ files. The gz test is made on fastq only and is
# assumed to hold for fastq2 as well; fastq2 is used as given (appending
# another .gz suffix pointed zcat at a path that differs from every other use).
if [[ "#{fastq}" =~ gz$ ]]
then
  LINESFASTQ1=$(zcat "#{fastq}" | wc -l)
  LINESFASTQ2=$(zcat "#{fastq2}" | wc -l)
else
  # non gz files
  LINESFASTQ1=$(wc -l "#{fastq}" | cut -d" " -f1 )
  LINESFASTQ2=$(wc -l "#{fastq2}" | cut -d" " -f1 )
fi

echo "Fastq1 number lines:= " $LINESFASTQ1
echo "Fastq2 number lines:= " $LINESFASTQ2

# Use the shorter file, divided by 4 (lines per FASTQ record), as the minimum
# number of sequences that must have been processed.
if (( "$LINESFASTQ1" >= "$LINESFASTQ2" ))
then
  SEQLINES=$(( LINESFASTQ2 / 4 ))
else
  SEQLINES=$(( LINESFASTQ1 / 4 ))
fi

echo "Estimated Minimum Sequences:= " $SEQLINES

if (( "$PROCESSED" >= "$SEQLINES" ))
then
  echo "Complete."
else
  echo "Error, incorrect number of processed sequences"
  exit 100
fi
echo "#{script_name} is finished at $(date +%Y%m%d%H%M%S)" >> "#{jobs_dir}/finished_jobs"
|
73
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module CagnutBwa
  # Convenience wrapper around CagnutBwa::Base. Every step method forwards its
  # arguments to the method of the same name on +bwa+ and appends the next
  # order number (+order + 1+) to what it returns.
  class Util
    attr_accessor :bwa, :config

    # Keeps +config+ and builds the CagnutBwa::Base instance used by all steps.
    def initialize(config)
      @bwa = CagnutBwa::Base.new
      @config = config
    end

    # @return [Array] the value returned by +bwa.aln_one_fastq+ and the next order
    def aln_one_fastq(dirs, order = 1, filename = nil)
      job = bwa.aln_one_fastq(dirs, order, filename)
      [job, order + 1]
    end

    # @return [Array] the two values returned by +bwa.samp_one_fastq+ and the next order
    def samp_one_fastq(dirs, order = 1, previous_job_id = nil, filename = nil)
      job, produced = bwa.samp_one_fastq(dirs, order, previous_job_id, filename)
      [job, produced, order + 1]
    end

    # @return [Array] the value returned by +bwa.aln+ and the next order
    def aln(dirs, order = 1, previous_job_id = nil, filename = nil)
      job = bwa.aln(dirs, order, previous_job_id, filename)
      [job, order + 1]
    end

    # @return [Array] the two values returned by +bwa.samp+ and the next order
    def samp(dirs, order = 1, previous_job_id = nil, filename = nil)
      job, produced = bwa.samp(dirs, order, previous_job_id, filename)
      [job, produced, order + 1]
    end

    # @return [Array] the two values returned by +bwa.mem+ and the next order
    def mem(dirs, order = 1, filename = nil)
      job, produced = bwa.mem(dirs, order, filename)
      [job, produced, order + 1]
    end
  end
end
|
data/lib/cagnut_bwa.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "cagnut_bwa/version"
|
2
|
+
|
3
|
+
module CagnutBwa
  # Returns the memoized configuration object. On the first call it loads
  # CagnutBwa::Configuration from Cagnut::Configuration.config and the 'bwa'
  # entry of Cagnut::Configuration.params, then caches
  # CagnutBwa::Configuration.instance for later calls.
  def self.config
    return @config if @config

    base_config = Cagnut::Configuration.config
    bwa_params = Cagnut::Configuration.params['bwa']
    CagnutBwa::Configuration.load(base_config, bwa_params)
    @config = CagnutBwa::Configuration.instance
  end
end
|
13
|
+
|
14
|
+
require 'cagnut_bwa/configuration'
|
15
|
+
require 'cagnut_bwa/base'
|
16
|
+
require 'cagnut_bwa/util'
|
metadata
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cagnut_bwa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shi-Gang Wang
|
8
|
+
- Tse-Ching Ho
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-11-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: cagnut_core
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bundler
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.12'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.12'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rake
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '10.0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '10.0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rspec
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '3.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.0'
|
70
|
+
description: Cagnut BWA tools
|
71
|
+
email:
|
72
|
+
- seanwang@goldenio.com
|
73
|
+
- tsechingho@goldenio.com
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".rspec"
|
80
|
+
- ".ruby-version"
|
81
|
+
- ".travis.yml"
|
82
|
+
- Gemfile
|
83
|
+
- LICENSE.txt
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- bin/console
|
87
|
+
- bin/setup
|
88
|
+
- cagnut_bwa.gemspec
|
89
|
+
- lib/cagnut_bwa.rb
|
90
|
+
- lib/cagnut_bwa/base.rb
|
91
|
+
- lib/cagnut_bwa/check_tools.rb
|
92
|
+
- lib/cagnut_bwa/configuration.rb
|
93
|
+
- lib/cagnut_bwa/functions/aln.rb
|
94
|
+
- lib/cagnut_bwa/functions/aln_one_fastq.rb
|
95
|
+
- lib/cagnut_bwa/functions/mem.rb
|
96
|
+
- lib/cagnut_bwa/functions/samp.rb
|
97
|
+
- lib/cagnut_bwa/functions/samp_one_fastq.rb
|
98
|
+
- lib/cagnut_bwa/functions/templates/aln.sh
|
99
|
+
- lib/cagnut_bwa/functions/templates/bwa_samp_one_fastq.sh
|
100
|
+
- lib/cagnut_bwa/functions/templates/bwa_samse_one_fastq.sh
|
101
|
+
- lib/cagnut_bwa/functions/templates/mem.sh
|
102
|
+
- lib/cagnut_bwa/functions/templates/samp.sh
|
103
|
+
- lib/cagnut_bwa/util.rb
|
104
|
+
- lib/cagnut_bwa/version.rb
|
105
|
+
homepage: https://github.com/CAGNUT/cagnut_bwa
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.5.1
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: Cagnut BWA tools
|
129
|
+
test_files: []
|