bio-ngs 0.4.2.alpha.01 → 0.4.4.alpha.01
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +5 -4
- data/Gemfile.lock +34 -32
- data/README.rdoc +3 -0
- data/VERSION +1 -1
- data/bin/biongs +0 -5
- data/bio-ngs.gemspec +17 -28
- data/ext/mkrf_conf.rb +5 -1
- data/lib/bio-ngs.rb +9 -2
- data/lib/bio/appl/ngs/bcftools.rb +100 -0
- data/lib/bio/appl/ngs/bwa.rb +210 -0
- data/lib/bio/appl/ngs/fastx.rb +22 -6
- data/lib/bio/appl/ngs/samtools.rb +85 -0
- data/lib/bio/appl/ngs/tophat.rb +1 -1
- data/lib/bio/ngs/ext/versions.yaml +16 -0
- data/lib/bio/ngs/record.rb +2 -2
- data/lib/bio/ngs/utils.rb +11 -8
- data/lib/enumerable.rb +13 -1
- data/lib/tasks/bwa.thor +32 -106
- data/lib/tasks/filter.thor +75 -0
- data/lib/tasks/pre.thor +17 -37
- data/lib/tasks/project.thor +2 -1
- data/lib/tasks/quality.thor +27 -5
- data/lib/wrapper.rb +32 -5
- metadata +100 -99
data/lib/tasks/bwa.thor
CHANGED
@@ -10,117 +10,43 @@
|
|
10
10
|
|
11
11
|
class Bwa < Thor
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
desc "long [FASTA]", "Make the BWT index for a LONG FASTA database"
|
24
|
-
method_option :colorspace, :type => :boolean, :desc => "Index in Colorspace"
|
25
|
-
method_option :prefix, :type => :string, :desc => "Database index name"
|
26
|
-
def long(fasta)
|
27
|
-
real_prefix = (options[:prefix]) ? options[:prefix] : fasta
|
28
|
-
Bio::BWA.make_index(:file_in => fasta, :a => "bwtsw",:c => options[:colorspace], :prefix => real_prefix)
|
29
|
-
end
|
30
|
-
|
13
|
+
# Thor task "index": hands the FASTA path given on the command line to the
# Bio::Ngs::Bwa::Index wrapper and prints whatever the wrapper's run returns.
# NOTE(review): thor_task presumably registers this block as the :index task
# and may inspect the block parameters, so their names are kept as-is.
desc "index [FASTA]", "Create BWA index"
Bio::Ngs::Bwa::Index.new.thor_task(self, :index) do |wrapper, task, file_in|
  # Forward every option parsed by Thor to the wrapped program.
  wrapper.params = task.options
  output = wrapper.run(arguments: [file_in])
  puts output
end
|
18
|
+
|
19
|
+
# Thor task "aln": passes the index prefix and the read file to the
# Bio::Ngs::Bwa::Aln wrapper and prints whatever the wrapper's run returns.
# NOTE(review): thor_task presumably registers this block as the :aln task
# and may inspect the block parameters, so their names are kept as-is.
desc "aln [PREFIX] [FASTA/Q]", "Run BWA aln (short reads)"
Bio::Ngs::Bwa::Aln.new.thor_task(self, :aln) do |wrapper, task, prefix, file_in|
  # Forward every option parsed by Thor to the wrapped program.
  wrapper.params = task.options
  output = wrapper.run(arguments: [prefix, file_in])
  puts output
end
|
32
24
|
|
25
|
+
# Thor task "samse": passes the index prefix, the SAI file and the read file
# (in that order) to the Bio::Ngs::Bwa::Samse wrapper and prints whatever the
# wrapper's run returns.
# NOTE(review): thor_task presumably registers this block as the :samse task
# and may inspect the block parameters, so their names are kept as-is.
desc "samse [PREFIX] [SAI FILE] [FASTA/Q FILE]", "Run BWA SAM Single End conversion"
Bio::Ngs::Bwa::Samse.new.thor_task(self, :samse) do |wrapper, task, prefix, sai_in, file_in|
  # Forward every option parsed by Thor to the wrapped program.
  wrapper.params = task.options
  positional = [prefix, sai_in, file_in]
  output = wrapper.run(arguments: positional)
  puts output
end
|
33
30
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
method_option :m, :type => :numeric, :desc => "maximum entries in the queue [2000000]"
|
47
|
-
method_option :t, :type => :numeric, :desc => "number of threads [1]"
|
48
|
-
method_option :M, :type => :numeric, :desc => "mismatch penalty [3]"
|
49
|
-
method_option :O, :type => :numeric, :desc => "gap open penalty [11]"
|
50
|
-
method_option :E, :type => :numeric, :desc => "gap extension penalty [4]"
|
51
|
-
method_option :R, :type => :numeric, :desc => "stop searching when there are >INT equally best hits [30]"
|
52
|
-
method_option :q, :type => :numeric, :desc => "quality threshold for read trimming down to 35bp [0]"
|
53
|
-
method_option :B, :type => :numeric, :desc => "length of barcode"
|
54
|
-
method_option :c, :type => :boolean, :desc => "input sequences are in the color space"
|
55
|
-
method_option :L, :type => :boolean, :desc => "log-scaled gap penalty for long deletions"
|
56
|
-
method_option :N, :type => :boolean, :desc => "non-iterative mode: search for all n-difference hits"
|
57
|
-
method_option :I, :type => :boolean, :desc => "the input is in the Illumina 1.3+ FASTQ-like format"
|
58
|
-
method_option :b, :type => :boolean, :desc => "the input read file is in the BAM format"
|
59
|
-
method_option :single, :type => :boolean, :desc => "use single-end reads only (effective with -b)"
|
60
|
-
method_option :first, :type => :boolean, :desc => "use the 1st read in a pair (effective with -b)"
|
61
|
-
method_option :second, :type => :boolean, :desc => "use the 2nd read in a pair (effective with -b)"
|
62
|
-
def short(fastq)
|
63
|
-
bwa_options = options.dup
|
64
|
-
bwa_options[:file_in] = fastq
|
65
|
-
Bio::BWA.short_read_alignment(bwa_options.symbolize_keys)
|
66
|
-
end
|
67
|
-
|
68
|
-
desc "long [FASTQ]", "Run the aligment for LONG query sequences"
|
69
|
-
method_option :file_out, :type => :string, :desc => "file to output results to instead of stdout", :required => true
|
70
|
-
method_option :prefix, :type => :string, :desc => "Database prefix", :required => true
|
71
|
-
method_option :a, :type => :numeric, :desc => "score for a match [1]"
|
72
|
-
method_option :b, :type => :numeric, :desc => "mismatch penalty [3]"
|
73
|
-
method_option :q, :type => :numeric, :desc => "gap open penalty [5]"
|
74
|
-
method_option :r, :type => :numeric, :desc => "gap extension penalty [2]"
|
75
|
-
method_option :t, :type => :numeric, :desc => "number of threads [1]"
|
76
|
-
method_option :w, :type => :numeric, :desc => "band width [50]"
|
77
|
-
method_option :m, :type => :numeric, :desc => "mask level [0.50]"
|
78
|
-
method_option :T, :type => :numeric, :desc => "score threshold divided by a [30]"
|
79
|
-
method_option :s, :type => :numeric, :desc => "maximum seeding interval size [3]"
|
80
|
-
method_option :z, :type => :numeric, :desc => "Z-best [1]"
|
81
|
-
method_option :N, :type => :numeric, :desc => "seeds to trigger reverse alignment [5]"
|
82
|
-
method_option :c, :type => :numeric, :desc => "coefficient of length-threshold adjustment [5.5]"
|
83
|
-
method_option :H, :type => :boolean, :desc => "in SAM output, use hard clipping rather than soft"
|
84
|
-
def long(fastq)
|
85
|
-
bwa_options = options.dup
|
86
|
-
bwa_options[:file_in] = fastq
|
87
|
-
Bio::BWA.long_read_alignment(bwa_options.symbolize_keys)
|
88
|
-
end
|
89
|
-
|
31
|
+
# Thor task "sampe": passes the index prefix, the two SAI files and the two
# read files (in that order) to the Bio::Ngs::Bwa::Sampe wrapper and prints
# whatever the wrapper's run returns.
# NOTE(review): thor_task presumably registers this block as the :sampe task
# and may inspect the block parameters, so their names are kept as-is.
desc "sampe [PREFIX] [SAI-1 FILE] [SAI-2 FILE] [FASTA/Q-1 FILE] [FASTA/Q-2 FILE]", "Run BWA SAM Paired End conversion"
Bio::Ngs::Bwa::Sampe.new.thor_task(self, :sampe) do |wrapper, task, prefix, sai1_in, sai2_in, file1_in, file2_in|
  # Forward every option parsed by Thor to the wrapped program.
  wrapper.params = task.options
  positional = [prefix, sai1_in, sai2_in, file1_in, file2_in]
  output = wrapper.run(arguments: positional)
  puts output
end
|
36
|
+
|
37
|
+
# Thor task "bwasw": passes the index prefix and the query file to the
# Bio::Ngs::Bwa::Bwasw wrapper and prints whatever the wrapper's run returns.
# When the :paired option holds a non-blank string it is appended as a third
# positional argument.
# NOTE(review): :paired is presumably the mate (second query) file accepted by
# `bwa bwasw` -- confirm against the Bwasw wrapper's option list.
desc "bwasw [PREFIX] [FASTA/Q]", "Run BWA bwasw (long reads)"
Bio::Ngs::Bwa::Bwasw.new.thor_task(self, :bwasw) do |wrapper, task, prefix, file_in|
  # Forward every option parsed by Thor to the wrapped program.
  wrapper.params = task.options
  # Only the block parameters are available here; the previous code also
  # referenced an undefined `sai_in`, which raised NameError on every call.
  arguments = [prefix, file_in]
  paired = task.options[:paired]
  # Append in place: `arguments + [...]` built a new array and threw it away.
  # The String check avoids calling =~ on a boolean or other non-string value.
  arguments << paired if paired.is_a?(String) && paired =~ /\w+/
  puts wrapper.run(:arguments => arguments)
end
|
91
44
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
method_option :prefix, :type => :string, :required => true, :desc => "Database prefix"
|
97
|
-
method_option :fastq, :type => :string, :required => true, :desc => "FastQ file"
|
98
|
-
method_option :file_out, :type => :string, :required => true, :desc => "File to save the output"
|
99
|
-
method_options :n => :numeric, :r => :string
|
100
|
-
def single(sai)
|
101
|
-
bwa_options = options.dup
|
102
|
-
bwa_options[:sai] = sai
|
103
|
-
Bio::BWA.sai_to_sam_single(bwa_options.symbolize_keys)
|
104
|
-
end
|
105
|
-
|
106
|
-
desc "paired", "Convert SAI alignment output into SAM format (paired ends)"
|
107
|
-
method_option :prefix, :type => :string, :required => true, :desc => "Database prefix"
|
108
|
-
method_option :file_out, :type => :string, :required => true, :desc => "File to save the output"
|
109
|
-
method_option :sai, :type => :array, :required => true, :desc => "The 2 SAI files"
|
110
|
-
method_option :fastq, :type => :array, :required => true, :desc => "The 2 Fasta/Q files"
|
111
|
-
method_option :a, :type => :numeric, :desc => "maximum insert size [500]"
|
112
|
-
method_option :o, :type => :numeric, :desc => "maximum occurrences for one end [100000]"
|
113
|
-
method_option :n, :type => :numeric, :desc => "maximum hits to output for paired reads [3]"
|
114
|
-
method_option :N, :type => :numeric, :desc => "maximum hits to output for discordant pairs [10]"
|
115
|
-
method_option :c, :type => :numeric, :desc => "prior of chimeric rate (lower bound) [1.0e-05]"
|
116
|
-
method_option :r, :type => :string, :desc => "read group header line such as `@RG\tID:foo\tSM:bar' [null]"
|
117
|
-
method_option :P, :type => :boolean, :desc => "preload index into memory (for base-space reads only)"
|
118
|
-
method_option :s, :type => :boolean, :desc => "disable Smith-Waterman for the unmapped mate"
|
119
|
-
method_option :A, :type => :boolean, :desc => "disable insert size estimate (force -s)"
|
120
|
-
def paired
|
121
|
-
Bio::BWA.sai_to_sam_paired(options.dup.symbolize_keys)
|
122
|
-
end
|
123
|
-
|
45
|
+
# Thor task "fastmap": passes the index prefix and the read file to the
# Bio::Ngs::Bwa::Fastmap wrapper and prints whatever the wrapper's run returns.
# NOTE(review): thor_task presumably registers this block as the :fastmap task
# and may inspect the block parameters, so their names are kept as-is.
desc "fastmap [PREFIX] [FASTA/Q]", "Run BWA Fastmap (identify super-maximal exact matches)"
Bio::Ngs::Bwa::Fastmap.new.thor_task(self, :fastmap) do |wrapper, task, prefix, file_in|
  # Forward every option parsed by Thor to the wrapped program.
  wrapper.params = task.options
  output = wrapper.run(arguments: [prefix, file_in])
  puts output
end
|
125
50
|
|
51
|
+
|
126
52
|
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Thor tasks for filtering delimited text tables.
class Filter < Thor

  # LIST is read as a delimited text file from which a single key column is
  # taken (by default the first one); the other columns are ignored.
  desc "by_list TABLE LIST", "Extract from TABLE the row with a key in LIST"
  method_option :exclude, :type => :boolean, :aliases => '-e', :desc => "return the elements in TABLE which are not listed in LIST"
  method_option :tablekey, :type => :numeric, :aliases => '-k', :desc =>"which field is the key to consider, start from 0"
  method_option :listkey, :type => :numeric, :aliases => '-l', :desc =>"which field is the key to consider, start from 0"
  method_option :delimiter, :type => :string, :default => " ", :aliases => '-d'
  method_option :skip_table_header, :type => :boolean, :default => true, :aliases => '-h', :desc => 'Skip first line, usually the header'
  method_option :skip_list_header, :type => :boolean, :default => true, :aliases => '-j', :desc => 'Skip first line, usually the header'
  method_option :skip_table_lines, :type => :numeric, :aliases => '-n', :desc => 'Skip Ns line before start'
  method_option :skip_list_lines, :type => :numeric, :aliases => '-m', :desc => 'Skip Ns line before start'
  method_option :output, :type => :string, :aliases => '-o', :desc => 'Output results to file'
  method_option :keep_skipped_lines, :type => :boolean, :default => false, :aliases => '-g', :desc => 'Write on output skipped lines from the TABLE file, header and number of lines skipped using option skip_table_line'
  method_option :zero_index_system, :type => :boolean, :default => true, :aliases => '-s', :desc => 'Starts Index from ZERO ? Otherwise starts from ONE'
  # Prints the rows of TABLE whose key column value appears in the key column
  # of LIST (or, with --exclude, the rows whose key does NOT appear).
  #
  # table - path of the delimited file to filter.
  # list  - path of the delimited file providing the accepted keys.
  #
  # Rows go to the file named by --output, or to STDOUT when it is not given.
  # Returns nil; a missing input file is reported on STDERR.
  def by_list(table, list)
    [table, list].each do |path|
      next if File.exist?(path) # File.exists? is gone from current Ruby
      STDERR.puts "by_list: #{path} does not exist."
      return
    end

    # The default " " makes String#split separate on any run of whitespace.
    delimiter = options[:delimiter] || " "
    table_key_idx = key_index(options[:tablekey])
    list_key_idx = key_index(options[:listkey])
    exclude = options[:exclude] ? true : false

    # Collect the keys of LIST; the block form closes the file even on error.
    wanted = {}
    File.open(list, 'r') do |flist|
      skip_lines(flist, options[:skip_list_lines], options[:skip_list_header])
      flist.each_line do |line|
        # chomp first, otherwise a key in the last column keeps its newline
        key = line.chomp.split(delimiter)[list_key_idx]
        wanted[key] = true unless key.nil? # blank/short lines carry no key
      end
    end

    File.open(table, 'r') do |ftable|
      # Skipped lines are kept so they can be echoed (e.g. proprietary headers).
      skipped = skip_lines(ftable, options[:skip_table_lines], options[:skip_table_header])
      with_output(options[:output]) do |fout|
        fout.puts skipped if options[:keep_skipped_lines] && !skipped.empty?
        ftable.each_line do |line|
          listed = wanted.key?(line.chomp.split(delimiter)[table_key_idx])
          # keep listed rows normally, unlisted rows when excluding
          fout.puts line if listed != exclude
        end
      end
    end
    nil
  end

  private

  # Converts a user supplied column number into a zero-based array index.
  # Without --zero_index_system the user counts from 1, so one is subtracted.
  # A missing column number selects the first column.
  def key_index(requested)
    return 0 if requested.nil?
    index = requested.to_i
    options[:zero_index_system] ? index : index - 1
  end

  # Consumes `count` lines plus one header line (when skip_header is set) from
  # io and returns the lines actually read; stops quietly at end of file.
  def skip_lines(io, count, skip_header)
    total = count.to_i + (skip_header ? 1 : 0)
    Array.new([total, 0].max) { io.gets }.compact
  end

  # Yields STDOUT when path is nil, otherwise a file opened for writing that
  # is closed when the block ends. STDOUT is never closed.
  def with_output(path)
    return yield(STDOUT) if path.nil?
    File.open(path, 'w') { |fout| yield fout }
  end

end
|
data/lib/tasks/pre.thor
CHANGED
@@ -1,29 +1,23 @@
|
|
1
1
|
class Pre < Thor
|
2
2
|
|
3
|
-
desc "
|
3
|
+
desc "illumina_filter [DIR(s)]", "Filter the data using Y/N flag in FastQ headers (Illumina). Search for fastq.gz files within directory(ies) passed."
|
4
4
|
method_option :compression, :type => :string, :default => "pigz"
|
5
5
|
method_option :cpu, :type => :numeric, :default => 4
|
6
|
-
def
|
6
|
+
def illumina_filter(dir)
|
7
7
|
folders = Dir.glob(dir)
|
8
8
|
cmd_blocks = []
|
9
9
|
folders.each do |folder|
|
10
|
-
Dir.glob(folder+"/*.fastq.gz").sort.
|
10
|
+
Parallel.each(Dir.glob(folder+"/*.fastq.gz").sort,:in_processes => options[:cpu].to_i) do |fastq|
|
11
11
|
Dir.mkdir(folder+"/filtered") unless Dir.exists? folder+"/filtered"
|
12
12
|
fastq = fastq.split("/")[-1]
|
13
|
-
|
14
|
-
if cmd_blocks == options[:cpu]
|
15
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
16
|
-
cmd_blocks = []
|
17
|
-
end
|
13
|
+
system("zcat #{folder+"/"+fastq} | grep -A 3 '^@.* [^:]*:N:[^:]*:' | grep -v '^\-\-'| #{options[:compression]} > #{folder}/filtered/#{fastq}")
|
18
14
|
end
|
19
15
|
end
|
20
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
21
16
|
end
|
22
17
|
|
23
18
|
|
24
19
|
desc "merge [file(s)]","Merge together fastQ files (accepts wildcards)"
|
25
20
|
method_option :compressed, :type => :boolean, :default => true
|
26
|
-
method_option :cpu, :type => :numeric, :default => 4
|
27
21
|
def merge(file)
|
28
22
|
files = Dir.glob(file).sort
|
29
23
|
cat = (options[:compressed]) ? "zcat" : "cat"
|
@@ -37,18 +31,17 @@ class Pre < Thor
|
|
37
31
|
def paired_merge(file)
|
38
32
|
files = Dir.glob(file).sort.find_all {|f| f=~/_R1_/}
|
39
33
|
cat = (options[:compressed] == true) ? "zcat" : "cat"
|
40
|
-
files.each do |
|
41
|
-
|
42
|
-
r2 = file.gsub(/_R1_/,"_R2_")
|
34
|
+
files.each do |r1|
|
35
|
+
r2 = r1.gsub(/_R1_/,"_R2_")
|
43
36
|
if File.exists? r2
|
44
37
|
r1_count = count_reads(r1,compressed:options[:compressed])
|
45
38
|
r2_count = count_reads(r2,compressed:options[:compressed])
|
46
39
|
puts "Read count: #{r1_count} : #{r2_count} , #{file}"
|
47
40
|
if r1_count == r2_count
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
41
|
+
Parallel.each(["R1","R2"],:in_processes => options[:cpu].to_i) do |read|
|
42
|
+
filename = (read == "R1") ? r1 : r2
|
43
|
+
system "#{cat} #{filename} >> #{read}_reads.fastq"
|
44
|
+
end
|
52
45
|
else
|
53
46
|
raise RuntimeError "Error: files #{r1} and #{r2} do not have the same number of reads!"
|
54
47
|
end
|
@@ -64,26 +57,21 @@ class Pre < Thor
|
|
64
57
|
def uncompress(file)
|
65
58
|
files = Dir.glob(file).sort
|
66
59
|
blocks = []
|
67
|
-
|
68
|
-
|
69
|
-
if blocks.size == options[:cpu]
|
70
|
-
Bio::Ngs::Utils.parallel_exec(blocks)
|
71
|
-
blocks = []
|
72
|
-
end
|
60
|
+
Parallel.each(files,:in_processes => options[:cpu].to_i) do |file|
|
61
|
+
system("gunzip #{file}")
|
73
62
|
end
|
74
|
-
Bio::Ngs::Utils.parallel_exec(blocks)
|
75
63
|
end
|
76
64
|
|
77
65
|
|
78
|
-
desc "trim [
|
66
|
+
desc "trim [fastq(s)]","Calulate quality profile and trim the all the reads using FastX (accepts wildcards)"
|
79
67
|
method_option :cpu, :type => :numeric, :default => 4
|
80
68
|
method_option :min_qual, :type => :numeric, :default => 20
|
81
69
|
def trim(file)
|
82
|
-
|
70
|
+
list = Dir.glob(file).sort
|
71
|
+
groups = list / options[:cpu].to_i # get group of files equal to number of CPUs
|
83
72
|
cmd_blocks = []
|
84
|
-
|
85
|
-
|
86
|
-
block = -> do
|
73
|
+
groups.each do |files|
|
74
|
+
Parallel.each(files, :in_processes => options[:cpu].to_i) do |file|
|
87
75
|
invoke "quality:fastq_stats", [file], {output:file+".stats"}
|
88
76
|
trim_position = options[:read_length]
|
89
77
|
lines = File.read(file+".stats").split("\n")
|
@@ -109,15 +97,7 @@ class Pre < Thor
|
|
109
97
|
trim.run
|
110
98
|
end
|
111
99
|
end
|
112
|
-
|
113
|
-
cmd_blocks << block
|
114
|
-
|
115
|
-
if cmd_blocks.size == options[:cpu]
|
116
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
117
|
-
cmd_blocks = []
|
118
|
-
end
|
119
100
|
end
|
120
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
121
101
|
end
|
122
102
|
|
123
103
|
private
|
data/lib/tasks/project.thor
CHANGED
@@ -20,7 +20,8 @@ class Project < Thor
|
|
20
20
|
method_option :type, :type => :string, :desc => "The project type (e.g. annotation)"
|
21
21
|
def new(name)
|
22
22
|
empty_directory name
|
23
|
-
empty_directory File.join("#{name}","
|
23
|
+
empty_directory File.join("#{name}","raw_data")
|
24
|
+
empty_directory File.join("#{name}","outputs")
|
24
25
|
empty_directory File.join("#{name}","tasks")
|
25
26
|
empty_directory File.join("#{name}","scripts")
|
26
27
|
self.name = name # for template to take the correct values
|
data/lib/tasks/quality.thor
CHANGED
@@ -45,19 +45,32 @@ class Quality < Thor
|
|
45
45
|
desc "fastq_stats FASTQ", "Reports quality of FASTQ file"
|
46
46
|
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
|
47
47
|
def fastq_stats(fastq)
|
48
|
-
|
48
|
+
|
49
|
+
output_file = options.output || "#{fastq.gsub(/\.fastq\.gz/,'')}_stats.txt"
|
49
50
|
stats = Bio::Ngs::Fastx::FastqStats.new
|
50
|
-
|
51
|
+
if fastq=~/\.gz/
|
52
|
+
stats.params = {output:output_file}
|
53
|
+
stats.pipe_ahead=["zcat", fastq]
|
54
|
+
else
|
55
|
+
stats.params = {input:fastq, output:output_file}
|
56
|
+
end
|
51
57
|
stats.run
|
52
|
-
|
53
|
-
|
58
|
+
require 'parallel'
|
59
|
+
go_in_parallel = [[:boxplot,[output_file]],
|
60
|
+
[:reads_coverage,[output_file]],
|
61
|
+
[:nucleotide_distribution,[output_file]]]
|
62
|
+
Parallel.map(go_in_parallel, in_processes:go_in_parallel.size) do |graph|
|
63
|
+
invoke graph.first, graph.last
|
64
|
+
end
|
65
|
+
#invoke :boxplot, [output_file]
|
66
|
+
#invoke :reads_coverage, [output_file]
|
54
67
|
end
|
55
68
|
|
56
69
|
desc "boxplot FASTQ_QUALITY_STATS", "plot reads quality as boxplot"
|
57
70
|
method_option :title, :type=>:string, :aliases =>"-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
58
71
|
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
|
59
72
|
def boxplot(fastq_quality_stats)
|
60
|
-
output_file = options.output || "#{fastq_quality_stats}.png"
|
73
|
+
output_file = options.output || "#{fastq_quality_stats}_boxplot.png"
|
61
74
|
boxplot = Bio::Ngs::Fastx::ReadsBoxPlot.new
|
62
75
|
boxplot.params={input:fastq_quality_stats, output:output_file}
|
63
76
|
boxplot.run
|
@@ -74,6 +87,15 @@ class Quality < Thor
|
|
74
87
|
coverage.run
|
75
88
|
end
|
76
89
|
|
90
|
+
desc "nucleotide_distribution FASTQ_QUALITY_STATS", "plot the nucleotide distribution from a FASTQ quality stats file"
method_option :title, :type=>:string, :aliases =>"-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with _nuc_dist.png."
# Runs the Bio::Ngs::Fastx::NucleotideDistribution wrapper on a stats file.
#
# fastq_quality_stats - path of the stats file used as the wrapper's input.
#
# The wrapper's output is the --output option when given, otherwise the input
# path with "_nuc_dist.png" appended. Returns whatever the wrapper's run returns.
# NOTE(review): the :title option is declared but not passed to the wrapper
# here -- confirm whether that is intended.
def nucleotide_distribution(fastq_quality_stats)
  output_file = options.output || "#{fastq_quality_stats}_nuc_dist.png"
  nucdistr = Bio::Ngs::Fastx::NucleotideDistribution.new
  nucdistr.params = {input: fastq_quality_stats, output: output_file}
  nucdistr.run
end
|
77
99
|
|
78
100
|
desc "illumina_b_profile_raw FASTQ", "perform a profile for reads coming fom Illumina 1.5+ and write the report in a txt file"
|
79
101
|
method_option :read_length, :type => :numeric, :required => true
|
data/lib/wrapper.rb
CHANGED
@@ -42,6 +42,7 @@ module Bio
|
|
42
42
|
@program = binary || self.class.program
|
43
43
|
@options = options
|
44
44
|
@params = {}
|
45
|
+
@pipe_ahead = []
|
45
46
|
end
|
46
47
|
|
47
48
|
# Parameters are accepted ONLY if the key is present as
|
@@ -73,6 +74,27 @@ module Bio
|
|
73
74
|
@params.clear
|
74
75
|
end
|
75
76
|
|
77
|
+
|
78
|
+
# Current upstream command description; run hands it to Open3.pipeline as the
# command placed before this wrapper's own command line.
def pipe_ahead
  # TODO: recursive call to other Bio::Ngs wrapped commands
  @pipe_ahead
end

# True when an upstream command has been configured (the stored array is not empty).
def pipe_ahead?
  @pipe_ahead.empty? ? false : true
end

# Sets the upstream command. When set, it is an array described like the ones
# accepted by Open3.pipeline:
# http://www.ruby-doc.org/stdlib-1.9.3/libdoc/open3/rdoc/Open3.html#method-c-pipeline
# A nil or false value resets it to an empty array.
def pipe_ahead=(ary)
  @pipe_ahead = ary ? ary : []
end
|
92
|
+
|
93
|
+
# Builds the command line as a flat array: program, sub_program, the
# parameters normalized with opts[:separator], then opts[:arguments].
# Nested arrays are flattened and nil entries dropped.
# The default hash applies only when opts is omitted entirely; a caller that
# passes a partial hash gets nil for the missing key.
def to_cmd_ary(opts={arguments:[],separator:"="})
  pieces = [program, sub_program]
  pieces << normalize_params(opts[:separator])
  pieces << opts[:arguments]
  pieces.flatten.compact
end
|
97
|
+
|
76
98
|
# Return the options and parameters formmatted as typed in the command line as a string
|
77
99
|
# opts[:separator] is important not all the applications require a "=" for separating options and values
|
78
100
|
# TODO: need to be compliant with Bio::Command ?
|
@@ -130,8 +152,8 @@ module Bio
|
|
130
152
|
raise "Can't write to any output file. With a program which writes on stdout you must provide a file name" if opts[:output_file].nil?
|
131
153
|
file_stdlog = File.open(opts[:output_file], 'w')
|
132
154
|
file_errlog = File.open(opts[:output_file]+".err",'w')
|
133
|
-
|
134
|
-
Bio::Command.call_command_open3(
|
155
|
+
#[program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact
|
156
|
+
Bio::Command.call_command_open3(to_cmd_ary(separator:opts[:separator], arguments:opts[:arguments])) do |pin, pout, perr|
|
135
157
|
pout.sync = true
|
136
158
|
perr.sync = true
|
137
159
|
t = Thread.start {pout.lines{|line| file_stdlog.puts line}}
|
@@ -143,12 +165,17 @@ module Bio
|
|
143
165
|
end #command call open3
|
144
166
|
file_stdlog.close
|
145
167
|
file_errlog.close
|
168
|
+
|
169
|
+
elsif pipe_ahead?
|
170
|
+
#in case the user setted the pipeline we use it.
|
171
|
+
Open3.pipeline(pipe_ahead, to_cmd_ary(separator:opts[:separator], arguments:opts[:arguments]))
|
146
172
|
else
|
147
173
|
# puts "Normlized #{normalize_params(opts[:separator])}"
|
148
174
|
# puts "Arguments #{opts[:arguments]}"
|
149
|
-
#puts [program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact.inspect
|
150
|
-
|
151
|
-
Bio::Command.query_command(
|
175
|
+
#puts [program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact.inspect
|
176
|
+
#Note: maybe seprator could be defined as a method for each wrapped program ?
|
177
|
+
Bio::Command.query_command(to_cmd_ary(separator:opts[:separator], arguments:opts[:arguments]))
|
178
|
+
#[program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact
|
152
179
|
end #if
|
153
180
|
end #run
|
154
181
|
|