bio-ngs 0.4.2.alpha.01 → 0.4.4.alpha.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +5 -4
- data/Gemfile.lock +34 -32
- data/README.rdoc +3 -0
- data/VERSION +1 -1
- data/bin/biongs +0 -5
- data/bio-ngs.gemspec +17 -28
- data/ext/mkrf_conf.rb +5 -1
- data/lib/bio-ngs.rb +9 -2
- data/lib/bio/appl/ngs/bcftools.rb +100 -0
- data/lib/bio/appl/ngs/bwa.rb +210 -0
- data/lib/bio/appl/ngs/fastx.rb +22 -6
- data/lib/bio/appl/ngs/samtools.rb +85 -0
- data/lib/bio/appl/ngs/tophat.rb +1 -1
- data/lib/bio/ngs/ext/versions.yaml +16 -0
- data/lib/bio/ngs/record.rb +2 -2
- data/lib/bio/ngs/utils.rb +11 -8
- data/lib/enumerable.rb +13 -1
- data/lib/tasks/bwa.thor +32 -106
- data/lib/tasks/filter.thor +75 -0
- data/lib/tasks/pre.thor +17 -37
- data/lib/tasks/project.thor +2 -1
- data/lib/tasks/quality.thor +27 -5
- data/lib/wrapper.rb +32 -5
- metadata +100 -99
data/lib/tasks/bwa.thor
CHANGED
@@ -10,117 +10,43 @@
|
|
10
10
|
|
11
11
|
class Bwa < Thor
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
desc "long [FASTA]", "Make the BWT index for a LONG FASTA database"
|
24
|
-
method_option :colorspace, :type => :boolean, :desc => "Index in Colorspace"
|
25
|
-
method_option :prefix, :type => :string, :desc => "Database index name"
|
26
|
-
def long(fasta)
|
27
|
-
real_prefix = (options[:prefix]) ? options[:prefix] : fasta
|
28
|
-
Bio::BWA.make_index(:file_in => fasta, :a => "bwtsw",:c => options[:colorspace], :prefix => real_prefix)
|
29
|
-
end
|
30
|
-
|
13
|
+
desc "index [FASTA]", "Create BWA index"
|
14
|
+
# Body of the `index` task, handed to the Bwa::Index wrapper through thor_task.
# The options parsed by Thor become the wrapper parameters and the FASTA file
# is the only positional argument; whatever `run` returns is printed.
Bio::Ngs::Bwa::Index.new.thor_task(self, :index) do |bwa, thor, fasta|
  bwa.params = thor.options
  result = bwa.run(:arguments => [fasta])
  puts result
end
|
18
|
+
|
19
|
+
desc "aln [PREFIX] [FASTA/Q]", "Run BWA aln (short reads)"
|
20
|
+
# Body of the `aln` task, handed to the Bwa::Aln wrapper through thor_task.
# The options parsed by Thor become the wrapper parameters; the index prefix
# and the reads file are passed, in that order, as positional arguments.
Bio::Ngs::Bwa::Aln.new.thor_task(self, :aln) do |bwa, thor, index_prefix, reads|
  bwa.params = thor.options
  result = bwa.run(:arguments => [index_prefix, reads])
  puts result
end
|
32
24
|
|
25
|
+
desc "samse [PREFIX] [SAI FILE] [FASTA/Q FILE]", "Run BWA SAM Single End conversion"
|
26
|
+
# Body of the `samse` task, handed to the Bwa::Samse wrapper through thor_task.
# The options parsed by Thor become the wrapper parameters; prefix, SAI file
# and reads file are passed, in that order, as positional arguments.
Bio::Ngs::Bwa::Samse.new.thor_task(self, :samse) do |bwa, thor, index_prefix, sai, reads|
  bwa.params = thor.options
  result = bwa.run(:arguments => [index_prefix, sai, reads])
  puts result
end
|
33
30
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
method_option :m, :type => :numeric, :desc => "maximum entries in the queue [2000000]"
|
47
|
-
method_option :t, :type => :numeric, :desc => "number of threads [1]"
|
48
|
-
method_option :M, :type => :numeric, :desc => "mismatch penalty [3]"
|
49
|
-
method_option :O, :type => :numeric, :desc => "gap open penalty [11]"
|
50
|
-
method_option :E, :type => :numeric, :desc => "gap extension penalty [4]"
|
51
|
-
method_option :R, :type => :numeric, :desc => "stop searching when there are >INT equally best hits [30]"
|
52
|
-
method_option :q, :type => :numeric, :desc => "quality threshold for read trimming down to 35bp [0]"
|
53
|
-
method_option :B, :type => :numeric, :desc => "length of barcode"
|
54
|
-
method_option :c, :type => :boolean, :desc => "input sequences are in the color space"
|
55
|
-
method_option :L, :type => :boolean, :desc => "log-scaled gap penalty for long deletions"
|
56
|
-
method_option :N, :type => :boolean, :desc => "non-iterative mode: search for all n-difference hits"
|
57
|
-
method_option :I, :type => :boolean, :desc => "the input is in the Illumina 1.3+ FASTQ-like format"
|
58
|
-
method_option :b, :type => :boolean, :desc => "the input read file is in the BAM format"
|
59
|
-
method_option :single, :type => :boolean, :desc => "use single-end reads only (effective with -b)"
|
60
|
-
method_option :first, :type => :boolean, :desc => "use the 1st read in a pair (effective with -b)"
|
61
|
-
method_option :second, :type => :boolean, :desc => "use the 2nd read in a pair (effective with -b)"
|
62
|
-
def short(fastq)
|
63
|
-
bwa_options = options.dup
|
64
|
-
bwa_options[:file_in] = fastq
|
65
|
-
Bio::BWA.short_read_alignment(bwa_options.symbolize_keys)
|
66
|
-
end
|
67
|
-
|
68
|
-
desc "long [FASTQ]", "Run the aligment for LONG query sequences"
|
69
|
-
method_option :file_out, :type => :string, :desc => "file to output results to instead of stdout", :required => true
|
70
|
-
method_option :prefix, :type => :string, :desc => "Database prefix", :required => true
|
71
|
-
method_option :a, :type => :numeric, :desc => "score for a match [1]"
|
72
|
-
method_option :b, :type => :numeric, :desc => "mismatch penalty [3]"
|
73
|
-
method_option :q, :type => :numeric, :desc => "gap open penalty [5]"
|
74
|
-
method_option :r, :type => :numeric, :desc => "gap extension penalty [2]"
|
75
|
-
method_option :t, :type => :numeric, :desc => "number of threads [1]"
|
76
|
-
method_option :w, :type => :numeric, :desc => "band width [50]"
|
77
|
-
method_option :m, :type => :numeric, :desc => "mask level [0.50]"
|
78
|
-
method_option :T, :type => :numeric, :desc => "score threshold divided by a [30]"
|
79
|
-
method_option :s, :type => :numeric, :desc => "maximum seeding interval size [3]"
|
80
|
-
method_option :z, :type => :numeric, :desc => "Z-best [1]"
|
81
|
-
method_option :N, :type => :numeric, :desc => "seeds to trigger reverse alignment [5]"
|
82
|
-
method_option :c, :type => :numeric, :desc => "coefficient of length-threshold adjustment [5.5]"
|
83
|
-
method_option :H, :type => :boolean, :desc => "in SAM output, use hard clipping rather than soft"
|
84
|
-
def long(fastq)
|
85
|
-
bwa_options = options.dup
|
86
|
-
bwa_options[:file_in] = fastq
|
87
|
-
Bio::BWA.long_read_alignment(bwa_options.symbolize_keys)
|
88
|
-
end
|
89
|
-
|
31
|
+
desc "sampe [PREFIX] [SAI-1 FILE] [SAI-2 FILE] [FASTA/Q-1 FILE] [FASTA/Q-2 FILE]", "Run BWA SAM Paired End conversion"
|
32
|
+
# Body of the `sampe` task, handed to the Bwa::Sampe wrapper through thor_task.
# The options parsed by Thor become the wrapper parameters; prefix, the two
# SAI files and the two reads files are passed, in that order, as positional
# arguments.
Bio::Ngs::Bwa::Sampe.new.thor_task(self, :sampe) do |bwa, thor, index_prefix, sai1, sai2, reads1, reads2|
  bwa.params = thor.options
  positional = [index_prefix, sai1, sai2, reads1, reads2]
  result = bwa.run(:arguments => positional)
  puts result
end
|
36
|
+
|
37
|
+
desc "bwasw [PREFIX] [FASTA/Q]", "Run BWA bwasw (long reads)"
|
38
|
+
# Body of the `bwasw` task, handed to the Bwa::Bwasw wrapper through thor_task.
# The options parsed by Thor become the wrapper parameters; the index prefix
# and the reads file are the positional arguments.
Bio::Ngs::Bwa::Bwasw.new.thor_task(self, :bwasw) do |wrapper, task, prefix, file_in|
  wrapper.params = task.options
  # Only the prefix and the query file are block parameters here: the previous
  # code also referenced an undefined `sai_in`, which raised NameError.
  arguments = [prefix, file_in]
  # Append the optional second (mate) file. `<<` mutates the array; the old
  # `arguments+[...]` built a new array and threw it away.
  # NOTE(review): assumes :paired carries the mate file name — confirm against
  # the option declarations of the Bwasw wrapper.
  paired = task.options[:paired]
  arguments << paired if paired.is_a?(String) && paired =~ /\w+/
  puts wrapper.run :arguments => arguments
end
|
91
44
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
method_option :prefix, :type => :string, :required => true, :desc => "Database prefix"
|
97
|
-
method_option :fastq, :type => :string, :required => true, :desc => "FastQ file"
|
98
|
-
method_option :file_out, :type => :string, :required => true, :desc => "File to save the output"
|
99
|
-
method_options :n => :numeric, :r => :string
|
100
|
-
def single(sai)
|
101
|
-
bwa_options = options.dup
|
102
|
-
bwa_options[:sai] = sai
|
103
|
-
Bio::BWA.sai_to_sam_single(bwa_options.symbolize_keys)
|
104
|
-
end
|
105
|
-
|
106
|
-
desc "paired", "Convert SAI alignment output into SAM format (paired ends)"
|
107
|
-
method_option :prefix, :type => :string, :required => true, :desc => "Database prefix"
|
108
|
-
method_option :file_out, :type => :string, :required => true, :desc => "File to save the output"
|
109
|
-
method_option :sai, :type => :array, :required => true, :desc => "The 2 SAI files"
|
110
|
-
method_option :fastq, :type => :array, :required => true, :desc => "The 2 Fasta/Q files"
|
111
|
-
method_option :a, :type => :numeric, :desc => "maximum insert size [500]"
|
112
|
-
method_option :o, :type => :numeric, :desc => "maximum occurrences for one end [100000]"
|
113
|
-
method_option :n, :type => :numeric, :desc => "maximum hits to output for paired reads [3]"
|
114
|
-
method_option :N, :type => :numeric, :desc => "maximum hits to output for discordant pairs [10]"
|
115
|
-
method_option :c, :type => :numeric, :desc => "prior of chimeric rate (lower bound) [1.0e-05]"
|
116
|
-
method_option :r, :type => :string, :desc => "read group header line such as `@RG\tID:foo\tSM:bar' [null]"
|
117
|
-
method_option :P, :type => :boolean, :desc => "preload index into memory (for base-space reads only)"
|
118
|
-
method_option :s, :type => :boolean, :desc => "disable Smith-Waterman for the unmapped mate"
|
119
|
-
method_option :A, :type => :boolean, :desc => "disable insert size estimate (force -s)"
|
120
|
-
def paired
|
121
|
-
Bio::BWA.sai_to_sam_paired(options.dup.symbolize_keys)
|
122
|
-
end
|
123
|
-
|
45
|
+
desc "fastmap [PREFIX] [FASTA/Q]", "Run BWA Fastmap (identify super-maximal exact matches)"
|
46
|
+
# Body of the `fastmap` task, handed to the Bwa::Fastmap wrapper through
# thor_task. The options parsed by Thor become the wrapper parameters; the
# index prefix and the reads file are passed as positional arguments.
Bio::Ngs::Bwa::Fastmap.new.thor_task(self, :fastmap) do |bwa, thor, index_prefix, reads|
  bwa.params = thor.options
  result = bwa.run(:arguments => [index_prefix, reads])
  puts result
end
|
125
50
|
|
51
|
+
|
126
52
|
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
class Filter < Thor
|
2
|
+
|
3
|
+
# Assume that this is a plain list of elements, with just one column. In the future it could be
|
4
|
+
# a table as well.
|
5
|
+
desc "by_list TABLE LIST", "Extract from TABLE the row with a key in LIST"
|
6
|
+
method_option :exclude, :type => :boolean, :aliases => '-e', :desc => "return the elements in TABLE which are not listed in LIST"
|
7
|
+
method_option :tablekey, :type => :numeric, :aliases => '-k', :desc =>"which field is the key to consider, start from 0"
|
8
|
+
method_option :listkey, :type => :numeric, :aliases => '-l', :desc =>"which field is the key to consider, start from 0"
|
9
|
+
method_option :delimiter, :type => :string, :default => " ", :aliases => '-d'
|
10
|
+
method_option :skip_table_header, :type => :boolean, :default => true, :aliases => '-h', :desc => 'Skip first line, usually the header'
|
11
|
+
method_option :skip_list_header, :type => :boolean, :default => true, :aliases => '-j', :desc => 'Skip first line, usually the header'
|
12
|
+
method_option :skip_table_lines, :type => :numeric, :aliases => '-n', :desc => 'Skip Ns line before start'
|
13
|
+
method_option :skip_list_lines, :type => :numeric, :aliases => '-m', :desc => 'Skip Ns line before start'
|
14
|
+
method_option :output, :type => :string, :aliases => '-o', :desc => 'Output results to file'
|
15
|
+
method_option :keep_skipped_lines, :type => :boolean, :default => false, :aliases => '-g', :desc => 'Write on output skipped lines from the TABLE file, header and number of lines skipped using option skip_table_line'
|
16
|
+
method_option :zero_index_system, :type => :boolean, :default => true, :aliases => '-s', :desc => 'Starts Index from ZERO ? Otherwise starts from ONE'
|
17
|
+
# Print the rows of TABLE whose key column matches a key found in LIST
# (or, with :exclude, the rows whose key is NOT in LIST).
#
# table - path of the delimited file to filter.
# list  - path of the delimited file providing the keys.
#
# Options read: :tablekey, :listkey, :zero_index_system, :delimiter,
# :skip_table_lines, :skip_list_lines, :skip_table_header, :skip_list_header,
# :keep_skipped_lines, :exclude, :output.
#
# Rows are written to the :output file when given, otherwise to STDOUT.
# Returns nil after printing a message on STDERR when an input file is missing.
def by_list(table, list)
  [table, list].each do |path|
    next if File.exist?(path)
    STDERR.puts "by_list: #{path} does not exist."
    return
  end

  # User supplied keys are 1-based when the zero index system is switched off,
  # so they are shifted DOWN to Ruby's 0-based array indexes. A missing key
  # always means "first column".
  offset = options[:zero_index_system] == false ? 1 : 0
  table_key_idx = options[:tablekey] ? options[:tablekey].to_i - offset : 0
  list_key_idx = options[:listkey] ? options[:listkey].to_i - offset : 0
  delimiter = options[:delimiter] || " "
  exclude = options[:exclude] ? true : false

  # Collect the keys of LIST. Lines are chomped so that a key sitting in the
  # last column does not carry the trailing newline.
  list_keys = {}
  File.open(list, 'r') do |flist|
    # `gets` returns nil at EOF, so short files do not raise EOFError.
    options[:skip_list_lines].to_i.times { flist.gets }
    flist.gets if options[:skip_list_header]
    flist.each_line do |line|
      key = line.chomp.split(delimiter)[list_key_idx]
      list_keys[key] = true unless key.nil?
    end
  end

  fout = options[:output].nil? ? STDOUT : File.open(options[:output], 'w')
  begin
    File.open(table, 'r') do |ftable|
      # Skipped lines are remembered so they can be echoed on request
      # (useful when TABLE is in a proprietary format with a preamble).
      skipped_lines = []
      options[:skip_table_lines].to_i.times { skipped_lines << ftable.gets }
      skipped_lines << ftable.gets if options[:skip_table_header]
      skipped_lines.compact!
      fout.puts skipped_lines if options[:keep_skipped_lines] && !skipped_lines.empty?

      ftable.each_line do |line|
        listed = list_keys.key?(line.chomp.split(delimiter)[table_key_idx])
        # Keep listed rows normally, unlisted rows when excluding.
        fout.puts line if listed != exclude
      end
    end
  ensure
    # Never close the process-wide STDOUT.
    fout.close unless fout.equal?(STDOUT)
  end
end
|
73
|
+
|
74
|
+
|
75
|
+
end
|
data/lib/tasks/pre.thor
CHANGED
@@ -1,29 +1,23 @@
|
|
1
1
|
class Pre < Thor
|
2
2
|
|
3
|
-
desc "
|
3
|
+
desc "illumina_filter [DIR(s)]", "Filter the data using Y/N flag in FastQ headers (Illumina). Search for fastq.gz files within directory(ies) passed."
|
4
4
|
method_option :compression, :type => :string, :default => "pigz"
|
5
5
|
method_option :cpu, :type => :numeric, :default => 4
|
6
|
-
def
|
6
|
+
def illumina_filter(dir)
|
7
7
|
folders = Dir.glob(dir)
|
8
8
|
cmd_blocks = []
|
9
9
|
folders.each do |folder|
|
10
|
-
Dir.glob(folder+"/*.fastq.gz").sort.
|
10
|
+
Parallel.each(Dir.glob(folder+"/*.fastq.gz").sort,:in_processes => options[:cpu].to_i) do |fastq|
|
11
11
|
Dir.mkdir(folder+"/filtered") unless Dir.exists? folder+"/filtered"
|
12
12
|
fastq = fastq.split("/")[-1]
|
13
|
-
|
14
|
-
if cmd_blocks == options[:cpu]
|
15
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
16
|
-
cmd_blocks = []
|
17
|
-
end
|
13
|
+
system("zcat #{folder+"/"+fastq} | grep -A 3 '^@.* [^:]*:N:[^:]*:' | grep -v '^\-\-'| #{options[:compression]} > #{folder}/filtered/#{fastq}")
|
18
14
|
end
|
19
15
|
end
|
20
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
21
16
|
end
|
22
17
|
|
23
18
|
|
24
19
|
desc "merge [file(s)]","Merge together fastQ files (accepts wildcards)"
|
25
20
|
method_option :compressed, :type => :boolean, :default => true
|
26
|
-
method_option :cpu, :type => :numeric, :default => 4
|
27
21
|
def merge(file)
|
28
22
|
files = Dir.glob(file).sort
|
29
23
|
cat = (options[:compressed]) ? "zcat" : "cat"
|
@@ -37,18 +31,17 @@ class Pre < Thor
|
|
37
31
|
def paired_merge(file)
|
38
32
|
files = Dir.glob(file).sort.find_all {|f| f=~/_R1_/}
|
39
33
|
cat = (options[:compressed] == true) ? "zcat" : "cat"
|
40
|
-
files.each do |
|
41
|
-
|
42
|
-
r2 = file.gsub(/_R1_/,"_R2_")
|
34
|
+
files.each do |r1|
|
35
|
+
r2 = r1.gsub(/_R1_/,"_R2_")
|
43
36
|
if File.exists? r2
|
44
37
|
r1_count = count_reads(r1,compressed:options[:compressed])
|
45
38
|
r2_count = count_reads(r2,compressed:options[:compressed])
|
46
39
|
puts "Read count: #{r1_count} : #{r2_count} , #{file}"
|
47
40
|
if r1_count == r2_count
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
41
|
+
Parallel.each(["R1","R2"],:in_processes => options[:cpu].to_i) do |read|
|
42
|
+
filename = (read == "R1") ? r1 : r2
|
43
|
+
system "#{cat} #{filename} >> #{read}_reads.fastq"
|
44
|
+
end
|
52
45
|
else
|
53
46
|
raise RuntimeError "Error: files #{r1} and #{r2} do not have the same number of reads!"
|
54
47
|
end
|
@@ -64,26 +57,21 @@ class Pre < Thor
|
|
64
57
|
def uncompress(file)
|
65
58
|
files = Dir.glob(file).sort
|
66
59
|
blocks = []
|
67
|
-
|
68
|
-
|
69
|
-
if blocks.size == options[:cpu]
|
70
|
-
Bio::Ngs::Utils.parallel_exec(blocks)
|
71
|
-
blocks = []
|
72
|
-
end
|
60
|
+
Parallel.each(files,:in_processes => options[:cpu].to_i) do |file|
|
61
|
+
system("gunzip #{file}")
|
73
62
|
end
|
74
|
-
Bio::Ngs::Utils.parallel_exec(blocks)
|
75
63
|
end
|
76
64
|
|
77
65
|
|
78
|
-
desc "trim [
|
66
|
+
desc "trim [fastq(s)]","Calulate quality profile and trim the all the reads using FastX (accepts wildcards)"
|
79
67
|
method_option :cpu, :type => :numeric, :default => 4
|
80
68
|
method_option :min_qual, :type => :numeric, :default => 20
|
81
69
|
def trim(file)
|
82
|
-
|
70
|
+
list = Dir.glob(file).sort
|
71
|
+
groups = list / options[:cpu].to_i # get group of files equal to number of CPUs
|
83
72
|
cmd_blocks = []
|
84
|
-
|
85
|
-
|
86
|
-
block = -> do
|
73
|
+
groups.each do |files|
|
74
|
+
Parallel.each(files, :in_processes => options[:cpu].to_i) do |file|
|
87
75
|
invoke "quality:fastq_stats", [file], {output:file+".stats"}
|
88
76
|
trim_position = options[:read_length]
|
89
77
|
lines = File.read(file+".stats").split("\n")
|
@@ -109,15 +97,7 @@ class Pre < Thor
|
|
109
97
|
trim.run
|
110
98
|
end
|
111
99
|
end
|
112
|
-
|
113
|
-
cmd_blocks << block
|
114
|
-
|
115
|
-
if cmd_blocks.size == options[:cpu]
|
116
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
117
|
-
cmd_blocks = []
|
118
|
-
end
|
119
100
|
end
|
120
|
-
Bio::Ngs::Utils.parallel_exec(cmd_blocks)
|
121
101
|
end
|
122
102
|
|
123
103
|
private
|
data/lib/tasks/project.thor
CHANGED
@@ -20,7 +20,8 @@ class Project < Thor
|
|
20
20
|
method_option :type, :type => :string, :desc => "The project type (e.g. annotation)"
|
21
21
|
def new(name)
|
22
22
|
empty_directory name
|
23
|
-
empty_directory File.join("#{name}","
|
23
|
+
empty_directory File.join("#{name}","raw_data")
|
24
|
+
empty_directory File.join("#{name}","outputs")
|
24
25
|
empty_directory File.join("#{name}","tasks")
|
25
26
|
empty_directory File.join("#{name}","scripts")
|
26
27
|
self.name = name # for template to take the correct values
|
data/lib/tasks/quality.thor
CHANGED
@@ -45,19 +45,32 @@ class Quality < Thor
|
|
45
45
|
desc "fastq_stats FASTQ", "Reports quality of FASTQ file"
|
46
46
|
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
|
47
47
|
def fastq_stats(fastq)
|
48
|
-
|
48
|
+
|
49
|
+
output_file = options.output || "#{fastq.gsub(/\.fastq\.gz/,'')}_stats.txt"
|
49
50
|
stats = Bio::Ngs::Fastx::FastqStats.new
|
50
|
-
|
51
|
+
if fastq=~/\.gz/
|
52
|
+
stats.params = {output:output_file}
|
53
|
+
stats.pipe_ahead=["zcat", fastq]
|
54
|
+
else
|
55
|
+
stats.params = {input:fastq, output:output_file}
|
56
|
+
end
|
51
57
|
stats.run
|
52
|
-
|
53
|
-
|
58
|
+
require 'parallel'
|
59
|
+
go_in_parallel = [[:boxplot,[output_file]],
|
60
|
+
[:reads_coverage,[output_file]],
|
61
|
+
[:nucleotide_distribution,[output_file]]]
|
62
|
+
Parallel.map(go_in_parallel, in_processes:go_in_parallel.size) do |graph|
|
63
|
+
invoke graph.first, graph.last
|
64
|
+
end
|
65
|
+
#invoke :boxplot, [output_file]
|
66
|
+
#invoke :reads_coverage, [output_file]
|
54
67
|
end
|
55
68
|
|
56
69
|
desc "boxplot FASTQ_QUALITY_STATS", "plot reads quality as boxplot"
|
57
70
|
method_option :title, :type=>:string, :aliases =>"-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
58
71
|
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
|
59
72
|
def boxplot(fastq_quality_stats)
|
60
|
-
output_file = options.output || "#{fastq_quality_stats}.png"
|
73
|
+
output_file = options.output || "#{fastq_quality_stats}_boxplot.png"
|
61
74
|
boxplot = Bio::Ngs::Fastx::ReadsBoxPlot.new
|
62
75
|
boxplot.params={input:fastq_quality_stats, output:output_file}
|
63
76
|
boxplot.run
|
@@ -74,6 +87,15 @@ class Quality < Thor
|
|
74
87
|
coverage.run
|
75
88
|
end
|
76
89
|
|
90
|
+
desc "nucleotide_distribution FASTQ_QUALITY_STATS", "plot the nucleotide distribution of the reads"
method_option :title, :type=>:string, :aliases =>"-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is the input file name with _nuc_dist.png appended."
# Run the NucleotideDistribution wrapper on a quality statistics file.
#
# fastq_quality_stats - path of the statistics file used as input.
#
# The result goes to options.output or, when that is not given, to
# "<fastq_quality_stats>_nuc_dist.png".
# NOTE(review): the :title option is declared above but never forwarded to the
# wrapper here — confirm whether that is intended.
def nucleotide_distribution(fastq_quality_stats)
  output_file = options.output || "#{fastq_quality_stats}_nuc_dist.png"
  nucdistr = Bio::Ngs::Fastx::NucleotideDistribution.new
  nucdistr.params = {input: fastq_quality_stats, output: output_file}
  nucdistr.run
end
|
77
99
|
|
78
100
|
desc "illumina_b_profile_raw FASTQ", "perform a profile for reads coming fom Illumina 1.5+ and write the report in a txt file"
|
79
101
|
method_option :read_length, :type => :numeric, :required => true
|
data/lib/wrapper.rb
CHANGED
@@ -42,6 +42,7 @@ module Bio
|
|
42
42
|
@program = binary || self.class.program
|
43
43
|
@options = options
|
44
44
|
@params = {}
|
45
|
+
@pipe_ahead = []
|
45
46
|
end
|
46
47
|
|
47
48
|
# Parameters are accepted ONLY if the key is present as
|
@@ -73,6 +74,27 @@ module Bio
|
|
73
74
|
@params.clear
|
74
75
|
end
|
75
76
|
|
77
|
+
|
78
|
+
# Reader for the commands stored in @pipe_ahead.
# TODO: recursive call to other Bio::Ngs wrapped commands
def pipe_ahead
  return @pipe_ahead
end
|
82
|
+
|
83
|
+
# True when @pipe_ahead holds at least one element.
def pipe_ahead?
  !@pipe_ahead.empty?
end
|
86
|
+
|
87
|
+
# When set, this is an array describing a command in the form accepted by
# Open3.pipeline:
# http://www.ruby-doc.org/stdlib-1.9.3/libdoc/open3/rdoc/Open3.html#method-c-pipeline
# A nil/false value is stored as an empty array.
def pipe_ahead=(ary)
  @pipe_ahead = ary ? ary : []
end
|
92
|
+
|
93
|
+
# Build the command line as a flat array: program, sub program, the
# normalized parameters (formatted with opts[:separator]) and finally
# opts[:arguments]. Nested arrays are flattened and nil entries dropped.
def to_cmd_ary(opts={arguments:[],separator:"="})
  pieces = [program, sub_program]
  pieces << normalize_params(opts[:separator])
  pieces << opts[:arguments]
  pieces.flatten.compact
end
|
97
|
+
|
76
98
|
# Return the options and parameters formmatted as typed in the command line as a string
|
77
99
|
# opts[:separator] is important not all the applications require a "=" for separating options and values
|
78
100
|
# TODO: need to be compliant with Bio::Command ?
|
@@ -130,8 +152,8 @@ module Bio
|
|
130
152
|
raise "Can't write to any output file. With a program which writes on stdout you must provide a file name" if opts[:output_file].nil?
|
131
153
|
file_stdlog = File.open(opts[:output_file], 'w')
|
132
154
|
file_errlog = File.open(opts[:output_file]+".err",'w')
|
133
|
-
|
134
|
-
Bio::Command.call_command_open3(
|
155
|
+
#[program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact
|
156
|
+
Bio::Command.call_command_open3(to_cmd_ary(separator:opts[:separator], arguments:opts[:arguments])) do |pin, pout, perr|
|
135
157
|
pout.sync = true
|
136
158
|
perr.sync = true
|
137
159
|
t = Thread.start {pout.lines{|line| file_stdlog.puts line}}
|
@@ -143,12 +165,17 @@ module Bio
|
|
143
165
|
end #command call open3
|
144
166
|
file_stdlog.close
|
145
167
|
file_errlog.close
|
168
|
+
|
169
|
+
elsif pipe_ahead?
|
170
|
+
#in case the user setted the pipeline we use it.
|
171
|
+
Open3.pipeline(pipe_ahead, to_cmd_ary(separator:opts[:separator], arguments:opts[:arguments]))
|
146
172
|
else
|
147
173
|
# puts "Normlized #{normalize_params(opts[:separator])}"
|
148
174
|
# puts "Arguments #{opts[:arguments]}"
|
149
|
-
#puts [program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact.inspect
|
150
|
-
|
151
|
-
Bio::Command.query_command(
|
175
|
+
#puts [program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact.inspect
|
176
|
+
#Note: maybe seprator could be defined as a method for each wrapped program ?
|
177
|
+
Bio::Command.query_command(to_cmd_ary(separator:opts[:separator], arguments:opts[:arguments]))
|
178
|
+
#[program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact
|
152
179
|
end #if
|
153
180
|
end #run
|
154
181
|
|