bio-ngs 0.4.6.alpha.01 → 0.4.6.alpha.02
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +21 -21
- data/README.rdoc +51 -4
- data/VERSION +1 -1
- data/bin/biongs +1 -0
- data/bio-ngs.gemspec +36 -8
- data/features/cufflinks_gtf_parser.feature +22 -0
- data/features/cufflinks_gtf_parser_indexing.feature +20 -0
- data/features/step_definitions/cufflinks_gtf.rb +30 -0
- data/features/step_definitions/cufflinks_gtf_parser_indexing.rb +53 -0
- data/features/support/env.rb +2 -0
- data/lib/bio-ngs.rb +19 -5
- data/lib/bio/appl/ngs/cufflinks.rb +447 -281
- data/lib/bio/appl/ngs/cufflinks/gtf/gtf.rb +23 -0
- data/lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb +248 -0
- data/lib/bio/appl/ngs/cufflinks/gtf/transcript.rb +154 -0
- data/lib/bio/ngs/fs.rb +46 -0
- data/lib/bio/ngs/illumina/fastq.rb +176 -0
- data/lib/bio/ngs/illumina/illumina.rb +64 -0
- data/lib/bio/ngs/illumina/project.rb +81 -0
- data/lib/bio/ngs/illumina/sample.rb +85 -0
- data/lib/bio/ngs/task.rb +1 -1
- data/lib/bio/ngs/utils.rb +124 -112
- data/lib/meta.rb +162 -0
- data/lib/tasks/convert.thor +14 -14
- data/lib/tasks/filter.thor +158 -23
- data/lib/tasks/quality.thor +24 -4
- data/lib/tasks/rna.thor +26 -0
- data/lib/wrapper.rb +28 -0
- data/spec/bio/ngs/fs_spec.rb +70 -0
- data/spec/bio/ngs/illumina/fastq_spec.rb +52 -0
- data/spec/bio/ngs/illumina/illumina_spec.rb +21 -0
- data/spec/bio/ngs/illumina/project_spec.rb +0 -0
- data/spec/bio/ngs/illumina/sample_spec.rb +0 -0
- data/spec/bio/ngs/illumina/samples_spec.rb +0 -0
- data/spec/filter_spec.rb +25 -0
- data/spec/fixture/table_filter_list.txt +3 -0
- data/spec/fixture/table_filter_list_first_column.txt +2 -0
- data/spec/fixture/table_filter_source.tsv +44 -0
- data/spec/fixture/test-filtered-reference.fastq.gz +0 -0
- data/spec/fixture/test-merged-reference.fastq.gz +0 -0
- data/spec/fixture/test.fastq.gz +0 -0
- data/spec/meta_spec.rb +117 -0
- data/spec/spec_helper.rb +1 -1
- metadata +97 -69
data/lib/meta.rb
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'json'
require 'securerandom'

# Containers for objects described by free-form metadata (tag => value pairs).
module Meta

  # A named item carrying a metadata Hash. The name itself is stored in the
  # metadata under the :name tag.
  class Data
    attr_accessor :metadata

    # name     - identifier, stored as metadata[:name].
    # metadata - optional Hash of extra tags. It is merged after the name is
    #            set, so a :name entry in it overrides the name argument.
    def initialize(name, metadata={})
      @metadata = {:name => name}
      @metadata.merge! metadata
    end

    # Returns the value stored under the :name tag.
    def name
      @metadata[:name]
    end

    # Stores val under the :name tag.
    def name=(val)
      @metadata[:name] = val
    end

    # Two items are equal when both name and metadata match. Objects that do
    # not expose name/metadata (nil, strings, ...) are simply not equal
    # instead of raising NoMethodError.
    def ==(other)
      return false unless other.respond_to?(:name) && other.respond_to?(:metadata)
      name == other.name && metadata == other.metadata
    end

    # True when the metadata contains the given tag (key).
    def has_tag?(tag)
      metadata.key? tag
    end

    # True when any tag holds the given value.
    def has_value?(val)
      metadata.value? val
    end

    # Returns the value stored under tag, or nil.
    def [](tag)
      metadata[tag]
    end

    # Serializes class name, name and metadata as a JSON object.
    def to_json(*a)
      {
        "json_class" => self.class.name,
        "name" => name,
        "metadata" => metadata
      }.to_json(*a)
    end

    #TODO: a File class including/subclassing Data was planned here:
    #TODO: make this class generic and available to other classes
    #TODO: include or subclass original class File, I need to borrow most of its methods. File.exists? File.open File.read
    #TODO: configure a generic classifier to add any kind of tag passing a block do/yield?
  end #Data

  # A named collection of Data items (or nested Pools) indexed by item name.
  #TODO: this class could be generalized
  class Pool < Data
    include Enumerable
    attr_accessor :pool

    # name - pool identifier; a random UUID when omitted.
    def initialize(name=SecureRandom.uuid)
      super(name)
      @pool = {}
    end

    # Yields every member of the pool. Without a block an Enumerator is
    # returned, as Enumerable users expect.
    def each(&block)
      return enum_for(:each) unless block_given?
      @pool.each_value(&block)
    end

    # TODO implement <=>

    # Adds element to the pool; nil is ignored. When a member with the same
    # name already exists, the new element's metadata is merged into it
    # rather than replacing the member.
    def add(element)
      unless element.nil?
        if @pool.key? element.name #TODO I don't know if this is correct.
          @pool[element.name].metadata.merge! element.metadata
        else
          @pool[element.name] = element
        end
      end
    end
    alias :<< :add

    # True when the pool has no members.
    def empty?
      @pool.empty?
    end

    # Names of all members, in insertion order.
    def names
      @pool.keys
    end

    # Looks the argument up, in order, as a member name, as a tag, as a value
    # and finally inside nested pools; returns the first non-nil result.
    # A name lookup returns the member itself, the others return a new Pool.
    # TODO implement recursive query or passing multiple values as hash, intersect etc.
    def get(name_or_tag_or_value=nil)
      get_by_name(name_or_tag_or_value) ||
        get_by_tag(name_or_tag_or_value) ||
        get_by_value(name_or_tag_or_value) ||
        get_down_to_childer(name_or_tag_or_value)
    end #get

    # Returns the member registered under name, or nil.
    def get_by_name(name)
      @pool[name]
    end #get_by_name

    # Returns a Pool of the members carrying tag, or nil when none does.
    def get_by_tag(tag)
      get_generic :tag, tag
    end #get_by_tag

    # Returns a Pool of the members holding val under any tag, or nil.
    def get_by_value(val)
      get_generic :value, val
    end #get_by_value

    # Returns a Pool with the members whose tag equals val, or nil when there
    # is no match. A member that does not match but is itself a pool is
    # searched recursively and its (non-empty) result pool is added once.
    def get_by_tag_and_value(tag, val)
      ret_pool = Pool.new
      @pool.each_value do |member|
        if member.has_tag?(tag) && member[tag] == val
          ret_pool.add member
        elsif member.respond_to?(:get_by_tag_and_value) && member.respond_to?(:pool)
          # add ignores nil, so an empty nested result leaves ret_pool untouched
          ret_pool.add member.get_by_tag_and_value(tag, val)
        end
      end
      ret_pool unless ret_pool.empty?
    end #get_by_tag_and_value

    # Runs get(x) on every nested pool and collects the non-nil results in a
    # new Pool; returns nil when nothing was found.
    def get_down_to_childer(x)
      ret_pool = Pool.new
      @pool.each_value do |element|
        ret_pool.add element.get(x) if element.respond_to?(:get) && element.respond_to?(:pool)
      end
      ret_pool unless ret_pool.empty?
    end
    # Correctly spelled alias; the original name is kept for existing callers.
    alias :get_down_to_children :get_down_to_childer

    private

    # Shared implementation of get_by_tag / get_by_value.
    # type - :tag or :value (anything else raises ArgumentError).
    # data - the tag or value to look for.
    # Returns a Pool of matching members, or nil when there is none.
    def get_generic(type, data)
      type = type.to_sym
      unless [:tag, :value].include? type
        raise ArgumentError, "#{type} is not a valid parameter, use only tag or value"
      end
      ret_pool = Pool.new
      @pool.each_value do |meta|
        matched = (type == :tag) ? meta.has_tag?(data) : meta.has_value?(data)
        ret_pool.add(meta) if matched
      end
      ret_pool unless ret_pool.empty?
    end #get_generic

  end #Pool
end #Meta
|
data/lib/tasks/convert.thor
CHANGED
@@ -451,20 +451,20 @@ module Convert
|
|
451
451
|
end #Illumina
|
452
452
|
|
453
453
|
|
454
|
-
desc "list2table
|
455
|
-
def list2table(list)
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
end
|
454
|
+
# desc "list2table LIST", "reorganize a list of pairs key value in a table of key values. Tabular is the default separator"
|
455
|
+
# def list2table(list)
|
456
|
+
# dict = Hash.new{|h,k| h[k]=[]}
|
457
|
+
# File.open(ARGV[0],'r') do |f|
|
458
|
+
# f.each_line do |l|
|
459
|
+
# key, value = l.split
|
460
|
+
# dict[key]<<value
|
461
|
+
# end
|
462
|
+
# end
|
463
|
+
|
464
|
+
# dict.each_pair do |key, values|
|
465
|
+
# puts "#{key} #{values.join(' ')}"
|
466
|
+
# end
|
467
|
+
# end
|
468
468
|
|
469
469
|
end #Convert
|
470
470
|
# Add methods to Enumerable, which makes them available to Array
|
data/lib/tasks/filter.thor
CHANGED
@@ -1,19 +1,106 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../bio/ngs/utils')
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/../wrapper')
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/../bio/appl/ngs/cufflinks')
|
1
4
|
class Filter < Thor
|
2
5
|
|
6
|
+
class Cufflinks < Thor
  #TODO method_option :ucsc, :type => :boolean, :aliases => '-u', :desc => "use chr as UCSC a prefix for chromosomes, otherwise uses ENSEMBL notation without chr"

  desc "transcripts [GTF]", "Extract transcripts from Cufflinks' GTF"
  method_option :brand_new, :type => :boolean, :aliases => '-b', :desc => "get only brand new transcripts, no overlap with any annotation feature"
  method_option :new, :type => :boolean, :aliases => '-n', :desc => "get only new transcripts, overlapping annotations are accepted"
  method_option :annotated, :type => :boolean, :aliases => '-a', :desc => "get only annotated transcripts"
  method_option :mono_exon, :type => :boolean, :aliases => '-s', :desc => "get mono exon transcripts"
  method_option :multi_exons, :type => :boolean, :aliases => '-m', :desc => "get multi exons transcripts"
  method_option :length, :type => :numeric, :aliases => '-l', :desc => "transcripts with a length gt"
  method_option :coverage, :type => :numeric, :aliases => '-c', :desc => "transcripts with a coverage gt"
  method_option :bed, :type => :boolean, :aliases => '-t', :desc => "output data in bed format"
  method_option :count, :type => :boolean, :aliases => '-x', :desc => "counts the selected transcripts"
  method_option :discover, :type => :boolean, :aliases => '-d', :desc => "discovers transcripts.gtf files from within the current directory"
  method_option :split, :type => :boolean, :aliases => '-j', :desc => "split each transcript in a file"
  method_option :output, :type => :string, :aliases => '-o', :desc => "save the results in the output file"
  # Filters the transcripts of a Cufflinks GTF file and prints or saves them.
  #
  # gtf - path of the GTF file. When omitted together with --discover, every
  #       transcripts.gtf found below the current directory is processed.
  #
  # Raises ArgumentError when gtf is missing or does not exist.
  def transcripts(gtf=nil)
    if gtf.nil? && options[:discover]
      # The recursive calls pass a file name, so they take the branch below
      # and never re-enter discovery; the options hash needs no editing.
      Dir.glob("**/transcripts.gtf").each do |gtf_file|
        transcripts(gtf_file)
      end
    elsif !gtf.nil? && File.exist?(gtf)
      data = Bio::Ngs::Cufflinks::Gtf.new gtf
      data.set_lazy
      data.brand_new_isoforms if options[:brand_new]
      data.new_isoforms if options[:new]
      data.annotated_isoforms if options[:annotated]
      # the option is declared as :mono_exon (singular)
      data.mono_exons if options[:mono_exon]
      data.multi_exons if options[:multi_exons]
      data.length_gt(options[:length]) if options[:length]
      data.coverage_gt(options[:coverage]) if options[:coverage]

      if options[:bed] && options[:split]
        # one file per transcript, named after its transcript_id
        data.to_bed do |t, bed_exons|
          File.open(t.attributes[:transcript_id], 'w') do |w|
            w.puts bed_exons
          end
        end
      elsif options[:bed]
        with_output do |out|
          data.to_bed do |t, bed_exons|
            out.puts bed_exons
          end
        end
      elsif options[:count]
        with_output do |out|
          out.puts "#{gtf}:\t#{data.count}"
        end
      elsif options[:output]
        data.save(options[:output])
      else
        data.each_transcript do |t|
          $stdout.puts t
        end
      end
    else
      raise ArgumentError, "file #{gtf} doesn't exist"
    end
  end

  desc "tra_at_idx GTF IDX", "Extract transcripts from Cufflinks' GTF at specific location, print filename in output"
  method_option :split, :type => :boolean, :aliases => '-j', :desc => "split each transcript in a file"
  method_option :extract, :type => :numeric, :aliases => '-e', :desc => "extract the n-th transcript"
  method_option :ucsc, :type => :boolean, :aliases => '-u', :desc => "use chr as UCSC a prefix for chromosomes, otherwise uses ENSEMBL notation without chr"
  method_option :exons, :type => :boolean, :aliases => '-x', :desc => "proved in output only exons without transcripts", :default => true
  # Writes the transcript found at position idx of the GTF file to
  # "<gene_id>-<transcript_id>.bed" and prints that file name.
  #
  # gtf - path of the GTF file.
  # idx - position of the transcript (converted with to_i).
  #
  # Raises ArgumentError when no transcript is returned for idx.
  def tra_at_idx(gtf, idx)
    data = Bio::Ngs::Cufflinks::Gtf.new gtf
    t = data[idx.to_i]
    # NOTE(review): assumes Gtf#[] answers nil for an unknown index - confirm.
    raise ArgumentError, "no transcript at index #{idx} in #{gtf}" if t.nil?
    t.set_ucsc_notation if options[:ucsc]
    fn = "#{t.attributes[:gene_id]}-#{t.attributes[:transcript_id]}.bed"
    File.open(fn, 'w') do |f|
      f.puts t.to_bed(options[:exons]) #by default only the exons
    end
    puts fn
  end

  private

  # Yields the stream results must be written to: the --output file (opened
  # for writing and closed when the block ends) or $stdout.
  def with_output
    if options[:output]
      File.open(options[:output], 'w') { |out| yield out }
    else
      yield $stdout
    end
  end

end #Cufflinks
|
86
|
+
|
87
|
+
|
3
88
|
# Assume that this is a plain list of elements, with just one column. In the future it could be
|
4
89
|
# a table as well.
|
5
90
|
desc "by_list TABLE LIST", "Extract from TABLE the row with a key in LIST"
|
6
|
-
method_option :exclude, :type => :boolean,
|
7
|
-
method_option :tablekey, :type => :numeric,
|
8
|
-
method_option :listkey, :type => :numeric,
|
9
|
-
method_option :delimiter, :type => :string, :default => " ",
|
10
|
-
method_option :skip_table_header, :type => :boolean, :default => true,
|
11
|
-
method_option :skip_list_header, :type => :boolean, :default => true,
|
12
|
-
method_option :skip_table_lines, :type => :numeric,
|
13
|
-
method_option :skip_list_lines, :type => :numeric,
|
14
|
-
method_option :output, :type => :string,
|
91
|
+
method_option :exclude, :type => :boolean, :aliases => '-e', :desc => "return the elements in TABLE which are not listed in LIST"
|
92
|
+
method_option :tablekey, :type => :numeric, :aliases => '-k', :desc =>"which field is the key to consider, start from 0"
|
93
|
+
method_option :listkey, :type => :numeric, :aliases => '-l', :desc =>"which field is the key to consider, start from 0"
|
94
|
+
method_option :delimiter, :type => :string, :default => " ", :aliases => '-d'
|
95
|
+
method_option :skip_table_header, :type => :boolean, :default => true, :aliases => '-h', :desc => 'Skip first line, usually the header'
|
96
|
+
method_option :skip_list_header, :type => :boolean, :default => true, :aliases => '-j', :desc => 'Skip first line, usually the header'
|
97
|
+
method_option :skip_table_lines, :type => :numeric, :aliases => '-n', :desc => 'Skip Ns line before start'
|
98
|
+
method_option :skip_list_lines, :type => :numeric, :aliases => '-m', :desc => 'Skip Ns line before start'
|
99
|
+
method_option :output, :type => :string, :aliases => '-o', :desc => 'Output results to file'
|
15
100
|
method_option :keep_skipped_lines, :type => :boolean, :default => false, :aliases => '-g', :desc => 'Write on output skipped lines from the TABLE file, header and number of lines skipped using option skip_table_line'
|
16
|
-
method_option :zero_index_system, :type => :boolean, :default => true,
|
101
|
+
method_option :zero_index_system, :type => :boolean, :default => true, :aliases => '-s', :desc => 'Starts Index from ZERO ? Otherwise starts from ONE'
|
102
|
+
method_option :fuse, :type => :boolean, :default => false, :aliases => '-f', :desc => 'JOIN two input file using a specific key'
|
103
|
+
method_option :in_column_delimiter, :type => :string, :aliases => '-i', :desc => 'Define a delimiter for table key, if setted we assume to split the key columns by this separator'
|
17
104
|
def by_list(table, list)
|
18
105
|
unless File.exists?(table)
|
19
106
|
STDERR.puts "by_list: #{table} does not exist."
|
@@ -25,7 +112,9 @@ class Filter < Thor
|
|
25
112
|
end
|
26
113
|
table_key_idx = options[:tablekey] || 0 # by default the first element of the table.
|
27
114
|
list_key_idx = options[:listkey] || 0
|
28
|
-
|
115
|
+
fuse = options[:fuse] || false
|
116
|
+
#increment indexes in case user wants to start from 1 and not from 0
|
117
|
+
#TODO: fix: the indexes must be decremented, not incremented - a user counting from 1 passes a value that is one higher than the 0-based index
|
29
118
|
unless options[:zero_index_system]
|
30
119
|
table_key_idx+=1
|
31
120
|
list_key_idx+=1
|
@@ -38,17 +127,31 @@ class Filter < Thor
|
|
38
127
|
if (nlines = options[:skip_list_lines])
|
39
128
|
nlines.times.each{|i| flist.readline}
|
40
129
|
end
|
41
|
-
flist.readline
|
130
|
+
flist.readline if options[:skip_list_header]
|
42
131
|
list_dictionary = Hash.new {|hash,key| hash[key] = :fool}
|
43
132
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
133
|
+
#TODO: refactor, find a smarter way to distinguish between fuse or not
|
134
|
+
if fuse
|
135
|
+
flist.each_line do |line|
|
136
|
+
#split row
|
137
|
+
#store the list key
|
138
|
+
#populate a hash with keys
|
139
|
+
list_line = line.split(delimiter)
|
140
|
+
#save the line but remove the key
|
141
|
+
list_key = list_line[list_key_idx]
|
142
|
+
list_line.delete_at(list_key_idx)
|
143
|
+
list_dictionary[list_key]=list_line
|
144
|
+
end
|
145
|
+
else
|
146
|
+
flist.each_line do |line|
|
147
|
+
#split row
|
148
|
+
#store the list key
|
149
|
+
#populate a hash with keys
|
150
|
+
list_dictionary[line.split(delimiter)[list_key_idx]]=:fool
|
151
|
+
end
|
49
152
|
end
|
50
153
|
flist.close
|
51
|
-
|
154
|
+
|
52
155
|
ftable = File.open(table, 'r')
|
53
156
|
#skip header/lines if required
|
54
157
|
#keep skipped line in case it's a proprietary format
|
@@ -56,20 +159,52 @@ class Filter < Thor
|
|
56
159
|
if (nlines = options[:skip_table_lines])
|
57
160
|
nlines.times.each{|i| skipped_lines << ftable.readline}
|
58
161
|
end
|
59
|
-
|
162
|
+
|
163
|
+
skipped_lines << ftable.readline if options[:skip_table_header]
|
164
|
+
|
60
165
|
#list_dictionary = Hash.new {|hash,key| hash[key] = :fool}
|
61
166
|
|
62
|
-
fout = (output_name=options[:output]).nil? ?
|
167
|
+
fout = (output_name=options[:output]).nil? ? $stdout : File.open(output_name,'w')
|
63
168
|
fout.puts skipped_lines if keep_skipped_lines
|
169
|
+
|
170
|
+
fuse_lambda = if fuse
|
171
|
+
lambda {|table_line, list_dict, key| "#{table_line.chomp}#{delimiter}#{list_dict[key].join(delimiter)}" }
|
172
|
+
#don't know if need to chomp
|
173
|
+
else
|
174
|
+
lambda {|table_line, list_dict, key| table_line}
|
175
|
+
end
|
64
176
|
ftable.each_line do |line|
|
65
177
|
#search for a key in the dictionary/list
|
66
|
-
if list_dictionary.key?(line.split(delimiter)[table_key_idx]) || options[:exclude]
|
178
|
+
#if list_dictionary.key?(line.split(delimiter)[table_key_idx]) || options[:exclude]
|
179
|
+
if find_key_in_dictionary(line.split(delimiter)[table_key_idx], list_dictionary, options[:in_column_delimiter]) || options[:exclude]
|
67
180
|
fout.puts line
|
68
181
|
end
|
69
182
|
end
|
70
183
|
ftable.close
|
71
|
-
fout.close
|
184
|
+
fout.close unless options[:output].nil?
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
private
|
190
|
+
|
191
|
+
# Tells whether key is listed in dict.
#
# key       - the table cell used as key; nil when the row has fewer columns
#             than the key index.
# dict      - Hash whose keys are the accepted identifiers (only key? is
#             used, so a default block on the Hash is never triggered).
# split_key - optional delimiter; when given, the cell is split on it and a
#             match on any of the resulting pieces is enough.
#
# Returns true or false.
def find_key_in_dictionary(key, dict, split_key=nil)
  return dict.key?(key) if split_key.nil?
  # a missing cell cannot be split and therefore matches nothing
  return false if key.nil?
  key.split(split_key).any? { |ikey| dict.key?(ikey) }
end
|
73
206
|
|
74
207
|
|
75
|
-
|
208
|
+
|
209
|
+
|
210
|
+
end
|
data/lib/tasks/quality.thor
CHANGED
@@ -45,7 +45,6 @@ class Quality < Thor
|
|
45
45
|
desc "fastq_stats FASTQ", "Reports quality of FASTQ file"
|
46
46
|
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
|
47
47
|
def fastq_stats(fastq)
|
48
|
-
|
49
48
|
output_file = options.output || "#{fastq.gsub(/\.fastq\.gz/,'')}_stats.txt"
|
50
49
|
stats = Bio::Ngs::Fastx::FastqStats.new
|
51
50
|
if fastq=~/\.gz/
|
@@ -60,12 +59,33 @@ class Quality < Thor
|
|
60
59
|
[:reads_coverage,[output_file]],
|
61
60
|
[:nucleotide_distribution,[output_file]]]
|
62
61
|
Parallel.map(go_in_parallel, in_processes:go_in_parallel.size) do |graph|
|
63
|
-
|
62
|
+
send graph.first, graph.last
|
64
63
|
end
|
65
|
-
#invoke :boxplot, [output_file]
|
66
|
-
#invoke :reads_coverage, [output_file]
|
67
64
|
end
|
68
65
|
|
66
|
+
desc "illumina_projects_stats", "Reports quality of FASTQ files in an Illumina project directory"
method_option :cpus, :type=>:numeric, :default=>4, :aliases=>'-c', :desc=>'Number of processes to use.'
# Runs fastq_stats on every reads file of the projects built from
# directory, using options[:cpus] worker processes. Prints a message on
# STDERR when directory is not recognised as an Illumina project directory.
def illumina_projects_stats(directory=".")
  unless File.directory?(directory) && Bio::Ngs::Illumina.project_directory?(directory)
    STDERR.puts "illumina_projects_stats: Not an Illumina directory"
    return
  end

  fastq_paths = []
  Bio::Ngs::Illumina.build(directory).each do |_project_name, project|
    project.samples_path.each do |reads_file|
      # reads_file is a hash keyed by :left and/or :right; single-end reads
      # are not handled yet.
      #TODO: refactor these calls
      [:left, :right].each do |side|
        fastq_paths << File.join(directory, reads_file[side]) if reads_file.key?(side)
      end
    end
  end

  Parallel.map(fastq_paths, :in_processes => options[:cpus]) do |fastq_path|
    fastq_stats fastq_path
  end
end
|
88
|
+
|
69
89
|
desc "boxplot FASTQ_QUALITY_STATS", "plot reads quality as boxplot"
|
70
90
|
method_option :title, :type=>:string, :aliases =>"-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
|
71
91
|
method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
|