demultiplexer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/LICENSE +340 -0
- data/README.md +2 -0
- data/Rakefile +17 -0
- data/bin/demultiplexer +194 -0
- data/demultiplexer.gemspec +26 -0
- data/lib/data_io.rb +207 -0
- data/lib/demultiplexer.rb +263 -0
- data/lib/demultiplexer/version.rb +26 -0
- data/lib/index_builder.rb +181 -0
- data/lib/sample_reader.rb +198 -0
- data/lib/screen.rb +39 -0
- data/lib/status.rb +101 -0
- data/test/helper.rb +51 -0
- data/test/test_data_io.rb +7 -0
- data/test/test_demultiplexer.rb +7 -0
- data/test/test_index_builder.rb +7 -0
- data/test/test_sample_reader.rb +7 -0
- data/test/test_screen.rb +7 -0
- data/test/test_status.rb +7 -0
- metadata +127 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
# Gem specification for the demultiplexer gem.
#
# The lib directory is put on the load path so that the version constant can
# be required without the gem being installed.
$LOAD_PATH.push File.expand_path('../lib', __FILE__)

require 'demultiplexer/version'

Gem::Specification.new do |s|
  s.name        = 'demultiplexer'
  s.version     = Demultiplexer::VERSION
  s.platform    = Gem::Platform::RUBY
  s.summary     = 'Demultiplexer'
  s.description = 'Demultiplex sequences from the Illumina platform.'
  s.authors     = ['Martin A. Hansen']
  s.email       = 'mail@maasha.dk'
  s.homepage    = 'http://github.com/maasha/demultiplexer'

  # SPDX identifier. The license headers in the source files grant "version 2
  # of the License, or (at your option) any later version".
  s.license     = 'GPL-2.0-or-later'

  # NOTE: date and rubygems_version are deliberately not set; RubyGems fills
  # both in when the gem is packaged. The deprecated rubyforge_project
  # attribute has been dropped.

  # File lists are taken from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_paths = ['lib']

  s.add_dependency('biopieces', '>= 0.4.1')
  s.add_dependency('google_hash', '>= 0.8.4')
  s.add_development_dependency('bundler', '>= 1.7.4')
  s.add_development_dependency('simplecov', '>= 0.9.2')
end
|
data/lib/data_io.rb
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
2
|
+
# #
|
3
|
+
# Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
|
4
|
+
# #
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
8
|
+
# of the License, or (at your option) any later version. #
|
9
|
+
# #
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
13
|
+
# GNU General Public License for more details. #
|
14
|
+
# #
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
16
|
+
# along with this program; if not, write to the Free Software #
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
18
|
+
# USA. #
|
19
|
+
# #
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
21
|
+
# #
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
|
+
|
24
|
+
# Class containing methods for reading and writing FASTQ data files.
|
25
|
+
class DataIO
|
26
|
+
# Constructor method for DataIO object.
#
# samples     - Collection of sample objects. Its size is used to derive the
#               key of the undetermined output files.
# fastq_files - Array with paths to FASTQ files (Strings).
# compress    - Compression type, matched against /gzip/ and /bzip2/ when
#               the output file suffixes are built.
# output_dir  - String with output directory.
#
# Raises RuntimeError if no suffix can be parsed from the _R1_ or _R2_ file.
def initialize(samples, fastq_files, compress, output_dir)
  @samples    = samples
  @compress   = compress # Read by the suffix helpers called below.
  @output_dir = output_dir

  @suffix1, @suffix2 = [/_R1_/, /_R2_/].map do |pattern|
    extract_suffix(fastq_files.grep(pattern).first)
  end

  @input_files  = identify_input_files(fastq_files)
  @undetermined = @samples.size + 1
  @file_hash    = nil
end
|
36
|
+
|
37
|
+
# Method that extracts the Sample, Lane, Region information from a given file
# name and appends the output file suffix to it.
#
# file - String with file name.
#
# Examples
#
#   extract_suffix("Sample1_S1_L001_R1_001.fastq.gz")
#   # => "_S1_L001_R1_001.fastq.gz" (when gzip compression is selected)
#
#   extract_suffix("Sample12_S12_L001_R1_001.fastq.gz")
#   # => "_S12_L001_R1_001.fastq.gz" (when gzip compression is selected)
#
# Returns String with SLR info and file suffix.
# Raises RuntimeError if the SLR information cannot be parsed from file.
def extract_suffix(file)
  # \d+ after _S so that sample numbers with more than one digit are accepted.
  # Regexp#match returns nil for a nil file, which ends in the failure below.
  match = /.+(_S\d+_L\d{3}_R[12]_\d{3}).+$/.match(file)

  fail "Unable to parse file SLR from: #{file}" unless match

  append_suffix(match[1])
end
|
56
|
+
|
57
|
+
# Method that appends a file suffix to a given Sample, Lane, Region
# information String based on @compress. The file suffix is ".fastq.gz" when
# @compress matches /gzip/, ".fastq.bz2" when it matches /bzip2/, and ".fastq"
# otherwise.
#
# slr - String Sample, Lane, Region information. The String is not modified.
#
# Examples
#
#   append_suffix("_S1_L001_R1_001")
#   # => "_S1_L001_R1_001.fastq.gz" (when @compress matches /gzip/)
#
# Returns a new String with SLR info and file suffix.
def append_suffix(slr)
  extension =
    case @compress
    when /gzip/  then '.fastq.gz'
    when /bzip2/ then '.fastq.bz2'
    else              '.fastq'
    end

  # Build a new String instead of appending in place, so that the caller's
  # argument is left untouched and frozen Strings are accepted.
  "#{slr}#{extension}"
end
|
81
|
+
|
82
|
+
# Method that identifies the different input files from a given Array of FASTQ
# files. The forward index file contains a _I1_, the reverse index file
# contains a _I2_, the forward read file contains a _R1_ and finally, the
# reverse read file contains a _R2_. For each of these the first matching
# file is used.
#
# fastq_files - Array with FASTQ files (Strings).
#
# Returns an Array with the input files (Strings) in the order index1, index2,
# read1, read2. A position is nil if no file matched.
def identify_input_files(fastq_files)
  [/_I1_/, /_I2_/, /_R1_/, /_R2_/].map do |pattern|
    fastq_files.grep(pattern).first
  end
end
|
100
|
+
|
101
|
+
# Method that opens the @input_files for reading, yields this DataIO object
# to the given block and closes the files again afterwards, also when the
# block raises.
#
# Yields self.
#
# Returns the value of the given block.
def open_input_files
  # Handles are collected one at a time so that the ones already opened are
  # closed if a later open fails.
  @file_ios = []
  @input_files.each { |path| @file_ios.push(BioPieces::Fastq.open(path)) }

  yield self
ensure
  close_input_files
end
|
117
|
+
|
118
|
+
# Method that closes open input files. Does nothing if no input files have
# been opened yet.
#
# Returns nothing.
def close_input_files
  (@file_ios || []).each(&:close)
end
|
124
|
+
|
125
|
+
# Method that reads one entry from each of the file handles in the @file_ios
# Array and yields them together. Iteration stops as soon as a round yields
# fewer than 4 entries, i.e. when at least one file has no more entries; the
# entries read from the other files in that round are discarded.
#
# Yields an Array with 4 Seq objects.
#
# Returns nothing
def each
  loop do
    entries = @file_ios.map(&:next_entry)

    break if entries.compact.size != 4

    yield entries
  end
end
|
140
|
+
|
141
|
+
# Method that opens the output files for writing, yields this DataIO object
# to the given block and closes the files again afterwards, also when the
# block raises. While the block runs, the file handles can be looked up by
# key through the [] method.
#
# Yields self.
#
# Returns the value of the given block.
def open_output_files
  @file_hash = {}

  # Merged one group at a time, so that the sample handles are already
  # registered (and will be closed) if opening the undetermined files fails.
  @file_hash.update(open_output_files_samples(@compress))
  @file_hash.update(open_output_files_undet(@compress))

  yield self
ensure
  close_output_files
end
|
156
|
+
|
157
|
+
# Method that closes all open output files. Does nothing if no output files
# have been opened yet (@file_hash is nil in that case).
#
# Returns nothing.
def close_output_files
  (@file_hash || {}).each_value { |handles| handles.each(&:close) }
end
|
160
|
+
|
161
|
+
# Getter method that looks up a tuple of file handles in @file_hash.
#
# key - Key used to lookup
#
# Returns Array with a tuple of IO objects, or nil if the key is unknown.
def [](key)
  @file_hash.fetch(key) { nil }
end
|
170
|
+
|
171
|
+
# Method that opens a forward and a reverse output file for writing for each
# sample. The files are placed in @output_dir and named by the sample id
# followed by @suffix1 and @suffix2, respectively.
#
# comp - Type of output compression, passed on to BioPieces::Fastq.open.
#
# Returns a Hash with the position of the sample in @samples as keys, and a
# tuple of file handles (forward, reverse) as values.
def open_output_files_samples(comp)
  @samples.each_with_index.each_with_object({}) do |(sample, position), handles|
    paths = [@suffix1, @suffix2].map do |suffix|
      File.join(@output_dir, "#{sample.id}#{suffix}")
    end

    handles[position] = paths.map do |path|
      BioPieces::Fastq.open(path, 'w', compress: comp)
    end
  end
end
|
190
|
+
|
191
|
+
# Method that opens the forward and reverse undetermined output files for
# writing. The files are placed in @output_dir and named "Undetermined"
# followed by @suffix1 and @suffix2, respectively.
#
# comp - Type of output compression, passed on to BioPieces::Fastq.open.
#
# Returns a Hash with @undetermined as the only key, and a tuple of file
# handles (forward, reverse) as value.
def open_output_files_undet(comp)
  paths = [@suffix1, @suffix2].map do |suffix|
    File.join(@output_dir, "Undetermined#{suffix}")
  end

  handles = paths.map do |path|
    BioPieces::Fastq.open(path, 'w', compress: comp)
  end

  { @undetermined => handles }
end
|
207
|
+
end
|
@@ -0,0 +1,263 @@
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
2
|
+
# #
|
3
|
+
# Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
|
4
|
+
# #
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
8
|
+
# of the License, or (at your option) any later version. #
|
9
|
+
# #
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
13
|
+
# GNU General Public License for more details. #
|
14
|
+
# #
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
16
|
+
# along with this program; if not, write to the Free Software #
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
18
|
+
# USA. #
|
19
|
+
# #
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
21
|
+
# #
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
|
+
|
24
|
+
# Class containing methods for demultiplexing MiSeq sequences.
|
25
|
+
class Demultiplexer
|
26
|
+
attr_reader :status
|
27
|
+
|
28
|
+
# Public: Class method to run demultiplexing of MiSeq sequences. The status
# is saved to the file Demultiplex.log in the output directory.
#
# fastq_files - Array with paths to FASTQ files.
# options     - Options Hash. The values are used as given; no defaults are
#               applied by this method.
#               :verbose        - Verbose flag.
#               :mismatches_max - Integer value indicating max mismatches.
#               :samples_file   - String with path to samples file.
#               :revcomp_index1 - Flag indicating that index1 should be
#                                 reverse-complemented.
#               :revcomp_index2 - Flag indicating that index2 should be
#                                 reverse-complemented.
#               :compress       - Type of output compression.
#               :output_dir     - String with output directory. Required,
#                                 since the log file path is built from it.
#               :scores_min     - An Integer representing the Phred score
#                                 minimum, such that a read is dropped if a
#                                 single position in the index contains a
#                                 score below this value.
#               :scores_mean    - An Integer representing the mean Phred
#                                 score, such that a read is dropped if the
#                                 mean quality score is below this value.
#
# Examples
#
#   Demultiplexer.run(['I1.fq', 'I2.fq', 'R1.fq', 'R2.fq'],
#                     samples_file: 'samples.txt', output_dir: 'out')
#   # => <Demultiplexer>
#
# Returns Demultiplexer object
def self.run(fastq_files, options)
  log_file      = File.join(options[:output_dir], 'Demultiplex.log')
  demultiplexer = new(fastq_files, options)
  Screen.clear if options[:verbose]
  demultiplexer.demultiplex
  puts demultiplexer.status if options[:verbose]
  demultiplexer.status.save(log_file)

  # Return the object as documented, not the result of saving the log.
  demultiplexer
end
|
65
|
+
|
66
|
+
# Constructor method for Demultiplexer object. Reads the samples, builds the
# search index and sets up the DataIO and Status objects.
#
# fastq_files - Array with paths to FASTQ files.
# options     - Options Hash. The values are used as given; no defaults are
#               applied by this method.
#               :verbose        - Verbose flag.
#               :mismatches_max - Integer value indicating max mismatches.
#               :samples_file   - String with path to samples file.
#               :revcomp_index1 - Flag indicating that index1 should be
#                                 reverse-complemented.
#               :revcomp_index2 - Flag indicating that index2 should be
#                                 reverse-complemented.
#               :compress       - Type of output compression.
#               :output_dir     - String with output directory.
#               :scores_min     - An Integer representing the Phred score
#                                 minimum, such that a read is dropped if a
#                                 single position in the index contains a
#                                 score below this value.
#               :scores_mean    - An Integer representing the mean Phred
#                                 score, such that a read is dropped if the
#                                 mean quality score is below this value.
#
# Returns Demultiplexer object
def initialize(fastq_files, options)
  @options = options

  samples_file = options[:samples_file]
  revcomp1     = options[:revcomp_index1]
  revcomp2     = options[:revcomp_index2]
  @samples     = SampleReader.read(samples_file, revcomp1, revcomp2)

  # Key of the Undetermined output files; DataIO computes the same value.
  @undetermined = @samples.size + 1

  @index_hash = IndexBuilder.build(@samples, options[:mismatches_max])
  @data_io    = DataIO.new(@samples,
                           fastq_files,
                           options[:compress],
                           options[:output_dir])
  @status     = Status.new
end
|
100
|
+
|
101
|
+
# Method to demultiplex reads according to the index. This is done by
# simultaneously read-opening all input files (forward and reverse index
# files and forward and reverse read files) and reading one entry from each.
# Such four entries we call a set of entries. If the quality scores from
# either index1 or index2 fail the criteria for mean and min required
# quality the set is skipped. If the combined indexes are found in the
# search index, then the reads are written to files according to the sample
# information in the search index. If the combined indexes are not found,
# then the reads have their names appended with the index sequences and the
# reads are written to the Undetermined files.
#
# The status count is increased by 2 for every set. With @options[:verbose]
# set, the status is printed whenever the count is a multiple of 1000.
#
# Returns nothing.
def demultiplex
  @data_io.open_input_files do |ios_in|
    @data_io.open_output_files do |ios_out|
      ios_in.each do |entry_set|
        index1, index2, read1, read2 = entry_set

        @status.count += 2
        puts(@status) if @options[:verbose] && (@status.count % 1_000).zero?

        if index_qual_ok?(index1, index2)
          match_index(ios_out, index1, index2, read1, read2)
        end
      end
    end
  end
end
|
130
|
+
|
131
|
+
private
|
132
|
+
|
133
|
+
# Method that matches the combined index1 and index2 sequences against the
# search index. The lookup key is the hash value of the two index sequences
# joined together. In case of a match the reads are written to file according
# to the information in the search index, otherwise the reads will have their
# names appended with the index sequences and they will be written to the
# Undetermined files.
#
# ios_out - DataIO object with an accessor method for file output handles.
# index1  - Seq object with index1.
# index2  - Seq object with index2.
# read1   - Seq object with read1.
# read2   - Seq object with read2.
#
# Returns nothing.
def match_index(ios_out, index1, index2, read1, read2)
  combined  = "#{index1.seq}#{index2.seq}"
  sample_id = @index_hash[combined.hash]

  return write_match(ios_out, sample_id, read1, read2) if sample_id

  write_undetermined(ios_out, index1, index2, read1, read2)
end
|
153
|
+
|
154
|
+
# Method that writes the reads of an index match to the forward and reverse
# output files of the matched sample, and adds 2 to the match count.
#
# ios_out   - DataIO object with an accessor method for file output handles.
# sample_id - Key used to look up the output file handles in ios_out.
# read1     - Seq object with read1.
# read2     - Seq object with read2.
#
# Returns nothing.
def write_match(ios_out, sample_id, read1, read2)
  @status.match += 2

  forward_io, reverse_io = ios_out[sample_id]
  forward_io.puts(read1.to_fastq)
  reverse_io.puts(read2.to_fastq)
end
|
169
|
+
|
170
|
+
# Method that appends the index sequences to the read names (index1 to read1
# and index2 to read2, separated by a space), writes the reads to the
# Undetermined files and adds 2 to the undetermined count. The given read
# objects are modified.
#
# ios_out - DataIO object with an accessor method for file output handles.
# index1  - Seq object with index1.
# index2  - Seq object with index2.
# read1   - Seq object with read1.
# read2   - Seq object with read2.
#
# Returns nothing.
def write_undetermined(ios_out, index1, index2, read1, read2)
  @status.undetermined += 2

  [[read1, index1], [read2, index2]].each do |read, index|
    read.seq_name = "#{read.seq_name} #{index.seq}"
  end

  forward_io, reverse_io = ios_out[@undetermined]
  forward_io.puts(read1.to_fastq)
  reverse_io.puts(read2.to_fastq)
end
|
189
|
+
|
190
|
+
# Method to check the quality scores of the given indexes. The indexes are OK
# only if both the mean score check and the min score check pass. The min
# score check is not run when the mean score check fails.
#
# index1 - Index1 Seq object.
# index2 - Index2 Seq object.
#
# Returns true if quality OK, else false.
def index_qual_ok?(index1, index2)
  return false unless index_qual_mean_ok?(index1, index2)

  index_qual_min_ok?(index1, index2)
end
|
203
|
+
|
204
|
+
# Method to check the mean quality scores of the given indexes. The indexes
# are OK unless the mean score of one of them is below
# @options[:scores_mean]. On failure, 2 is added to the bad mean count of
# the first failing index only; index2 is not checked when index1 fails.
#
# index1 - Index1 Seq object.
# index2 - Index2 Seq object.
#
# Returns true if quality mean OK, else false.
def index_qual_mean_ok?(index1, index2)
  threshold = @options[:scores_mean]

  if index1.scores_mean < threshold
    @status.index1_bad_mean += 2
    false
  elsif index2.scores_mean < threshold
    @status.index2_bad_mean += 2
    false
  else
    true
  end
end
|
223
|
+
|
224
|
+
# Method to check the min quality scores of the given indexes. The indexes
# are OK unless the min score of one of them is below @options[:scores_min].
# On failure, 2 is added to the bad min count of the first failing index
# only; index2 is not checked when index1 fails.
#
# index1 - Index1 Seq object.
# index2 - Index2 Seq object.
#
# Returns true if quality min OK, else false.
def index_qual_min_ok?(index1, index2)
  threshold = @options[:scores_min]

  if index1.scores_min < threshold
    @status.index1_bad_min += 2
    false
  elsif index2.scores_min < threshold
    @status.index2_bad_min += 2
    false
  else
    true
  end
end
|
243
|
+
|
244
|
+
# Method that iterates over @samples and compiles a sorted Array with all
# unique index1 sequences. The Array is also stored in @status.index1.
#
# Returns Array with uniq index1 sequences.
def uniq_index1
  # Plain Array operations are used here: the previous each_with_object block
  # had its parameters swapped (the element is yielded first, the collector
  # second) and therefore raised NoMethodError.
  @status.index1 = @samples.map(&:index1).uniq.sort
end
|
253
|
+
|
254
|
+
# Method that iterates over @samples and compiles a sorted Array with all
# unique index2 sequences. The Array is also stored in @status.index2.
#
# Returns Array with uniq index2 sequences.
def uniq_index2
  # Plain Array operations are used here: the previous each_with_object block
  # had its parameters swapped (the element is yielded first, the collector
  # second) and therefore raised NoMethodError.
  @status.index2 = @samples.map(&:index2).uniq.sort
end
|
263
|
+
end
|