demultiplexer 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +6 -3
- data/bin/demultiplexer +67 -59
- data/demultiplexer.gemspec +12 -12
- data/lib/data_io.rb +132 -96
- data/lib/demultiplexer/version.rb +2 -1
- data/lib/demultiplexer.rb +34 -37
- data/lib/index_builder.rb +29 -33
- data/lib/sample_reader.rb +45 -23
- data/lib/status.rb +48 -21
- data/test/helper.rb +5 -0
- data/test/test_data_io.rb +158 -2
- data/test/test_demultiplexer.rb +206 -2
- data/test/test_index_builder.rb +57 -2
- data/test/test_sample_reader.rb +75 -2
- data/test/test_status.rb +56 -2
- metadata +3 -6
- data/test/test_screen.rb +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9fd3a1561408ab33687de0b73f424873850cb55e
|
4
|
+
data.tar.gz: f57280b5897b453d43b0de72ecd72414ed4ac428
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4db2e0c15013c7a03ae483d0595b87feb147ff7284f2ae3541f0fe21c7b3e57e86d25cb4c1edd71e12042f5cce58f6c8773968d756f4d9812a20886205d71475
|
7
|
+
data.tar.gz: 1cfea327fdda5aa1d7f263d9b6f2e43890f4fcdcdac5c1380964c62cc39918e84b9121bfa0724625601a71f65202933066229b2abd267203b01398a231b36118
|
data/Rakefile
CHANGED
@@ -4,14 +4,17 @@ require 'rake/testtask'
|
|
4
4
|
|
5
5
|
Bundler::GemHelper.install_tasks
|
6
6
|
|
7
|
-
task :
|
7
|
+
task default: 'test'
|
8
8
|
|
9
9
|
Rake::TestTask.new do |t|
|
10
|
-
t.test_files = Dir['test/*'].select
|
10
|
+
t.test_files = Dir['test/*'].select do |file|
|
11
|
+
File.basename(file).match(/^test_.+\.rb$/)
|
12
|
+
end
|
13
|
+
|
11
14
|
t.warning = true
|
12
15
|
end
|
13
16
|
|
14
|
-
desc
|
17
|
+
desc 'Add or update rdoc'
|
15
18
|
task :doc do
|
16
19
|
`rdoc lib/`
|
17
20
|
end
|
data/bin/demultiplexer
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'optparse'
|
4
|
+
require 'demultiplexer'
|
5
|
+
|
3
6
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
4
7
|
# #
|
5
8
|
# Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
|
@@ -23,56 +26,56 @@
|
|
23
26
|
# #
|
24
27
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
25
28
|
|
26
|
-
USAGE = <<USAGE
|
27
|
-
This program demultiplexes Illumina Paired data given a samples file and four
|
28
|
-
FASTQ files containing forward and reverse index data and forward and reverse
|
29
|
-
read data.
|
30
|
-
|
31
|
-
The samples file consists of three tab-separated columns: sample_id, forward
|
32
|
-
index, reverse index).
|
33
|
-
|
34
|
-
The FASTQ files are generated by the Illumina MiSeq instrument by adding the
|
35
|
-
following key:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
To the `MiSeq Reporter.exe.config` file located in the `MiSeq Reporter`
|
40
|
-
installation folder, `C:\\Illumina\\MiSeqReporter` and restarting the
|
41
|
-
|
42
|
-
|
43
|
-
http://support.illumina.com/downloads/miseq_reporter_user_guide_15042295.html
|
44
|
-
|
45
|
-
Thus Basecalling using a SampleSheet.csv containing a single entry `Data`
|
46
|
-
no index information will generate the following files:
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
Demultiplexing will generate file pairs according to the sample information
|
58
|
-
in the samples file and input file suffix, one pair per sample, and these
|
59
|
-
will be output to the output directory. Also a file pair with undetermined
|
60
|
-
reads are created where the index sequence is appended to the sequence name.
|
61
|
-
|
62
|
-
It is possible to allow up to three mismatches per index. Also, read pairs
|
63
|
-
filtered if either of the indexes have a mean quality score below a given
|
64
|
-
threshold or any single position in the index have a quality score below a
|
65
|
-
given theshold.
|
66
|
-
|
67
|
-
Finally, a log file `Demultiplex.log` is output containing the status of the
|
68
|
-
demultiplexing process along with a list of the samples ids and unique index1
|
69
|
-
and index2 sequences.
|
70
|
-
|
71
|
-
Usage: #{File.basename(__FILE__)} [options] <FASTQ files>
|
72
|
-
|
73
|
-
Example: #{File.basename(__FILE__)} -m samples.tsv Data*.fastq.gz
|
74
|
-
|
75
|
-
Options:
|
29
|
+
USAGE = <<USAGE.gsub(/^\s+\|/, '')
|
30
|
+
|This program demultiplexes Illumina Paired data given a samples file and four
|
31
|
+
|FASTQ files containing forward and reverse index data and forward and reverse
|
32
|
+
|read data.
|
33
|
+
|
34
|
+
|The samples file consists of three tab-separated columns: sample_id, forward
|
35
|
+
|index, reverse index).
|
36
|
+
|
37
|
+
|The FASTQ files are generated by the Illumina MiSeq instrument by adding the
|
38
|
+
|following key:
|
39
|
+
|
40
|
+
| <add key="CreateFastqForIndexReads" value="1">
|
41
|
+
|
42
|
+
|To the `MiSeq Reporter.exe.config` file located in the `MiSeq Reporter`
|
43
|
+
|installation folder, `C:\\Illumina\\MiSeqReporter` and restarting the `MiSeq
|
44
|
+
|Reporter` service. See the MiSeq Reporter User Guide page 29:
|
45
|
+
|
46
|
+
|http://support.illumina.com/downloads/miseq_reporter_user_guide_15042295.html
|
47
|
+
|
48
|
+
|Thus Basecalling using a SampleSheet.csv containing a single entry `Data`
|
49
|
+
|with no index information will generate the following files:
|
50
|
+
|
51
|
+
| Data_S1_L001_I1_001.fastq.gz
|
52
|
+
| Data_S1_L001_I2_001.fastq.gz
|
53
|
+
| Data_S1_L001_R1_001.fastq.gz
|
54
|
+
| Data_S1_L001_R2_001.fastq.gz
|
55
|
+
| Undetermined_S0_L001_I1_001.fastq.gz
|
56
|
+
| Undetermined_S0_L001_I2_001.fastq.gz
|
57
|
+
| Undetermined_S0_L001_R1_001.fastq.gz
|
58
|
+
| Undetermined_S0_L001_R2_001.fastq.gz
|
59
|
+
|
|
60
|
+
|Demultiplexing will generate file pairs according to the sample information
|
61
|
+
|in the samples file and input file suffix, one pair per sample, and these
|
62
|
+
|will be output to the output directory. Also a file pair with undetermined
|
63
|
+
|reads are created where the index sequence is appended to the sequence name.
|
64
|
+
|
|
65
|
+
|It is possible to allow up to three mismatches per index. Also, read pairs
|
66
|
+
|are filtered if either of the indexes have a mean quality score below a given
|
67
|
+
|threshold or any single position in the index have a quality score below a
|
68
|
+
|given theshold.
|
69
|
+
|
|
70
|
+
|Finally, a log file `Demultiplex.log` is output containing the status of the
|
71
|
+
|demultiplexing process along with a list of the samples ids and unique index1
|
72
|
+
|and index2 sequences.
|
73
|
+
|
|
74
|
+
|Usage: #{File.basename(__FILE__)} [options] <FASTQ files>
|
75
|
+
|
|
76
|
+
|Example: #{File.basename(__FILE__)} -m samples.tsv Data*.fastq.gz
|
77
|
+
|
|
78
|
+
|Options:
|
76
79
|
USAGE
|
77
80
|
|
78
81
|
DEFAULT_SCORE_MIN = 16
|
@@ -95,8 +98,8 @@ OptionParser.new do |opts|
|
|
95
98
|
options[:samples_file] = o
|
96
99
|
end
|
97
100
|
|
98
|
-
opts.on('-m', '--mismatches_max <uint>', Integer,
|
99
|
-
|
101
|
+
opts.on('-m', '--mismatches_max <uint>', Integer, 'Maximum mismatches_max ',
|
102
|
+
"allowed (default=#{DEFAULT_MISMATCHES})") do |o|
|
100
103
|
options[:mismatches_max] = o
|
101
104
|
end
|
102
105
|
|
@@ -108,14 +111,17 @@ OptionParser.new do |opts|
|
|
108
111
|
options[:revcomp_index2] = o
|
109
112
|
end
|
110
113
|
|
111
|
-
opts.on('--scores_min <uint>', Integer,
|
112
|
-
|
113
|
-
|
114
|
+
opts.on('--scores_min <uint>', Integer, 'Drop reads if a single position in ',
|
115
|
+
'the index have a quality score ',
|
116
|
+
'below scores_min (default= ' \
|
117
|
+
"#{DEFAULT_SCORE_MIN})") do |o|
|
114
118
|
options[:scores_min] = o
|
115
119
|
end
|
116
120
|
|
117
|
-
opts.on('--scores_mean <uint>', Integer,
|
118
|
-
|
121
|
+
opts.on('--scores_mean <uint>', Integer, 'Drop reads if the mean index',
|
122
|
+
'quality score is below ',
|
123
|
+
'scores_mean (default= ' \
|
124
|
+
"#{DEFAULT_SCORE_MEAN})") do |o|
|
119
125
|
options[:scores_mean] = o
|
120
126
|
end
|
121
127
|
|
@@ -123,8 +129,10 @@ OptionParser.new do |opts|
|
|
123
129
|
options[:output_dir] = o
|
124
130
|
end
|
125
131
|
|
126
|
-
opts.on('-c', '--compress <gzip|bzip2>', String, 'Compress output using \
|
127
|
-
|
132
|
+
opts.on('-c', '--compress <gzip|bzip2>', String, 'Compress output using ' \
|
133
|
+
'gzip or bzip2 ',
|
134
|
+
'(default=' \
|
135
|
+
'<no compression>)') do |o|
|
128
136
|
options[:compress] = o.to_sym
|
129
137
|
end
|
130
138
|
|
data/demultiplexer.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
2
2
|
|
3
3
|
require 'demultiplexer/version'
|
4
4
|
|
@@ -6,21 +6,21 @@ Gem::Specification.new do |s|
|
|
6
6
|
s.name = 'demultiplexer'
|
7
7
|
s.version = Demultiplexer::VERSION
|
8
8
|
s.platform = Gem::Platform::RUBY
|
9
|
-
s.date = Time.now.strftime(
|
10
|
-
s.summary =
|
11
|
-
s.description =
|
12
|
-
s.authors = [
|
9
|
+
s.date = Time.now.strftime('%F')
|
10
|
+
s.summary = 'Demultiplexer'
|
11
|
+
s.description = 'Demultiplex sequences from the Illumina platform.'
|
12
|
+
s.authors = ['Martin A. Hansen']
|
13
13
|
s.email = 'mail@maasha.dk'
|
14
|
-
s.rubyforge_project =
|
14
|
+
s.rubyforge_project = 'demultiplexer'
|
15
15
|
s.homepage = 'http://github.com/maasha/demultiplexer'
|
16
16
|
s.license = 'GPL2'
|
17
|
-
s.rubygems_version =
|
17
|
+
s.rubygems_version = '2.0.0'
|
18
18
|
s.files = `git ls-files`.split("\n")
|
19
19
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
-
s.require_paths = [
|
20
|
+
s.require_paths = ['lib']
|
21
21
|
|
22
|
-
s.add_dependency(
|
23
|
-
s.add_dependency(
|
24
|
-
s.add_development_dependency(
|
25
|
-
s.add_development_dependency(
|
22
|
+
s.add_dependency('biopieces', '>= 0.4.1')
|
23
|
+
s.add_dependency('google_hash', '>= 0.8.4')
|
24
|
+
s.add_development_dependency('bundler', '>= 1.7.4')
|
25
|
+
s.add_development_dependency('simplecov', '>= 0.9.2')
|
26
26
|
end
|
data/lib/data_io.rb
CHANGED
@@ -21,40 +21,126 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
|
24
|
+
# Error class for all errors to do with DataIO.
|
25
|
+
DataIOError = Class.new(StandardError)
|
26
|
+
|
24
27
|
# Class containing methods for reading and writing FASTQ data files.
|
25
28
|
class DataIO
|
29
|
+
# Internal: Constructor method for DataIO objects.
|
30
|
+
#
|
31
|
+
# samples - Array with Sample objects consisting of id, index1 and index2
|
32
|
+
# fastq_files - Array of Strings with FASTQ file names of multiplexed data.
|
33
|
+
# compress - Symbol indicating if output data should be compressed with
|
34
|
+
# either gzip or bzip2.
|
35
|
+
# output_dir - String with path of output directory.
|
36
|
+
#
|
37
|
+
# Returns DataIO object.
|
26
38
|
def initialize(samples, fastq_files, compress, output_dir)
|
27
|
-
@samples
|
28
|
-
@compress
|
29
|
-
@output_dir
|
30
|
-
@suffix1
|
31
|
-
@suffix2
|
32
|
-
@input_files
|
33
|
-
@undetermined
|
34
|
-
@
|
39
|
+
@samples = samples
|
40
|
+
@compress = compress
|
41
|
+
@output_dir = output_dir
|
42
|
+
@suffix1 = extract_suffix(fastq_files, '_R1_')
|
43
|
+
@suffix2 = extract_suffix(fastq_files, '_R2_')
|
44
|
+
@input_files = identify_input_files(fastq_files)
|
45
|
+
@undetermined = @samples.size
|
46
|
+
@output_file_ios = nil
|
47
|
+
end
|
48
|
+
|
49
|
+
# Internal: Method that opens the @input_files for reading.
|
50
|
+
#
|
51
|
+
# Takes no arguments; the input file paths are read from @input_files.
|
52
|
+
#
|
53
|
+
# Yields self once the input files are open; the files are closed afterwards.
|
54
|
+
def open_input_files
|
55
|
+
@input_file_ios = []
|
56
|
+
|
57
|
+
@input_files.each do |input_file|
|
58
|
+
@input_file_ios << BioPieces::Fastq.open(input_file)
|
59
|
+
end
|
60
|
+
|
61
|
+
yield self
|
62
|
+
ensure
|
63
|
+
close_input_files
|
64
|
+
end
|
65
|
+
|
66
|
+
# Internal: Method that opens the output files for writing.
|
67
|
+
#
|
68
|
+
# Yields a Hash with an incrementing index as keys, and a tuple of file
|
69
|
+
# handles as values.
|
70
|
+
def open_output_files
|
71
|
+
@output_file_ios = {}
|
72
|
+
comp = @compress
|
73
|
+
|
74
|
+
@output_file_ios.merge!(open_output_files_samples(comp))
|
75
|
+
@output_file_ios.merge!(open_output_files_undet(comp))
|
76
|
+
|
77
|
+
yield self
|
78
|
+
ensure
|
79
|
+
close_output_files
|
80
|
+
end
|
81
|
+
|
82
|
+
# Internal: Method that reads a Seq entry from each of the file handles in
|
83
|
+
# the @input_file_ios Array. Iteration stops when no more Seq entries are
|
84
|
+
# found.
|
85
|
+
#
|
86
|
+
# Yields an Array with 4 Seq objects.
|
87
|
+
#
|
88
|
+
# Returns nothing
|
89
|
+
def each
|
90
|
+
loop do
|
91
|
+
entries = @input_file_ios.each_with_object([]) do |e, a|
|
92
|
+
a << e.next_entry
|
93
|
+
end
|
94
|
+
|
95
|
+
break if entries.compact.size != 4
|
96
|
+
|
97
|
+
yield entries
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Internal: Getter method that returns a tuple of file handles from
|
102
|
+
# @output_file_ios when given a sample index key.
|
103
|
+
#
|
104
|
+
# key - Sample index Integer key used for lookup.
|
105
|
+
#
|
106
|
+
# Returns Array with a tuple of IO objects.
|
107
|
+
def [](key)
|
108
|
+
@output_file_ios[key]
|
35
109
|
end
|
36
110
|
|
37
|
-
|
111
|
+
private
|
112
|
+
|
113
|
+
# Internal: Method that extracts the Sample, Lane, Region information from
|
114
|
+
# given files.
|
38
115
|
#
|
39
|
-
#
|
116
|
+
# files - Array with FASTQ file names as Strings.
|
117
|
+
# pattern - String with pattern to use for matching file names.
|
40
118
|
#
|
41
119
|
# Examples
|
42
120
|
#
|
43
|
-
# extract_suffix("Sample1_S1_L001_R1_001.fastq.gz")
|
121
|
+
# extract_suffix("Sample1_S1_L001_R1_001.fastq.gz", "_R1_")
|
44
122
|
# # => "_S1_L001_R1_001"
|
45
123
|
#
|
46
124
|
# Returns String with SLR info.
|
47
|
-
|
48
|
-
|
125
|
+
# Raises unless pattern matches exactly 1 file.
|
126
|
+
# Raises unless SLR info can be parsed.
|
127
|
+
def extract_suffix(files, pattern)
|
128
|
+
hits = files.grep(Regexp.new(pattern))
|
129
|
+
|
130
|
+
unless hits.size == 1
|
131
|
+
fail DataIOError, "Expecting exactly 1 hit but got: #{hits.size}"
|
132
|
+
end
|
133
|
+
|
134
|
+
if hits.first =~ /.+(_S\d_L\d{3}_R[12]_\d{3}).+$/
|
49
135
|
slr = Regexp.last_match(1)
|
50
136
|
else
|
51
|
-
fail "Unable to parse file SLR from: #{
|
137
|
+
fail DataIOError, "Unable to parse file SLR from: #{hits.first}"
|
52
138
|
end
|
53
139
|
|
54
140
|
append_suffix(slr)
|
55
141
|
end
|
56
142
|
|
57
|
-
# Method that appends a file suffix to a given Sample, Lane, Region
|
143
|
+
# Internal: Method that appends a file suffix to a given Sample, Lane, Region
|
58
144
|
# information String based on the @options[:compress] option. The
|
59
145
|
# file suffix can be either ".fastq.gz", ".fastq.bz2", or ".fastq".
|
60
146
|
#
|
@@ -79,14 +165,15 @@ class DataIO
|
|
79
165
|
slr
|
80
166
|
end
|
81
167
|
|
82
|
-
# Method identify the different input files from a given Array of
|
83
|
-
# The forward index file contains a _I1_, the reverse index file
|
84
|
-
# _I2_, the forward read file contains a _R1_ and finally, the
|
85
|
-
# file contain a _R2_.
|
168
|
+
# Internal: Method that identifies the different input files from a given Array of
|
169
|
+
# FASTQ files. The forward index file contains a _I1_, the reverse index file
|
170
|
+
# contains a _I2_, the forward read file contains a _R1_ and finally, the
|
171
|
+
# reverse read file contains a _R2_.
|
86
172
|
#
|
87
173
|
# fastq_files - Array with FASTQ files (Strings).
|
88
174
|
#
|
89
175
|
# Returns an Array with input files (Strings).
|
176
|
+
# Raises unless 4 input_files are found.
|
90
177
|
def identify_input_files(fastq_files)
|
91
178
|
input_files = []
|
92
179
|
|
@@ -95,113 +182,62 @@ class DataIO
|
|
95
182
|
input_files << fastq_files.grep(/_R1_/).first
|
96
183
|
input_files << fastq_files.grep(/_R2_/).first
|
97
184
|
|
98
|
-
input_files
|
99
|
-
|
100
|
-
|
101
|
-
# Method that opens the @input_files for reading.
|
102
|
-
#
|
103
|
-
# input_files - Array with input file paths.
|
104
|
-
#
|
105
|
-
# Returns an Array with IO objects (file handles).
|
106
|
-
def open_input_files
|
107
|
-
@file_ios = []
|
108
|
-
|
109
|
-
@input_files.each do |input_file|
|
110
|
-
@file_ios << BioPieces::Fastq.open(input_file)
|
111
|
-
end
|
112
|
-
|
113
|
-
yield self
|
114
|
-
ensure
|
115
|
-
close_input_files
|
116
|
-
end
|
117
|
-
|
118
|
-
# Method that closes open input files.
|
119
|
-
#
|
120
|
-
# Returns nothing.
|
121
|
-
def close_input_files
|
122
|
-
@file_ios.map(&:close)
|
123
|
-
end
|
124
|
-
|
125
|
-
# Method that reads a Seq entry from each of the file handles in the
|
126
|
-
# @file_ios Array. Iteration stops when no more Seq entries are found.
|
127
|
-
#
|
128
|
-
# Yields an Array with 4 Seq objects.
|
129
|
-
#
|
130
|
-
# Returns nothing
|
131
|
-
def each
|
132
|
-
loop do
|
133
|
-
entries = @file_ios.each_with_object([]) { |e, a| a << e.next_entry }
|
134
|
-
|
135
|
-
break if entries.compact.size != 4
|
136
|
-
|
137
|
-
yield entries
|
185
|
+
unless input_files.compact.size == 4
|
186
|
+
fail DataIOError, 'Expecting exactly 4 input_files but got: ' \
|
187
|
+
"#{input_files.compact.size}"
|
138
188
|
end
|
139
|
-
end
|
140
189
|
|
141
|
-
|
142
|
-
#
|
143
|
-
# Yeilds a Hash with an incrementing index as keys, and a tuple of file
|
144
|
-
# handles as values.
|
145
|
-
def open_output_files
|
146
|
-
@file_hash = {}
|
147
|
-
comp = @compress
|
148
|
-
|
149
|
-
@file_hash.merge!(open_output_files_samples(comp))
|
150
|
-
@file_hash.merge!(open_output_files_undet(comp))
|
151
|
-
|
152
|
-
yield self
|
153
|
-
ensure
|
154
|
-
close_output_files
|
155
|
-
end
|
156
|
-
|
157
|
-
def close_output_files
|
158
|
-
@file_hash.each_value { |value| value.map(&:close) }
|
159
|
-
end
|
160
|
-
|
161
|
-
# Getter method that returns a tuple of file handles from @file_hash when
|
162
|
-
# given a key.
|
163
|
-
#
|
164
|
-
# key - Key used to lookup
|
165
|
-
#
|
166
|
-
# Returns Array with a tuple of IO objects.
|
167
|
-
def [](key)
|
168
|
-
@file_hash[key]
|
190
|
+
input_files
|
169
191
|
end
|
170
192
|
|
171
|
-
# Method that opens the sample output files for writing.
|
193
|
+
# Internal: Method that opens the sample output files for writing.
|
172
194
|
#
|
173
195
|
# comp - Symbol with type of output compression.
|
174
196
|
#
|
175
197
|
# Returns a Hash with an incrementing index as keys, and a tuple of file
|
176
198
|
# handles as values.
|
177
199
|
def open_output_files_samples(comp)
|
178
|
-
|
200
|
+
output_file_ios = {}
|
179
201
|
|
180
202
|
@samples.each_with_index do |sample, i|
|
181
203
|
file_forward = File.join(@output_dir, "#{sample.id}#{@suffix1}")
|
182
204
|
file_reverse = File.join(@output_dir, "#{sample.id}#{@suffix2}")
|
183
205
|
io_forward = BioPieces::Fastq.open(file_forward, 'w', compress: comp)
|
184
206
|
io_reverse = BioPieces::Fastq.open(file_reverse, 'w', compress: comp)
|
185
|
-
|
207
|
+
output_file_ios[i] = [io_forward, io_reverse]
|
186
208
|
end
|
187
209
|
|
188
|
-
|
210
|
+
output_file_ios
|
189
211
|
end
|
190
212
|
|
191
|
-
# Method that opens the undertermined output files for writing.
|
213
|
+
# Internal: Method that opens the undetermined output files for writing.
|
192
214
|
#
|
193
215
|
# comp - Symbol with type of output compression.
|
194
216
|
#
|
195
217
|
# Returns a Hash with an incrementing index as keys, and a tuple of file
|
196
218
|
# handles as values.
|
197
219
|
def open_output_files_undet(comp)
|
198
|
-
|
220
|
+
output_file_ios = {}
|
199
221
|
file_forward = File.join(@output_dir, "Undetermined#{@suffix1}")
|
200
222
|
file_reverse = File.join(@output_dir, "Undetermined#{@suffix2}")
|
201
223
|
io_forward = BioPieces::Fastq.open(file_forward, 'w', compress: comp)
|
202
224
|
io_reverse = BioPieces::Fastq.open(file_reverse, 'w', compress: comp)
|
203
|
-
|
225
|
+
output_file_ios[@undetermined] = [io_forward, io_reverse]
|
204
226
|
|
205
|
-
|
227
|
+
output_file_ios
|
228
|
+
end
|
229
|
+
|
230
|
+
# Internal: Method that closes open input files.
|
231
|
+
#
|
232
|
+
# Returns nothing.
|
233
|
+
def close_input_files
|
234
|
+
@input_file_ios.map(&:close)
|
235
|
+
end
|
236
|
+
|
237
|
+
# Internal: Method that closes the file handles stored in @output_file_ios.
|
238
|
+
#
|
239
|
+
# Returns nothing.
|
240
|
+
def close_output_files
|
241
|
+
@output_file_ios.each_value { |value| value.map(&:close) }
|
206
242
|
end
|
207
243
|
end
|