demultiplexer 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +6 -3
- data/bin/demultiplexer +67 -59
- data/demultiplexer.gemspec +12 -12
- data/lib/data_io.rb +132 -96
- data/lib/demultiplexer/version.rb +2 -1
- data/lib/demultiplexer.rb +34 -37
- data/lib/index_builder.rb +29 -33
- data/lib/sample_reader.rb +45 -23
- data/lib/status.rb +48 -21
- data/test/helper.rb +5 -0
- data/test/test_data_io.rb +158 -2
- data/test/test_demultiplexer.rb +206 -2
- data/test/test_index_builder.rb +57 -2
- data/test/test_sample_reader.rb +75 -2
- data/test/test_status.rb +56 -2
- metadata +3 -6
- data/test/test_screen.rb +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9fd3a1561408ab33687de0b73f424873850cb55e
|
4
|
+
data.tar.gz: f57280b5897b453d43b0de72ecd72414ed4ac428
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4db2e0c15013c7a03ae483d0595b87feb147ff7284f2ae3541f0fe21c7b3e57e86d25cb4c1edd71e12042f5cce58f6c8773968d756f4d9812a20886205d71475
|
7
|
+
data.tar.gz: 1cfea327fdda5aa1d7f263d9b6f2e43890f4fcdcdac5c1380964c62cc39918e84b9121bfa0724625601a71f65202933066229b2abd267203b01398a231b36118
|
data/Rakefile
CHANGED
@@ -4,14 +4,17 @@ require 'rake/testtask'
|
|
4
4
|
|
5
5
|
Bundler::GemHelper.install_tasks
|
6
6
|
|
7
|
-
task :
|
7
|
+
task default: 'test'
|
8
8
|
|
9
9
|
Rake::TestTask.new do |t|
|
10
|
-
t.test_files = Dir['test/*'].select
|
10
|
+
t.test_files = Dir['test/*'].select do |file|
|
11
|
+
File.basename(file).match(/^test_.+\.rb$/)
|
12
|
+
end
|
13
|
+
|
11
14
|
t.warning = true
|
12
15
|
end
|
13
16
|
|
14
|
-
desc
|
17
|
+
desc 'Add or update rdoc'
|
15
18
|
task :doc do
|
16
19
|
`rdoc lib/`
|
17
20
|
end
|
data/bin/demultiplexer
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'optparse'
|
4
|
+
require 'demultiplexer'
|
5
|
+
|
3
6
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
4
7
|
# #
|
5
8
|
# Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
|
@@ -23,56 +26,56 @@
|
|
23
26
|
# #
|
24
27
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
25
28
|
|
26
|
-
USAGE = <<USAGE
|
27
|
-
This program demultiplexes Illumina Paired data given a samples file and four
|
28
|
-
FASTQ files containing forward and reverse index data and forward and reverse
|
29
|
-
read data.
|
30
|
-
|
31
|
-
The samples file consists of three tab-separated columns: sample_id, forward
|
32
|
-
index, reverse index).
|
33
|
-
|
34
|
-
The FASTQ files are generated by the Illumina MiSeq instrument by adding the
|
35
|
-
following key:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
To the `MiSeq Reporter.exe.config` file located in the `MiSeq Reporter`
|
40
|
-
installation folder, `C:\\Illumina\\MiSeqReporter` and restarting the
|
41
|
-
|
42
|
-
|
43
|
-
http://support.illumina.com/downloads/miseq_reporter_user_guide_15042295.html
|
44
|
-
|
45
|
-
Thus Basecalling using a SampleSheet.csv containing a single entry `Data`
|
46
|
-
no index information will generate the following files:
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
Demultiplexing will generate file pairs according to the sample information
|
58
|
-
in the samples file and input file suffix, one pair per sample, and these
|
59
|
-
will be output to the output directory. Also a file pair with undetermined
|
60
|
-
reads are created where the index sequence is appended to the sequence name.
|
61
|
-
|
62
|
-
It is possible to allow up to three mismatches per index. Also, read pairs
|
63
|
-
filtered if either of the indexes have a mean quality score below a given
|
64
|
-
threshold or any single position in the index have a quality score below a
|
65
|
-
given theshold.
|
66
|
-
|
67
|
-
Finally, a log file `Demultiplex.log` is output containing the status of the
|
68
|
-
demultiplexing process along with a list of the samples ids and unique index1
|
69
|
-
and index2 sequences.
|
70
|
-
|
71
|
-
Usage: #{File.basename(__FILE__)} [options] <FASTQ files>
|
72
|
-
|
73
|
-
Example: #{File.basename(__FILE__)} -m samples.tsv Data*.fastq.gz
|
74
|
-
|
75
|
-
Options:
|
29
|
+
USAGE = <<USAGE.gsub(/^\s+\|/, '')
|
30
|
+
|This program demultiplexes Illumina Paired data given a samples file and four
|
31
|
+
|FASTQ files containing forward and reverse index data and forward and reverse
|
32
|
+
|read data.
|
33
|
+
|
34
|
+
|The samples file consists of three tab-separated columns: sample_id, forward
|
35
|
+
|index, reverse index).
|
36
|
+
|
37
|
+
|The FASTQ files are generated by the Illumina MiSeq instrument by adding the
|
38
|
+
|following key:
|
39
|
+
|
40
|
+
| <add key="CreateFastqForIndexReads" value="1">
|
41
|
+
|
42
|
+
|To the `MiSeq Reporter.exe.config` file located in the `MiSeq Reporter`
|
43
|
+
|installation folder, `C:\\Illumina\\MiSeqReporter` and restarting the `MiSeq
|
44
|
+
|Reporter` service. See the MiSeq Reporter User Guide page 29:
|
45
|
+
|
46
|
+
|http://support.illumina.com/downloads/miseq_reporter_user_guide_15042295.html
|
47
|
+
|
48
|
+
|Thus Basecalling using a SampleSheet.csv containing a single entry `Data`
|
49
|
+
|with no index information will generate the following files:
|
50
|
+
|
51
|
+
| Data_S1_L001_I1_001.fastq.gz
|
52
|
+
| Data_S1_L001_I2_001.fastq.gz
|
53
|
+
| Data_S1_L001_R1_001.fastq.gz
|
54
|
+
| Data_S1_L001_R2_001.fastq.gz
|
55
|
+
| Undetermined_S0_L001_I1_001.fastq.gz
|
56
|
+
| Undetermined_S0_L001_I2_001.fastq.gz
|
57
|
+
| Undetermined_S0_L001_R1_001.fastq.gz
|
58
|
+
| Undetermined_S0_L001_R2_001.fastq.gz
|
59
|
+
|
|
60
|
+
|Demultiplexing will generate file pairs according to the sample information
|
61
|
+
|in the samples file and input file suffix, one pair per sample, and these
|
62
|
+
|will be output to the output directory. Also a file pair with undetermined
|
63
|
+
|reads are created where the index sequence is appended to the sequence name.
|
64
|
+
|
|
65
|
+
|It is possible to allow up to three mismatches per index. Also, read pairs
|
66
|
+
|are filtered if either of the indexes have a mean quality score below a given
|
67
|
+
|threshold or any single position in the index have a quality score below a
|
68
|
+
|given theshold.
|
69
|
+
|
|
70
|
+
|Finally, a log file `Demultiplex.log` is output containing the status of the
|
71
|
+
|demultiplexing process along with a list of the samples ids and unique index1
|
72
|
+
|and index2 sequences.
|
73
|
+
|
|
74
|
+
|Usage: #{File.basename(__FILE__)} [options] <FASTQ files>
|
75
|
+
|
|
76
|
+
|Example: #{File.basename(__FILE__)} -m samples.tsv Data*.fastq.gz
|
77
|
+
|
|
78
|
+
|Options:
|
76
79
|
USAGE
|
77
80
|
|
78
81
|
DEFAULT_SCORE_MIN = 16
|
@@ -95,8 +98,8 @@ OptionParser.new do |opts|
|
|
95
98
|
options[:samples_file] = o
|
96
99
|
end
|
97
100
|
|
98
|
-
opts.on('-m', '--mismatches_max <uint>', Integer,
|
99
|
-
|
101
|
+
opts.on('-m', '--mismatches_max <uint>', Integer, 'Maximum mismatches_max ',
|
102
|
+
"allowed (default=#{DEFAULT_MISMATCHES})") do |o|
|
100
103
|
options[:mismatches_max] = o
|
101
104
|
end
|
102
105
|
|
@@ -108,14 +111,17 @@ OptionParser.new do |opts|
|
|
108
111
|
options[:revcomp_index2] = o
|
109
112
|
end
|
110
113
|
|
111
|
-
opts.on('--scores_min <uint>', Integer,
|
112
|
-
|
113
|
-
|
114
|
+
opts.on('--scores_min <uint>', Integer, 'Drop reads if a single position in ',
|
115
|
+
'the index have a quality score ',
|
116
|
+
'below scores_min (default= ' \
|
117
|
+
"#{DEFAULT_SCORE_MIN})") do |o|
|
114
118
|
options[:scores_min] = o
|
115
119
|
end
|
116
120
|
|
117
|
-
opts.on('--scores_mean <uint>', Integer,
|
118
|
-
|
121
|
+
opts.on('--scores_mean <uint>', Integer, 'Drop reads if the mean index',
|
122
|
+
'quality score is below ',
|
123
|
+
'scores_mean (default= ' \
|
124
|
+
"#{DEFAULT_SCORE_MEAN})") do |o|
|
119
125
|
options[:scores_mean] = o
|
120
126
|
end
|
121
127
|
|
@@ -123,8 +129,10 @@ OptionParser.new do |opts|
|
|
123
129
|
options[:output_dir] = o
|
124
130
|
end
|
125
131
|
|
126
|
-
opts.on('-c', '--compress <gzip|bzip2>', String, 'Compress output using \
|
127
|
-
|
132
|
+
opts.on('-c', '--compress <gzip|bzip2>', String, 'Compress output using ' \
|
133
|
+
'gzip or bzip2 ',
|
134
|
+
'(default=' \
|
135
|
+
'<no compression>)') do |o|
|
128
136
|
options[:compress] = o.to_sym
|
129
137
|
end
|
130
138
|
|
data/demultiplexer.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
2
2
|
|
3
3
|
require 'demultiplexer/version'
|
4
4
|
|
@@ -6,21 +6,21 @@ Gem::Specification.new do |s|
|
|
6
6
|
s.name = 'demultiplexer'
|
7
7
|
s.version = Demultiplexer::VERSION
|
8
8
|
s.platform = Gem::Platform::RUBY
|
9
|
-
s.date = Time.now.strftime(
|
10
|
-
s.summary =
|
11
|
-
s.description =
|
12
|
-
s.authors = [
|
9
|
+
s.date = Time.now.strftime('%F')
|
10
|
+
s.summary = 'Demultiplexer'
|
11
|
+
s.description = 'Demultiplex sequences from the Illumina platform.'
|
12
|
+
s.authors = ['Martin A. Hansen']
|
13
13
|
s.email = 'mail@maasha.dk'
|
14
|
-
s.rubyforge_project =
|
14
|
+
s.rubyforge_project = 'demultiplexer'
|
15
15
|
s.homepage = 'http://github.com/maasha/demultiplexer'
|
16
16
|
s.license = 'GPL2'
|
17
|
-
s.rubygems_version =
|
17
|
+
s.rubygems_version = '2.0.0'
|
18
18
|
s.files = `git ls-files`.split("\n")
|
19
19
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
-
s.require_paths = [
|
20
|
+
s.require_paths = ['lib']
|
21
21
|
|
22
|
-
s.add_dependency(
|
23
|
-
s.add_dependency(
|
24
|
-
s.add_development_dependency(
|
25
|
-
s.add_development_dependency(
|
22
|
+
s.add_dependency('biopieces', '>= 0.4.1')
|
23
|
+
s.add_dependency('google_hash', '>= 0.8.4')
|
24
|
+
s.add_development_dependency('bundler', '>= 1.7.4')
|
25
|
+
s.add_development_dependency('simplecov', '>= 0.9.2')
|
26
26
|
end
|
data/lib/data_io.rb
CHANGED
@@ -21,40 +21,126 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
|
24
|
+
# Error class for all errors to do with DataIO.
|
25
|
+
DataIOError = Class.new(StandardError)
|
26
|
+
|
24
27
|
# Class containing methods for reading and writing FASTQ data files.
|
25
28
|
class DataIO
|
29
|
+
# Internal: Constructor method for DataIO objects.
|
30
|
+
#
|
31
|
+
# samples - Array with Sample objects consisting of id, index1 and index2
|
32
|
+
# fastq_files - Array of Strings with FASTQ file names of multiplexed data.
|
33
|
+
# compress - Symbol indicating if output data should be compressed with
|
34
|
+
# either gzip or bzip2.
|
35
|
+
# output_dir - String with path of output directory.
|
36
|
+
#
|
37
|
+
# Returns DataIO object.
|
26
38
|
def initialize(samples, fastq_files, compress, output_dir)
|
27
|
-
@samples
|
28
|
-
@compress
|
29
|
-
@output_dir
|
30
|
-
@suffix1
|
31
|
-
@suffix2
|
32
|
-
@input_files
|
33
|
-
@undetermined
|
34
|
-
@
|
39
|
+
@samples = samples
|
40
|
+
@compress = compress
|
41
|
+
@output_dir = output_dir
|
42
|
+
@suffix1 = extract_suffix(fastq_files, '_R1_')
|
43
|
+
@suffix2 = extract_suffix(fastq_files, '_R2_')
|
44
|
+
@input_files = identify_input_files(fastq_files)
|
45
|
+
@undetermined = @samples.size
|
46
|
+
@output_file_ios = nil
|
47
|
+
end
|
48
|
+
|
49
|
+
# Internal: Method that opens the @input_files for reading.
|
50
|
+
#
|
51
|
+
# input_files - Array with input file paths.
|
52
|
+
#
|
53
|
+
# Yields self while the input files are open; the files are closed afterwards.
|
54
|
+
def open_input_files
|
55
|
+
@input_file_ios = []
|
56
|
+
|
57
|
+
@input_files.each do |input_file|
|
58
|
+
@input_file_ios << BioPieces::Fastq.open(input_file)
|
59
|
+
end
|
60
|
+
|
61
|
+
yield self
|
62
|
+
ensure
|
63
|
+
close_input_files
|
64
|
+
end
|
65
|
+
|
66
|
+
# Internal: Method that opens the output files for writing.
|
67
|
+
#
|
68
|
+
# Yields a Hash with an incrementing index as keys, and a tuple of file
|
69
|
+
# handles as values.
|
70
|
+
def open_output_files
|
71
|
+
@output_file_ios = {}
|
72
|
+
comp = @compress
|
73
|
+
|
74
|
+
@output_file_ios.merge!(open_output_files_samples(comp))
|
75
|
+
@output_file_ios.merge!(open_output_files_undet(comp))
|
76
|
+
|
77
|
+
yield self
|
78
|
+
ensure
|
79
|
+
close_output_files
|
80
|
+
end
|
81
|
+
|
82
|
+
# Internal: Method that reads a Seq entry from each of the file handles in
|
83
|
+
# the @input_file_ios Array. Iteration stops when no more Seq entries are
|
84
|
+
# found.
|
85
|
+
#
|
86
|
+
# Yields an Array with 4 Seq objects.
|
87
|
+
#
|
88
|
+
# Returns nothing
|
89
|
+
def each
|
90
|
+
loop do
|
91
|
+
entries = @input_file_ios.each_with_object([]) do |e, a|
|
92
|
+
a << e.next_entry
|
93
|
+
end
|
94
|
+
|
95
|
+
break if entries.compact.size != 4
|
96
|
+
|
97
|
+
yield entries
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Internal: Getter method that returns a tuple of file handles from
|
102
|
+
# @output_file_ios when given a sample index key.
|
103
|
+
#
|
104
|
+
# key - Sample index Integer key used for lookup.
|
105
|
+
#
|
106
|
+
# Returns Array with a tuple of IO objects.
|
107
|
+
def [](key)
|
108
|
+
@output_file_ios[key]
|
35
109
|
end
|
36
110
|
|
37
|
-
|
111
|
+
private
|
112
|
+
|
113
|
+
# Internal: Method that extracts the Sample, Lane, Region information from
|
114
|
+
# given files.
|
38
115
|
#
|
39
|
-
#
|
116
|
+
# files - Array with FASTQ file names as Strings.
|
117
|
+
# pattern - String with pattern to use for matching file names.
|
40
118
|
#
|
41
119
|
# Examples
|
42
120
|
#
|
43
|
-
# extract_suffix("Sample1_S1_L001_R1_001.fastq.gz")
|
121
|
+
# extract_suffix("Sample1_S1_L001_R1_001.fastq.gz", "_R1_")
|
44
122
|
# # => "_S1_L001_R1_001"
|
45
123
|
#
|
46
124
|
# Returns String with SLR info.
|
47
|
-
|
48
|
-
|
125
|
+
# Raises unless pattern matches exactly 1 file.
|
126
|
+
# Raises unless SLR info can be parsed.
|
127
|
+
def extract_suffix(files, pattern)
|
128
|
+
hits = files.grep(Regexp.new(pattern))
|
129
|
+
|
130
|
+
unless hits.size == 1
|
131
|
+
fail DataIOError, "Expecting exactly 1 hit but got: #{hits.size}"
|
132
|
+
end
|
133
|
+
|
134
|
+
if hits.first =~ /.+(_S\d_L\d{3}_R[12]_\d{3}).+$/
|
49
135
|
slr = Regexp.last_match(1)
|
50
136
|
else
|
51
|
-
fail "Unable to parse file SLR from: #{
|
137
|
+
fail DataIOError, "Unable to parse file SLR from: #{hits.first}"
|
52
138
|
end
|
53
139
|
|
54
140
|
append_suffix(slr)
|
55
141
|
end
|
56
142
|
|
57
|
-
# Method that appends a file suffix to a given Sample, Lane, Region
|
143
|
+
# Internal: Method that appends a file suffix to a given Sample, Lane, Region
|
58
144
|
# information String based on the @options[:compress] option. The
|
59
145
|
# file suffix can be either ".fastq.gz", ".fastq.bz2", or ".fastq".
|
60
146
|
#
|
@@ -79,14 +165,15 @@ class DataIO
|
|
79
165
|
slr
|
80
166
|
end
|
81
167
|
|
82
|
-
# Method identify the different input files from a given Array of
|
83
|
-
# The forward index file contains a _I1_, the reverse index file
|
84
|
-
# _I2_, the forward read file contains a _R1_ and finally, the
|
85
|
-
# file contain a _R2_.
|
168
|
+
# Internal: Method that identifies the different input files from a given Array of
|
169
|
+
# FASTQ files. The forward index file contains a _I1_, the reverse index file
|
170
|
+
# contains a _I2_, the forward read file contains a _R1_ and finally, the
|
171
|
+
# reverse read file contains a _R2_.
|
86
172
|
#
|
87
173
|
# fastq_files - Array with FASTQ files (Strings).
|
88
174
|
#
|
89
175
|
# Returns an Array with input files (Strings).
|
176
|
+
# Raises unless 4 input_files are found.
|
90
177
|
def identify_input_files(fastq_files)
|
91
178
|
input_files = []
|
92
179
|
|
@@ -95,113 +182,62 @@ class DataIO
|
|
95
182
|
input_files << fastq_files.grep(/_R1_/).first
|
96
183
|
input_files << fastq_files.grep(/_R2_/).first
|
97
184
|
|
98
|
-
input_files
|
99
|
-
|
100
|
-
|
101
|
-
# Method that opens the @input_files for reading.
|
102
|
-
#
|
103
|
-
# input_files - Array with input file paths.
|
104
|
-
#
|
105
|
-
# Returns an Array with IO objects (file handles).
|
106
|
-
def open_input_files
|
107
|
-
@file_ios = []
|
108
|
-
|
109
|
-
@input_files.each do |input_file|
|
110
|
-
@file_ios << BioPieces::Fastq.open(input_file)
|
111
|
-
end
|
112
|
-
|
113
|
-
yield self
|
114
|
-
ensure
|
115
|
-
close_input_files
|
116
|
-
end
|
117
|
-
|
118
|
-
# Method that closes open input files.
|
119
|
-
#
|
120
|
-
# Returns nothing.
|
121
|
-
def close_input_files
|
122
|
-
@file_ios.map(&:close)
|
123
|
-
end
|
124
|
-
|
125
|
-
# Method that reads a Seq entry from each of the file handles in the
|
126
|
-
# @file_ios Array. Iteration stops when no more Seq entries are found.
|
127
|
-
#
|
128
|
-
# Yields an Array with 4 Seq objects.
|
129
|
-
#
|
130
|
-
# Returns nothing
|
131
|
-
def each
|
132
|
-
loop do
|
133
|
-
entries = @file_ios.each_with_object([]) { |e, a| a << e.next_entry }
|
134
|
-
|
135
|
-
break if entries.compact.size != 4
|
136
|
-
|
137
|
-
yield entries
|
185
|
+
unless input_files.compact.size == 4
|
186
|
+
fail DataIOError, 'Expecting exactly 4 input_files but got: ' \
|
187
|
+
"#{input_files.compact.size}"
|
138
188
|
end
|
139
|
-
end
|
140
189
|
|
141
|
-
|
142
|
-
#
|
143
|
-
# Yeilds a Hash with an incrementing index as keys, and a tuple of file
|
144
|
-
# handles as values.
|
145
|
-
def open_output_files
|
146
|
-
@file_hash = {}
|
147
|
-
comp = @compress
|
148
|
-
|
149
|
-
@file_hash.merge!(open_output_files_samples(comp))
|
150
|
-
@file_hash.merge!(open_output_files_undet(comp))
|
151
|
-
|
152
|
-
yield self
|
153
|
-
ensure
|
154
|
-
close_output_files
|
155
|
-
end
|
156
|
-
|
157
|
-
def close_output_files
|
158
|
-
@file_hash.each_value { |value| value.map(&:close) }
|
159
|
-
end
|
160
|
-
|
161
|
-
# Getter method that returns a tuple of file handles from @file_hash when
|
162
|
-
# given a key.
|
163
|
-
#
|
164
|
-
# key - Key used to lookup
|
165
|
-
#
|
166
|
-
# Returns Array with a tuple of IO objects.
|
167
|
-
def [](key)
|
168
|
-
@file_hash[key]
|
190
|
+
input_files
|
169
191
|
end
|
170
192
|
|
171
|
-
# Method that opens the sample output files for writing.
|
193
|
+
# Internal: Method that opens the sample output files for writing.
|
172
194
|
#
|
173
195
|
# comp - Symbol with type of output compression.
|
174
196
|
#
|
175
197
|
# Returns a Hash with an incrementing index as keys, and a tuple of file
|
176
198
|
# handles as values.
|
177
199
|
def open_output_files_samples(comp)
|
178
|
-
|
200
|
+
output_file_ios = {}
|
179
201
|
|
180
202
|
@samples.each_with_index do |sample, i|
|
181
203
|
file_forward = File.join(@output_dir, "#{sample.id}#{@suffix1}")
|
182
204
|
file_reverse = File.join(@output_dir, "#{sample.id}#{@suffix2}")
|
183
205
|
io_forward = BioPieces::Fastq.open(file_forward, 'w', compress: comp)
|
184
206
|
io_reverse = BioPieces::Fastq.open(file_reverse, 'w', compress: comp)
|
185
|
-
|
207
|
+
output_file_ios[i] = [io_forward, io_reverse]
|
186
208
|
end
|
187
209
|
|
188
|
-
|
210
|
+
output_file_ios
|
189
211
|
end
|
190
212
|
|
191
|
-
# Method that opens the undertermined output files for writing.
|
213
|
+
# Internal: Method that opens the undetermined output files for writing.
|
192
214
|
#
|
193
215
|
# comp - Symbol with type of output compression.
|
194
216
|
#
|
195
217
|
# Returns a Hash with an incrementing index as keys, and a tuple of file
|
196
218
|
# handles as values.
|
197
219
|
def open_output_files_undet(comp)
|
198
|
-
|
220
|
+
output_file_ios = {}
|
199
221
|
file_forward = File.join(@output_dir, "Undetermined#{@suffix1}")
|
200
222
|
file_reverse = File.join(@output_dir, "Undetermined#{@suffix2}")
|
201
223
|
io_forward = BioPieces::Fastq.open(file_forward, 'w', compress: comp)
|
202
224
|
io_reverse = BioPieces::Fastq.open(file_reverse, 'w', compress: comp)
|
203
|
-
|
225
|
+
output_file_ios[@undetermined] = [io_forward, io_reverse]
|
204
226
|
|
205
|
-
|
227
|
+
output_file_ios
|
228
|
+
end
|
229
|
+
|
230
|
+
# Internal: Method that closes open input files.
|
231
|
+
#
|
232
|
+
# Returns nothing.
|
233
|
+
def close_input_files
|
234
|
+
@input_file_ios.map(&:close)
|
235
|
+
end
|
236
|
+
|
237
|
+
# Internal: Method that closes the file handles stored in @output_file_ios.
|
238
|
+
#
|
239
|
+
# Returns nothing.
|
240
|
+
def close_output_files
|
241
|
+
@output_file_ios.each_value { |value| value.map(&:close) }
|
206
242
|
end
|
207
243
|
end
|