demultiplexer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,26 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+
24
# Namespace class for the demultiplexer gem. In this file it only carries the
# gem version.
class Demultiplexer
  # Version String of the gem. Frozen so the constant cannot be mutated in
  # place at runtime.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,181 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+
24
+ # Class containing methods for building a search index.
25
class IndexBuilder
  # Class method that builds a search index from a given Array of samples.
  #
  # samples        - Array of samples (objects responding to id, index1 and
  #                  index2).
  # mismatches_max - Integer denoting the maximum number of mismatches
  #                  allowed in an index sequence.
  #
  # Examples
  #
  #   IndexBuilder.build(samples, 1)
  #   # => <Google Hash>
  #
  # Returns a Google Hash where the key is the String#hash value of a
  # concatenated index1/index2 combination and the value is the position of
  # the owning sample in the samples Array.
  def self.build(samples, mismatches_max)
    index_builder = new(samples, mismatches_max)
    index_builder.index_populate(index_builder.index_init)
  end

  # Constructor method for IndexBuilder object. The given Array of samples and
  # mismatches_max are saved as instance variables.
  #
  # samples        - Array of Sample objects.
  # mismatches_max - Integer denoting the maximum number of mismatches
  #                  allowed in an index sequence.
  #
  # Examples
  #
  #   IndexBuilder.new(samples, 2)
  #   # => <IndexBuilder>
  #
  # Returns an IndexBuilder object.
  def initialize(samples, mismatches_max)
    @samples        = samples
    @mismatches_max = mismatches_max
  end

  # Method to initialize the index. If @mismatches_max is <= 1 then a
  # GoogleHashSparseLongToInt is used, else a GoogleHashDenseLongToInt (chosen
  # by the original author for memory and performance reasons).
  #
  # Returns an empty Google Hash.
  def index_init
    if @mismatches_max <= 1
      GoogleHashSparseLongToInt.new
    else
      GoogleHashDenseLongToInt.new
    end
  end

  # Method to populate the index. For every sample all permutations of index1
  # and index2 within @mismatches_max are generated, and every combination of
  # the two is stored with the position of the sample as value.
  #
  # index_hash - Hash-like object (responding to [] and []=) to populate.
  #
  # Returns the populated index_hash.
  # Raises RuntimeError if an index combination is already in the index.
  def index_populate(index_hash)
    @samples.each_with_index do |sample, i|
      index_list1 = permutate([sample.index1], @mismatches_max)
      index_list2 = permutate([sample.index2], @mismatches_max)

      # index_check_list_sizes(index_list1, index_list2)

      index_list1.product(index_list2).each do |index1, index2|
        # String#hash is only stable within one Ruby process, so the keys
        # must be computed the same way, in the same process, at lookup time.
        key = "#{index1}#{index2}".hash

        index_check_existing(index_hash, key, sample, index1, index2)

        index_hash[key] = i
      end
    end

    index_hash
  end

  private

  # Method to check if two index lists differ in size, if so an exception is
  # raised.
  #
  # index_list1 - Array with index1
  # index_list2 - Array with index2
  #
  # Returns nothing.
  # Raises RuntimeError if the sizes differ.
  def index_check_list_sizes(index_list1, index_list2)
    return if index_list1.size == index_list2.size

    fail 'Permutated list sizes differ: ' \
         "#{index_list1.size} != #{index_list2.size}"
  end

  # Method to check if an index key already exists in the index, and if so an
  # exception is raised naming both conflicting samples.
  #
  # index_hash - Hash-like object with the index.
  # key        - Integer key (String#hash of the concatenated indexes).
  # sample     - Sample object currently being added.
  # index1     - String with the index1 permutation that produced key.
  # index2     - String with the index2 permutation that produced key.
  #
  # Returns nothing.
  # Raises RuntimeError if key is already present in index_hash.
  def index_check_existing(index_hash, key, sample, index1, index2)
    existing = index_hash[key]
    return unless existing

    fail "Index combo of #{index1} and #{index2} already exists for " \
         "sample id: #{@samples[existing].id} and #{sample.id}"
  end

  # Method that takes an Array of words and repeatedly (permutations times)
  # extends it with every single-position substitution of every word using the
  # given alphabet. Duplicates are removed, and the original words come first.
  #
  # list         - Array of words (Strings) to permutate.
  # permutations - Number of substitution rounds (Integer).
  # alphabet     - String with alphabet used for permutation.
  #
  # Examples
  #
  #   permutate(["AA"], 1, "ATCG")
  #   # => ["AA", "TA", "CA", "GA", "AT", "AC", "AG"]
  #
  # Returns an Array with the unique permutated words (Strings).
  def permutate(list, permutations = 2, alphabet = 'ATCG')
    permutations.times do
      set = Set.new(list)

      list.each { |word| set.merge(permutate_word(word, alphabet)) }

      list = set.to_a
    end

    list
  end

  # Method that permutates a given word using a given alphabet, such that an
  # Array with every single-position substitution is returned (duplicates
  # included).
  #
  # word     - String with word to permutate.
  # alphabet - String with alphabet used for permutation.
  #
  # Examples
  #
  #   permutate_word("AA", "ATCG")
  #   # => ["AA", "TA", "CA", "GA", "AA", "AT", "AC", "AG"]
  #
  # Returns an Array with permutated words (Strings).
  def permutate_word(word, alphabet)
    (0...word.size).flat_map do |pos|
      alphabet.each_char.map do |char|
        "#{word[0...pos]}#{char}#{word[pos + 1..-1]}"
      end
    end
  end
end
@@ -0,0 +1,198 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+
24
+ # Class containing methods for reading and checking sample information.
25
class SampleReader
  # Class method that reads sample information from a samples file, which
  # consists of ASCII text in three tab separated columns: The first column is
  # the sample_id, the second column is index1 and the third column is index2.
  #
  # If revcomp1 or revcomp2 is set then index1 and index2 are
  # reverse-complemented accordingly.
  #
  # file     - String with path to sample file.
  # revcomp1 - Flag indicating that index1 should be reverse-complemented.
  # revcomp2 - Flag indicating that index2 should be reverse-complemented.
  #
  # Examples
  #
  #   SampleReader.read("samples.txt", false, false)
  #   # => [<Sample>, <Sample>, <Sample> ...]
  #
  # Returns an Array of Sample objects.
  # Raises RuntimeError if the sample file contains duplicate ids or duplicate
  # index combinations.
  def self.read(file, revcomp1, revcomp2)
    new(revcomp1, revcomp2).samples_parse(file)
  end

  # Constructor method for SampleReader object. The given revcomp1 and revcomp2
  # flags are stored as instance variables.
  #
  # revcomp1 - Flag indicating that index1 should be reverse-complemented.
  # revcomp2 - Flag indicating that index2 should be reverse-complemented.
  #
  # Examples
  #
  #   SampleReader.new(false, false)
  #   # => <SampleReader>
  #
  # Returns SampleReader object.
  def initialize(revcomp1, revcomp2)
    @revcomp1 = revcomp1
    @revcomp2 = revcomp2
  end

  # Method that reads sample information from a samples file, optionally
  # reverse-complements the indexes, and validates that sample ids and index
  # combinations are unique.
  #
  # file - String with path to sample file.
  #
  # Examples
  #
  #   samples_parse("samples.txt")
  #   # => [<Sample>, <Sample>, <Sample> ...]
  #
  # Returns an Array of Sample objects.
  # Raises RuntimeError if any validation error was found; the individual
  # errors are pretty-printed to standard output first.
  def samples_parse(file)
    samples = samples_read(file)
    samples_reverse_complement(samples)

    errors = samples_check_index_combo(samples) + samples_check_uniq_id(samples)

    unless errors.empty?
      pp errors
      fail 'errors found in sample file.'
    end

    samples
  end

  private

  # Method that reads sample information from a samples file, which consists
  # of ASCII text in three tab separated columns: The first column is the
  # sample_id, the second column is index1 and the third column is index2.
  # Blank lines are skipped so they do not turn into empty Sample records.
  #
  # file - String with path to sample file.
  #
  # Examples
  #
  #   samples_read("samples.txt")
  #   # => [<Sample>, <Sample>, <Sample> ...]
  #
  # Returns an Array of Sample objects.
  def samples_read(file)
    rows = CSV.read(file, col_sep: "\t", skip_blanks: true)

    rows.map { |id, index1, index2| Sample.new(id, index1, index2) }
  end

  # Method that iterates over a given Array of Sample objects, and if
  # @revcomp1 or @revcomp2 is set then index1 and index2 are
  # reverse-complemented in place accordingly.
  #
  # samples - Array of Sample objects.
  #
  # Returns the given Array of samples.
  def samples_reverse_complement(samples)
    samples.each do |sample|
      sample.index1 = index_reverse_complement(sample.index1) if @revcomp1
      sample.index2 = index_reverse_complement(sample.index2) if @revcomp2
    end
  end

  # Method that reverse-complements a given index sequence using
  # BioPieces::Seq.
  #
  # index - Index String.
  #
  # Returns reverse-complemented index String.
  def index_reverse_complement(index)
    BioPieces::Seq.new(seq: index, type: :dna).reverse.complement.seq
  end

  # Method that iterates over a given Array of Sample objects, and if the
  # combination of index1 and index2 is non-unique an error is pushed on an
  # error Array. The first sample seen with a combination owns it; every later
  # sample with the same combination is reported against that first one.
  #
  # samples - Array of Sample objects.
  #
  # Returns an Array of found errors (tab separated Strings).
  def samples_check_index_combo(samples)
    errors = []
    lookup = {}

    samples.each do |sample|
      combo = "#{sample.index1}#{sample.index2}"

      if (id2 = lookup[combo])
        errors << ['Samples with same index combo', sample.id, id2].join("\t")
      else
        lookup[combo] = sample.id
      end
    end

    errors
  end

  # Method that iterates over a given Array of Sample objects, and if a
  # sample id is non-unique an error is pushed on an error Array.
  #
  # samples - Array of Sample objects.
  #
  # Returns an Array of found errors (tab separated Strings).
  def samples_check_uniq_id(samples)
    seen = Set.new

    samples.each_with_object([]) do |sample, errors|
      # Set#add? returns nil when the id was already present.
      next if seen.add?(sample.id)

      errors << ['Non-unique sample id', sample.id].join("\t")
    end
  end

  # Struct for holding sample information.
  #
  # id     - Sample id.
  # index1 - Index1 sequence.
  # index2 - Index2 sequence.
  #
  # Examples
  #
  #   Sample.new("test1", "atcg", "gcta")
  #   # => <Sample>
  #
  # Returns Sample object.
  Sample = Struct.new(:id, :index1, :index2)
end
data/lib/screen.rb ADDED
@@ -0,0 +1,39 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+
24
+ # Module containing class methods for clearing and resetting a terminal screen.
25
module Screen
  class << self
    # Clears the terminal by emitting the escape sequences for "cursor home"
    # and "erase display" on standard output.
    #
    # Returns nothing.
    def clear
      $stdout.print("\e[H\e[2J")
    end

    # Moves the cursor to the 1,1 coordinate (top left corner) by emitting
    # the corresponding escape sequence on standard output.
    #
    # Returns nothing.
    def reset
      $stdout.print("\e[1;1H")
    end
  end
end
data/lib/status.rb ADDED
@@ -0,0 +1,101 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2014-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+
24
+ # Class containing methods to record demultiplexing status.
25
class Status
  attr_accessor :count, :match, :undetermined, :index1_bad_mean,
                :index2_bad_mean, :index1_bad_min, :index2_bad_min

  # Method to initialize a Status object, which contains the following instance
  # variables initialized to 0:
  #
  # @count           - Number of reads.
  # @match           - Number of reads found in index.
  # @undetermined    - Number of reads not found in index.
  # @index1_bad_mean - Number of reads dropped due to bad mean in index1.
  # @index2_bad_mean - Number of reads dropped due to bad mean in index2.
  # @index1_bad_min  - Number of reads dropped due to bad min in index1.
  # @index2_bad_min  - Number of reads dropped due to bad min in index2.
  #
  # In addition @time_start is set to the current time, and is used by #time
  # to report the elapsed time.
  #
  # Examples
  #
  #   Status.new
  #   # => <Status>
  #
  # Returns a Status object.
  def initialize
    @count           = 0
    @match           = 0
    @undetermined    = 0
    @index1_bad_mean = 0
    @index2_bad_mean = 0
    @index1_bad_min  = 0
    @index2_bad_min  = 0
    @time_start      = Time.now
  end

  # Method to format a String from a Status object. This is done by adding the
  # relevant instance variables to a Hash and returning this as a YAML String.
  #
  # Returns a YAML String.
  def to_s
    { count:                @count,
      match:                @match,
      undetermined:         @undetermined,
      undetermined_percent: undetermined_percent,
      index1_bad_mean:      @index1_bad_mean,
      index2_bad_mean:      @index2_bad_mean,
      index1_bad_min:       @index1_bad_min,
      index2_bad_min:       @index2_bad_min,
      time:                 time }.to_yaml
  end

  # Method that calculates the percentage of undetermined reads.
  #
  # Returns a Float with the percentage of undetermined reads rounded to one
  # decimal, or 0.0 if no reads have been counted yet (avoids NaN from 0/0.0).
  def undetermined_percent
    return 0.0 if @count.zero?

    (100 * @undetermined / @count.to_f).round(1)
  end

  # Method that calculates the elapsed time since the Status object was
  # created and formats it as HH:MM:SS. The hour field is computed from the
  # total number of seconds, so it keeps counting beyond 24 hours instead of
  # wrapping around.
  #
  # Returns String with elapsed time.
  def time
    # Clamp to zero in case the wall clock was adjusted backwards.
    elapsed          = [(Time.now - @time_start).to_i, 0].max
    minutes, seconds = elapsed.divmod(60)
    hours, minutes   = minutes.divmod(60)

    format('%02d:%02d:%02d', hours, minutes, seconds)
  end

  # Method to save the status, formatted as YAML by #to_s, to a given file.
  # An existing file is overwritten.
  #
  # file - String with path to the output file.
  #
  # Returns nothing.
  def save(file)
    File.open(file, 'w') do |ios|
      ios.puts self
    end
  end
end