viral_seq 1.0.6 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -4
- data/README.md +116 -45
- data/bin/locator +31 -9
- data/bin/tcs +454 -0
- data/lib/viral_seq.rb +4 -1
- data/lib/viral_seq/constant.rb +5 -1
- data/lib/viral_seq/hash.rb +1 -1
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/muscle.rb +2 -2
- data/lib/viral_seq/sdrm.rb +43 -0
- data/lib/viral_seq/seq_hash.rb +173 -42
- data/lib/viral_seq/seq_hash_pair.rb +16 -6
- data/lib/viral_seq/tcs_core.rb +305 -0
- data/lib/viral_seq/tcs_json.rb +178 -0
- data/lib/viral_seq/version.rb +2 -1
- data/viral_seq.gemspec +5 -1
- metadata +24 -5
@@ -7,7 +7,7 @@ module ViralSeq
|
|
7
7
|
# @example join the paired-end sequences with an overlap of 100 bp
|
8
8
|
# my_seqhashpair.join1(100)
|
9
9
|
# @example join the paired-end sequences with unknown overlap, each pair of sequences has its own overlap size
|
10
|
-
# my_seqhashpair.
|
10
|
+
# my_seqhashpair.join2(model: :indiv)
|
11
11
|
|
12
12
|
class SeqHashPair
|
13
13
|
|
@@ -80,6 +80,12 @@ module ViralSeq
|
|
80
80
|
alias_method :fa, :new_from_fasta
|
81
81
|
end
|
82
82
|
|
83
|
+
# the size of nt sequence hash of the SeqHashPair object
|
84
|
+
# @return [Integer] size of nt sequence hash of the SeqHashPair object
|
85
|
+
# Number of entries in the nt sequence hash held by this object.
# @return [Integer] whatever `dna_hash.size` reports
def size
  dna_hash.size
end
|
88
|
+
|
83
89
|
# Pair-end join function for KNOWN overlap size.
|
84
90
|
# @param overlap [Integer] how many bases are overlapped. `0` means no overlap, R1 and R2 will be simply put together.
|
85
91
|
# @param diff [Integer, Float] the maximum mismatch rate allowed for the overlapping region. default at 0.0, i.e. no mis-match allowed.
|
@@ -104,17 +110,21 @@ module ViralSeq
|
|
104
110
|
raise ArgumentError.new(":overlap has to be Integer, input #{overlap} invalid.") unless overlap.is_a? Integer
|
105
111
|
raise ArgumentError.new(":diff has to be float or integer, input #{diff} invalid.") unless (diff.is_a? Integer or diff.is_a? Float)
|
106
112
|
joined_seq = {}
|
107
|
-
seq_pair_hash.each do |
|
113
|
+
seq_pair_hash.uniq_hash.each do |seq_pair, seq_names|
|
108
114
|
r1_seq = seq_pair[0]
|
109
115
|
r2_seq = seq_pair[1]
|
110
116
|
if overlap.zero?
|
111
|
-
|
117
|
+
joined_sequence = r1_seq + r2_seq
|
112
118
|
elsif r1_seq[-overlap..-1].compare_with(r2_seq[0,overlap]) <= (overlap * diff)
|
113
|
-
|
119
|
+
joined_sequence= r1_seq + r2_seq[overlap..-1]
|
114
120
|
else
|
115
121
|
next
|
116
122
|
end
|
123
|
+
seq_names.each do |seq_name|
|
124
|
+
joined_seq[seq_name] = joined_sequence
|
125
|
+
end
|
117
126
|
end
|
127
|
+
|
118
128
|
joined_seq_hash = ViralSeq::SeqHash.new
|
119
129
|
joined_seq_hash.dna_hash = joined_seq
|
120
130
|
joined_seq_hash.title = self.title + "_joined"
|
@@ -139,7 +149,7 @@ module ViralSeq
|
|
139
149
|
# my_seqhashpair = ViralSeq::SeqHashPair.new(paired_seq2)
|
140
150
|
# my_seqhashpair.join2.dna_hash
|
141
151
|
# => {">pair4"=>"AAAGGGGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
|
142
|
-
# my_seqhashpair.join2(model :indiv).dna_hash
|
152
|
+
# my_seqhashpair.join2(model: :indiv).dna_hash
|
143
153
|
# => {">pair4"=>"AAAGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
|
144
154
|
|
145
155
|
def join2(model: :con, diff: 0.0)
|
@@ -207,7 +217,7 @@ module ViralSeq
|
|
207
217
|
# {minimal overlap set to 4. }
|
208
218
|
def overlap_matrix(sequence1, sequence2)
|
209
219
|
min_overlap = 4
|
210
|
-
max_overlap = [sequence1.size, sequence2.size].
|
220
|
+
max_overlap = [sequence1.size, sequence2.size].min
|
211
221
|
matrix_hash = {}
|
212
222
|
(min_overlap..max_overlap).each do |overlap|
|
213
223
|
matrix_hash[overlap] = sequence1[-overlap..-1].compare_with(sequence2[0, overlap])
|
@@ -0,0 +1,305 @@
|
|
1
|
+
module ViralSeq
|
2
|
+
|
3
|
+
# Core functions for `tcs` pipeline
|
4
|
+
|
5
|
+
class TcsCore
|
6
|
+
class << self
|
7
|
+
|
8
|
+
# methods to calculate TCS consensus cut-off based on the maximum numbers of PIDs and platform error rate.
|
9
|
+
|
10
|
+
# Compute the TCS consensus cut-off from the maximum number of PIDs.
#
# The platform error rate picks which polynomial is evaluated:
# [0.005, 0.015), [0, 0.005), or anything else (which also covers the
# default 0.02). The polynomial value is rounded, and results below 3
# are reported as 2.
#
# @param m [Numeric] maximum number of PIDs
# @param error_rate [Numeric] platform error rate, 0.02 when omitted
# @return [Integer] the cut-off, never smaller than 2
def calculate_cut_off(m, error_rate = 0.02)
  # Every error-rate branch yields 2 for m <= 10, so settle that case first.
  return 2 if m <= 10

  estimate =
    case error_rate
    when 0.005...0.015
      1.09*10**-26*m**6 + 7.82*10**-22*m**5 - 1.93*10**-16*m**4 + 1.01*10**-11*m**3 - 2.31*10**-7*m**2 + 0.00645*m + 2.872
    when 0...0.005
      -9.59*10**-27*m**6 + 3.27*10**-21*m**5 - 3.05*10**-16*m**4 + 1.2*10**-11*m**3 - 2.19*10**-7*m**2 + 0.004044*m + 2.273
    else
      if m <= 8500
        -1.24*10**-21*m**6 + 3.53*10**-17*m**5 - 3.90*10**-13*m**4 + 2.12*10**-9*m**3 - 6.06*10**-6*m**2 + 1.80*10**-2*m + 3.15
      else
        # beyond 8500 PIDs a straight line is used instead of the polynomial
        0.0079 * m + 9.4869
      end
    end

  cut_off = estimate.round
  cut_off < 3 ? 2 : cut_off
end
|
41
|
+
|
42
|
+
# identify which file in the directory is R1 file, and which is R2 file based on file names
|
43
|
+
# input as directory (Dir object or a string of path)
|
44
|
+
# by default, .gz files will be unzipped.
|
45
|
+
# return a hash of {r1_file: file1, r2_file: file2}
|
46
|
+
# Locate the R1 and R2 files of one directory by the tags in their names.
#
# Each entry of the directory is parsed with `parser_file_name`; an entry
# whose tags include "R1" fills :r1_file, otherwise one whose tags include
# "R2" fills :r2_file. When several entries qualify, the last one visited
# is kept.
#
# @param directory [String, Dir] directory to scan
# @param unzip [Boolean] when true, matching entries are passed through
#   `unzip_r`; when false their path is simply joined onto `directory`
# @return [Hash] { r1_file: path, r2_file: path }; a value stays "" when
#   no matching entry was seen
def r1r2(directory, unzip = true)
  entries = Dir.chdir(directory) { Dir.glob "*" }
  located = { r1_file: "", r2_file: "" }

  entries.each do |entry|
    tags = parser_file_name(entry)[:tag]
    slot =
      if tags.include? "R1"
        :r1_file
      elsif tags.include? "R2"
        :r2_file
      end
    next unless slot

    located[slot] = unzip ? unzip_r(directory, entry) : File.join(directory, entry)
  end

  located
end # end of ViralSeq:TcsCore.r1r2
|
62
|
+
|
63
|
+
# sort directories containing multiple r1 and r2 files.
|
64
|
+
# use the library name (first string before "_") to separate libraries
|
65
|
+
# out_dir is the Dir object or string of the output directory, by default named as directory + "_sorted"
|
66
|
+
# return a hash as { with_both_r1_r2: [lib1, lib2, ...], missing_r1: [lib1, lib2, ...], missing_r2: [lib1, lib2, ...], error: [lib1, lib2, ...]}
|
67
|
+
|
68
|
+
# Move every entry of `directory` into a per-library sub-directory of
# `out_dir`, then report which libraries ended up with R1 and/or R2 files.
#
# The library name is the part of the file name before the first "_".
# Both `out_dir` and the per-library directories are created when absent.
# Each library directory is checked with ViralSeq::TcsCore.r1r2.
#
# @param directory [String] directory holding the unsorted files
# @param out_dir [String] destination, `directory + "_sorted"` by default
# @return [Hash] { with_both_r1_r2: [...], missing_r1: [...],
#   missing_r2: [...], error: [...] } of library names; :error collects
#   libraries where neither file was found
def sort_by_lib(directory, out_dir = directory + "_sorted")
  Dir.mkdir(out_dir) unless File.directory?(out_dir)

  entries = Dir.chdir(directory) { Dir.glob("*") }
  entries.each do |entry|
    lib_dir = File.join(out_dir, entry.split("_")[0])
    Dir.mkdir(lib_dir) unless File.directory?(lib_dir)
    File.rename(File.join(directory, entry), File.join(lib_dir, entry))
  end

  summary = { with_both_r1_r2: [], missing_r1: [], missing_r2: [], error: [] }

  Dir.chdir(out_dir) { Dir.glob('*') }.each do |lib|
    found = ViralSeq::TcsCore.r1r2(File.join(out_dir, lib))
    has_r1 = !found[:r1_file].empty?
    has_r2 = !found[:r2_file].empty?

    bucket =
      if has_r1 && has_r2
        :with_both_r1_r2
      elsif has_r2
        :missing_r1
      elsif has_r1
        :missing_r2
      else
        :error
      end
    summary[bucket] << lib
  end

  summary
end
|
103
|
+
|
104
|
+
# sort array of file names to determine if there is potential errors
|
105
|
+
# input name_array array of file names
|
106
|
+
# output hash { }
|
107
|
+
# need to change for each file name have an error code. and a bool to show if all pass
|
108
|
+
# Check a list of file names for problems that would stop a library from
# being processed.
#
# A name is rejected when it does not end in .fastq or .fastq.gz, when its
# tags contain neither "R1" nor "R2", or when they contain both / more than
# one of either. Surviving names are grouped by library name; a library
# passes only when it has exactly one R1 and one R2 file.
#
# @param name_array [Array<String>] file names to check
# @return [Hash] { errors: Hash of error kind => names,
#   all_pass: true when every given name passed,
#   passed_names: Array of names, passed_libs: Hash of libname => names }
def validate_file_name(name_array)
  errors = {
    file_type_error: [],
    missing_r1_file: [],
    missing_r2_file: [],
    extra_r1_r2_file: [],
    no_region_tag: [],
    multiple_region_tag: []
  }

  # First pass: per-name checks. Names that fail are filed under one error kind.
  candidates = name_array.select do |name|
    tags = parser_file_name(name)[:tag]
    r1_hits = tags.count("R1")
    r2_hits = tags.count("R2")

    rejection =
      if name !~ /\.fastq\Z|\.fastq\.gz\Z/
        :file_type_error
      elsif r1_hits.zero? && r2_hits.zero?
        :no_region_tag
      elsif (r1_hits > 0 && r2_hits > 0) || r1_hits > 1 || r2_hits > 1
        :multiple_region_tag
      end

    errors[rejection] << name if rejection
    rejection.nil?
  end

  # Second pass: per-library checks on the names that survived.
  by_lib = candidates.group_by { |name| parser_file_name(name)[:libname] }

  passed_libs = {}
  by_lib.each do |libname, files|
    tag_lists = files.map { |name| parser_file_name(name)[:tag] }
    r1_total = tag_lists.count { |tags| tags.include?("R1") }
    r2_total = tag_lists.count { |tags| !tags.include?("R1") && tags.include?("R2") }

    if r1_total > 1 || r2_total > 1
      errors[:extra_r1_r2_file] += files
    elsif r1_total.zero?
      errors[:missing_r1_file] += files
    elsif r2_total.zero?
      errors[:missing_r2_file] += files
    else
      passed_libs[libname] = files
    end
  end

  passed_names = passed_libs.values.flatten(1)

  {
    errors: errors,
    all_pass: passed_names.size >= name_array.size,
    passed_names: passed_names,
    passed_libs: passed_libs
  }
end
|
180
|
+
|
181
|
+
# filter r1 raw sequences for non-specific primers.
|
182
|
+
# input r1_sh, SeqHash obj.
|
183
|
+
# return a Hash { r1_passed_seq: Hash of sequence name => sequence, forward_starting_number: Integer }
|
184
|
+
|
185
|
+
# Keep only the R1 reads that pass `general_filter` and carry the forward
# primer at the expected position.
#
# When the primer ends in a run of N followed by word characters, the N run
# is treated as an offset and the rest as the biological primer; otherwise
# the whole primer is used with offset 0. The biological primer is turned
# into a matcher via `nt_parser` and compared against the read slice that
# starts at the offset.
#
# @param r1_sh [#dna_hash] object exposing the raw R1 reads as name => sequence
# @param forward_primer [String] forward primer sequence
# @return [Hash] { r1_passed_seq: Hash of cleaned name => sequence,
#   forward_starting_number: offset plus biological primer length }
def filter_r1(r1_sh, forward_primer)
  primer_parts = forward_primer.match(/(N+)(\w+)$/)
  forward_n, forward_bio_primer =
    if primer_parts
      [primer_parts[1].size, primer_parts[2]]
    else
      [0, forward_primer]
    end
  bio_primer_length = forward_bio_primer.size
  primer_pattern = forward_bio_primer.nt_parser

  r1_passed_seq = {}
  r1_sh.dna_hash.each do |name, seq|
    next unless general_filter seq
    next unless seq[forward_n, bio_primer_length] =~ primer_pattern

    # reads are stored under their name with the region tag removed
    r1_passed_seq[remove_tag(name)] = seq
  end

  { r1_passed_seq: r1_passed_seq, forward_starting_number: forward_n + bio_primer_length }
end # end of filter_r1
|
216
|
+
|
217
|
+
# filter r2 raw sequences for non-specific primers.
|
218
|
+
# input r2_sh, SeqHash obj.
|
219
|
+
# return filtered Hash of sequence name and seq pair, as well as the length of PID.
|
220
|
+
# Keep only the R2 reads that pass `general_filter` and carry the cDNA
# primer directly after the Primer ID (PID).
#
# The cDNA primer must end in a run of N (whose length is taken as the PID
# length) followed by the biological primer. The biological primer is
# turned into a matcher via `nt_parser` and compared against the read slice
# that starts right after the PID.
#
# @param r2_sh [#dna_hash] object exposing the raw R2 reads as name => sequence
# @param cdna_primer [String] cDNA primer sequence including the N run
# @return [Hash] { r2_passed_seq: Hash of cleaned name => sequence,
#   pid_length: length of the N run,
#   reverse_starting_number: pid_length plus biological primer length }
# @raise [ArgumentError] when the primer has no N run followed by a primer
#   sequence (previously this surfaced as a NoMethodError on nil)
def filter_r2(r2_sh, cdna_primer)
  primer_parts = cdna_primer.match(/(N+)(\w+)$/)
  unless primer_parts
    raise ArgumentError,
          "cDNA primer #{cdna_primer.inspect} has no run of N (Primer ID) followed by a primer sequence"
  end

  pid_length = primer_parts[1].size
  cdna_bio_primer = primer_parts[2]
  cdna_bio_primer_size = cdna_bio_primer.size
  reverse_starting_number = pid_length + cdna_bio_primer_size
  cdna_primer_ref = cdna_bio_primer.nt_parser

  r2_passed_seq = {}
  r2_sh.dna_hash.each do |name, seq|
    next unless general_filter seq
    next unless seq[pid_length, cdna_bio_primer_size] =~ cdna_primer_ref

    # reads are stored under their name with the region tag removed
    r2_passed_seq[remove_tag(name)] = seq
  end

  { r2_passed_seq: r2_passed_seq, pid_length: pid_length, reverse_starting_number: reverse_starting_number }
end # end of filter_r2
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
# write the error message to the log file handle, then abort with the same message
|
249
|
+
|
250
|
+
# Record a message in the log, close the log, and terminate the program
# with the same message.
#
# @param log [#puts, #close] open log handle; it is closed here
# @param infor [String] message to log (prefixed with the current time and
#   a tab) and to hand to `abort`, styled with `.red.bold`
# @return [void] does not return; `abort` ends the process
def log_and_abort(log, infor)
  stamped_line = Time.now.to_s + "\t" + infor
  log.puts stamped_line
  log.close
  abort infor.red.bold
end
|
255
|
+
|
256
|
+
private
|
257
|
+
|
258
|
+
# Decompress one file with `gzip -d` when its name ends in ".gz", and
# return the path the caller should use afterwards.
#
# Only a trailing ".gz" counts: names that merely contain "gz" somewhere
# are returned untouched and gzip is not invoked for them.
#
# @param indir [String] directory containing the file
# @param f [String] file name inside `indir`
# @return [String] path of the file, with the trailing ".gz" removed when
#   gzip was invoked
def unzip_r(indir, f)
  r_file = File.join(indir, f)
  return r_file unless f.end_with?(".gz")

  # Multi-argument form bypasses the shell, so paths with spaces or shell
  # metacharacters are passed to gzip verbatim. The exit status is not
  # inspected; a failed gzip run is not reported here.
  system("gzip", "-d", r_file)
  File.join(indir, f.sub(/\.gz\z/, ""))
end
|
267
|
+
|
268
|
+
# Split a file name into a library name and a list of upper-cased tags.
#
# Only the part before the first "." is considered; it is split on "_".
# With several pieces, the first is the library name and the rest are the
# tags. With a single piece, the library name is "lib" and that piece is
# the only tag. A name with nothing before the first "." (for example ""
# or ".DS_Store") yields the library name "lib" and no tags instead of
# raising.
#
# @param file_name [String] file name to parse
# @return [Hash] { libname: String, tag: Array<String> }
def parser_file_name(file_name)
  # `to_s` covers the empty file name, where `split(".")[0]` is nil
  pieces = file_name.split(".")[0].to_s.split("_")

  if pieces.size <= 1
    { libname: "lib", tag: pieces.map(&:upcase) }
  else
    { libname: pieces[0], tag: pieces[1..-1].map(&:upcase) }
  end
end
|
279
|
+
|
280
|
+
# Decide whether a raw read is worth keeping.
#
# A read is rejected when it has an N anywhere except its first or last
# position, or when it contains 11 consecutive A's or 11 consecutive T's
# (treated as adaptor sequence).
#
# @param seq [String] nucleotide sequence
# @return [Boolean] true when the read passes, false otherwise
def general_filter(seq)
  return false if seq[1..-2] =~ /N/   # ambiguity inside the read
  return false if seq =~ /A{11}/      # poly-A run
  return false if seq =~ /T{11}/      # poly-T run

  true
end
|
291
|
+
|
292
|
+
# remove region info tags from the raw MiSeq sequences.
|
293
|
+
# Strip the trailing region information from a raw sequence name.
#
# When the name contains whitespace, everything before the first
# whitespace character is kept. Otherwise the last two characters are
# dropped.
#
# @param seq_name [String] raw sequence name
# @return [String] shortened name
def remove_tag(seq_name)
  first_blank = seq_name =~ /\s/
  first_blank ? seq_name[0...first_blank] : seq_name[0..-3]
end
|
300
|
+
|
301
|
+
end # end of class << self
|
302
|
+
|
303
|
+
end # end of TcsCore module
|
304
|
+
|
305
|
+
end # end of main module
|
@@ -0,0 +1,178 @@
|
|
1
|
+
module ViralSeq
|
2
|
+
class TcsJson
|
3
|
+
class << self
|
4
|
+
|
5
|
+
# Interactively collect the parameters for the `tcs` pipeline.
#
# Prompts on standard output and reads answers with `gets`: first the raw
# sequence directory and platform error rate, then one set of answers per
# sequenced region (primers, majority cut-off, end-join, and, only when
# end-join is requested, QC and trimming settings). The collected
# parameters are printed as JSON and can optionally be written to a file.
#
# Yes/no answers are matched with unanchored patterns, so any answer
# containing a "y" counts as yes.
#
# @return [Hash] the collected parameters, returned only when the user asks
#   to run the pipeline right away; otherwise the process ends via `abort`
def generate
  puts '-'*58
  puts '| JSON Parameter Generator for ' + "TCS #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
  puts '-'*58 + "\n"

  param = {}

  puts 'Enter the path to the directory that contains the MiSeq pair-end R1 and R2 .fastq or .fastq.gz file'
  print '> '
  param[:raw_sequence_dir] = gets.chomp.rstrip

  puts 'Enter the estimated platform error rate (for TCS cut-off calculation), default as ' + '0.02'.red.bold
  print '> '
  input_error = gets.chomp.rstrip.to_f
  # `to_f` gives 0.0 for blank or non-numeric input, which selects the default
  param[:platform_error_rate] = input_error == 0.0 ? 0.02 : input_error

  param[:primer_pairs] = []

  loop do
    data = {}
    puts "Enter the name for the sequenced region: "
    print '> '
    data[:region] = gets.chomp.rstrip

    puts "Enter the #{"cDNA".red.bold} primer sequence: "
    print '> '
    data[:cdna] = gets.chomp.rstrip

    puts "Enter the #{"forward".blue.bold} primer sequence: "
    print '> '
    data[:forward] = gets.chomp.rstrip

    puts "Enter supermajority cut-off (0.5 - 1.0). Default Simple Majority"
    print '> '
    mj = gets.chomp.rstrip.to_f
    # anything outside 0.5..1.0 (including blank input) is stored as 0
    data[:majority] = (0.5..1.0).include?(mj) ? mj : 0

    print "Need end-join? Y/N \n> "
    ej = gets.chomp.rstrip
    if ej =~ /y|yes/i
      data[:end_join] = true

      # One string for the whole menu; the blank lines between some entries
      # are part of the prompt text.
      print "End-join option? Choose from (1-4):\n\n" \
            "1: simple join, no overlap\n" \
            "2: known overlap \n\n" \
            "3: unknow overlap, use sample consensus to determine overlap, all sequence pairs have same overlap\n\n" \
            "4: unknow overlap, determine overlap by individual sequence pairs, sequence pairs can have different overlap\n\n" \
            "> "
      ej_option = gets.chomp.rstrip
      # The answer stays a String while retrying so it can be echoed with
      # `.red.bold`; converting it to an Integer here made a second invalid
      # answer fail with NoMethodError.
      until [1, 2, 3, 4].include?(ej_option.to_i)
        puts "Entered end-join option #{ej_option.red.bold} not valid (choose 1-4), try again"
        ej_option = gets.chomp.rstrip
      end

      case ej_option.to_i
      when 1
        data[:end_join_option] = 1
        data[:overlap] = 0
      when 2
        # NOTE(review): menu choice 2 is stored as end_join_option 1 with an
        # explicit :overlap; presumably the consumer treats option 1 as
        # "join with the given overlap" -- confirm against the caller.
        data[:end_join_option] = 1
        print "overlap bases: \n> "
        ol = gets.chomp.rstrip.to_i
        data[:overlap] = ol
      when 3
        data[:end_join_option] = 3
      when 4
        data[:end_join_option] = 4
      end

      print "Need QC for TCS? (support for HIV-1 and SIV)? Y/N \n> "
      qc = gets.chomp.rstrip
      if qc =~ /y|yes/i
        data[:TCS_QC] = true

        data[:ref_genome] = get_ref

        print "reference 5'end ref position or posiiton range, 0 if no need to match this end \n> "
        data[:ref_start] = gets.chomp.rstrip.to_i

        print "reference 3'end ref position or posiiton range: 0 if no need to match this end \n> "
        data[:ref_end] = gets.chomp.rstrip.to_i

        print "allow indels? (default as yes) Y/N \n> "
        indel = gets.chomp.rstrip
        # only an answer containing "n" turns indels off
        data[:indel] = indel =~ /n|no/i ? false : true
      else
        data[:TCS_QC] = false
      end

      print "Need trimming to a reference genome? Y/N \n> "
      trim_option = gets.chomp.rstrip
      if trim_option =~ /y|yes/i
        data[:trim] = true
        data[:trim_ref] = get_ref

        print "reference 5'end ref position \n> "
        data[:trim_ref_start] = gets.chomp.rstrip.to_i

        print "reference 3'end ref position \n> "
        data[:trim_ref_end] = gets.chomp.rstrip.to_i
      else
        data[:trim] = false
      end
    else
      data[:end_join] = false
    end

    param[:primer_pairs] << data
    print "Do you wish to conintue? Y/N \n> "
    continue_sig = gets.chomp.rstrip
    break unless continue_sig =~ /y|yes/i
  end

  puts "\nYour JSON string is:"
  puts JSON.pretty_generate(param)

  print "\nDo you wish to save it as a file? Y/N \n> "
  save_option = gets.chomp.rstrip

  if save_option =~ /y|yes/i
    print "Path to save JSON file:\n> "
    path = gets.chomp.rstrip
    File.open(path, 'w') { |f| f.puts JSON.pretty_generate(param) }
  end

  print "\nDo you wish to execute tcs pipeline with the input params now? Y/N \n> "

  rsp = gets.chomp.rstrip
  if rsp =~ /y/i
    return param
  else
    abort "Params json file generated. You can execute tcs pipeline using `tcs -p [params.json]`"
  end
end
|
154
|
+
|
155
|
+
private
|
156
|
+
# Ask the user to pick one of the three supported reference genomes.
#
# Keeps prompting until the answer converts to 1, 2 or 3.
#
# @return [Symbol] :HXB2, :NL43 or :MAC239
def get_ref
  puts "Choose reference genome (1-3):"
  puts "1. HIV-1 HXB2".red.bold
  puts "2. HIV-1 NL4-3".blue.bold
  puts "3. SIV MAC239".magenta.bold
  print "> "
  ref_option = gets.chomp.rstrip
  # The raw answer is kept as a String so the retry message echoes exactly
  # what was typed.
  until [1, 2, 3].include?(ref_option.to_i)
    print "Entered reference genome option #{ref_option.to_s.red.bold} not valid (choose 1-3), try again\n> "
    ref_option = gets.chomp.rstrip
  end

  case ref_option.to_i
  when 1 then :HXB2
  when 2 then :NL43
  when 3 then :MAC239
  end
end
|
176
|
+
end
|
177
|
+
end # end TcsJson
|
178
|
+
end # end main module
|