viral_seq 1.7.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +29 -24
- data/README.md +28 -18
- data/bin/tcs +39 -8
- data/bin/tcs_log +27 -16
- data/bin/tcs_sdrm +23 -14
- data/lib/viral_seq/R.rb +31 -0
- data/lib/viral_seq/constant.rb +0 -41
- data/lib/viral_seq/muscle.rb +1 -1
- data/lib/viral_seq/recency.rb +47 -1
- data/lib/viral_seq/recency_report.rb +193 -0
- data/lib/viral_seq/root.rb +7 -0
- data/lib/viral_seq/seq_hash.rb +4 -4
- data/lib/viral_seq/seq_hash_pair.rb +154 -27
- data/lib/viral_seq/tcs_dr.rb +168 -81
- data/lib/viral_seq/util/check_env.r +9 -0
- data/lib/viral_seq/util/recency_model/rt_only_fit.Rdata +0 -0
- data/lib/viral_seq/util/recency_model/rt_v1v3_fit.Rdata +0 -0
- data/lib/viral_seq/util/recency_model/v1v3_only_fit.Rdata +0 -0
- data/lib/viral_seq/util/sdrm_r.r +34 -0
- data/lib/viral_seq/version.rb +2 -2
- data/lib/viral_seq.rb +6 -0
- data/viral_seq.gemspec +2 -2
- metadata +17 -9
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
module ViralSeq
|
|
2
|
+
|
|
3
|
+
# Class to generate the recency report (a one-page PDF rendered with Prawn)
# from the summary hash of an SDRM run.
class RecencyReport

  # Generate the recency report in .pdf format.
  #
  # @param log [Hash] Hash from the json summary string of the SDRM report.
  #   It is read with Symbol keys: :sample_id, :recency, :dpi, :dpi_lwr,
  #   :dpi_upr, :possible_dual_infection, and :pi_X / :dist20_X / :tcs_X
  #   for X in RT, V1V3 and P17.
  # @param outfile [String] path to the output file
  # @return [NilClass] documented as nil; the .pdf written to +outfile+ is the
  #   useful output, so callers should not rely on the return value.
  def self.generate(log, outfile)

    # Text colors (hex RGB) keyed by the recency call found in log[:recency].
    recency_color = {
      "recent" => "d42828",
      "chronic" => "0666bf",
      "indeterminant"=> "f78914",
      "insufficient data" => "7d7b79"
    }

    # Text colors (hex RGB) keyed by log[:possible_dual_infection].
    dual_infection_color = {
      "Yes" => "ffcc00",
      "No" => "339900",
      "insufficient data" => "7d7b79"
    }

    # Render a numeric value with one decimal; anything that cannot be rounded
    # (nil, or a placeholder String) is shown as-is instead of raising.
    # NOTE(review): assumes missing estimates arrive as nil or a String - confirm
    # against the code that builds the summary json.
    one_decimal = lambda do |value|
      value.respond_to?(:round) ? value.round(1).to_s : value.to_s
    end

    # The block takes no parameter, so the bare calls below (text, move_down,
    # formatted_text) are sent to the Prawn document being generated, while
    # locals of this method (log, the color tables, one_decimal) stay visible.
    Prawn::Document.generate(outfile, margin: 75) do

      # One "label: value" row. The bold label is padded to 30 columns of the
      # monospaced font so the underlined values line up.
      # ljust never raises when the label is longer than the column.
      def text_format(text1, text2)
        [
          { text: text1.to_s.ljust(30), styles: [:bold], size: 14, font: "Courier"},
          { text: text2.to_s, size: 14, styles: [:underline]}
        ]
      end

      # One row of the parameter table: region, average diversity, dist20 and
      # depth, padded to 11/19/11 columns to sit under the table header.
      # ljust leaves an over-long value untouched instead of raising
      # ArgumentError on a negative pad width.
      def text_format2(text1, text2, text3, text4)
        text1 = text1.to_s
        text2 = text2.to_s
        text3 = text3.to_s
        text4 = text4.to_s

        [
          { text: "\s\s\s" + text1.ljust(11) + text2.ljust(19) + text3.ljust(11) + text4,
            size: 14,
            font: "Courier"
          }
        ]
      end

      text("Quantitative Recency Report by MPID-NGS",
        size: 18,
        align: :center,
        style: :bold
      )

      move_down 20

      formatted_text(
        text_format("Library ID:", log[:sample_id])
      )

      move_down 10

      formatted_text(
        text_format("ViralSeq Version:", ViralSeq::VERSION.to_s)
      )

      formatted_text(
        text_format("TCS Version:", ViralSeq::TCS_VERSION.to_s)
      )

      formatted_text(
        text_format("Processed Date", Time.now.strftime("%Y-%b-%d %H:%M"))
      )

      move_down 30

      text("Summary of parameters",
        size: 16,
        style: :bold
      )

      move_down 20

      # Table header; the column widths (11/19/11) match text_format2.
      formatted_text(
        [
          { text: "REGION" + "\s"*5 + "AVG. DIVERSITY" + "\s"*5 + "DIST20" + "\s"*5 + "DEPTH",
            styles: [:bold],
            size: 14,
            font: "Courier"
          },
        ]
      )

      move_down 5

      formatted_text(
        text_format2("RT", log[:pi_RT], log[:dist20_RT], log[:tcs_RT])
      )

      formatted_text(
        text_format2("V1V3", log[:pi_V1V3], log[:dist20_V1V3], log[:tcs_V1V3])
      )

      formatted_text(
        text_format2("P17", log[:pi_P17], log[:dist20_P17], log[:tcs_P17])
      )

      move_down 30

      formatted_text(
        [
          { text: "Prediction: ",
            styles: [:bold],
            size: 16,
          },

          # to_s keeps a missing :recency from raising NoMethodError.
          { text: log[:recency].to_s.capitalize + " Infection",
            styles: [:bold],
            size: 16,
            color: recency_color[log[:recency]]
          },

          { text: " (9-month cutoff)",
            size: 14,
          },
        ]
      )

      move_down 20

      formatted_text(
        [
          {
            text: "Estimated Day Post Infection: ",
            styles: [:bold],
            size: 16
          },

          {
            text: one_decimal.call(log[:dpi]) +
                  " (" + one_decimal.call(log[:dpi_lwr]) + "-" + one_decimal.call(log[:dpi_upr]) + ") Days",
            styles: [:bold],
            size: 16,
            color: recency_color[log[:recency]]
          }
        ]
      )

      move_down 20

      formatted_text(
        [
          {
            text: "Possible multivariant Infection: ",
            styles: [:bold],
            size: 16,
          },

          {
            text: log[:possible_dual_infection].to_s,
            styles: [:bold],
            size: 16,
            color: dual_infection_color[log[:possible_dual_infection]]
          }
        ]
      )

      move_down 10

      # Extra warning line, printed only when a dual infection is flagged.
      if log[:possible_dual_infection] == "Yes"

        formatted_text(
          [
            {
              text: "Warning: Days Post Infection prediction not reliable!",
              styles: [:bold],
              size: 14,
              color: "ffcc00"
            }
          ]
        )
      end
    end

  end

end
|
|
192
|
+
|
|
193
|
+
end
|
data/lib/viral_seq/seq_hash.rb
CHANGED
|
@@ -495,7 +495,7 @@ module ViralSeq
|
|
|
495
495
|
# total G->A mutations at apobec3g/f positions.
|
|
496
496
|
total = 0
|
|
497
497
|
|
|
498
|
-
unless ref
|
|
498
|
+
unless ref
|
|
499
499
|
# make consensus sequence for the input sequence hash
|
|
500
500
|
ref = self.consensus
|
|
501
501
|
end
|
|
@@ -571,7 +571,7 @@ module ViralSeq
|
|
|
571
571
|
hm_hash.each do |k,_v|
|
|
572
572
|
hm_seq_hash.dna_hash[k] = self.dna_hash[k]
|
|
573
573
|
end
|
|
574
|
-
|
|
574
|
+
|
|
575
575
|
hm_seq_hash.title = self.title + "_hypermut"
|
|
576
576
|
hm_seq_hash.file = self.file
|
|
577
577
|
filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
|
|
@@ -713,7 +713,7 @@ module ViralSeq
|
|
|
713
713
|
|
|
714
714
|
|
|
715
715
|
# align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
|
|
716
|
-
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
|
716
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
|
717
717
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (the default), it will use RubyGem::MuscleBio
|
|
718
718
|
# @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
|
|
719
719
|
|
|
@@ -729,7 +729,7 @@ module ViralSeq
|
|
|
729
729
|
temp_aln = File.join(temp_dir, "_temp_muscle_aln")
|
|
730
730
|
File.open(temp_file, 'w'){|f| seq_hash.each {|k,v| f.puts k; f.puts v}}
|
|
731
731
|
if path_to_muscle
|
|
732
|
-
unless ViralSeq.check_muscle?(path_to_muscle)
|
|
732
|
+
unless ViralSeq::Muscle.check_muscle?(path_to_muscle)
|
|
733
733
|
File.unlink(temp_file)
|
|
734
734
|
return nil
|
|
735
735
|
end
|
|
@@ -87,7 +87,8 @@ module ViralSeq
|
|
|
87
87
|
end
|
|
88
88
|
|
|
89
89
|
# Pair-end join function for KNOWN overlap size.
|
|
90
|
-
# @param overlap [Integer] how many bases are overlapped. `0` means no overlap, R1 and R2 will be simply put together.
|
|
90
|
+
# @param overlap [Integer] simple overlap value indicating how many bases are overlapped. `0` means no overlap, R1 and R2 will be simply put together.
|
|
91
|
+
# overlap can also be an explicit [Hash] object with keys :overlap_size, :r1_overlapped, :r2_overlapped, :before_overlap, :after_overlap
|
|
91
92
|
# @param diff [Integer, Float] the maximum mismatch rate allowed for the overlapping region. default at 0.0, i.e. no mis-match allowed.
|
|
92
93
|
# @return [ViralSeq::SeqHash] a SeqHash object of joined sequences.
|
|
93
94
|
# @example join paired-end sequences with different :diff cut-offs, overlap provided.
|
|
@@ -106,24 +107,64 @@ module ViralSeq
|
|
|
106
107
|
# => [">pair1", ">pair2", ">pair3"]
|
|
107
108
|
|
|
108
109
|
def join1(overlap = 0, diff = 0.0)
|
|
109
|
-
seq_pair_hash = self.dna_hash
|
|
110
|
-
raise ArgumentError.new(":overlap has to be Integer, input #{overlap} invalid.") unless overlap.is_a? Integer
|
|
111
110
|
raise ArgumentError.new(":diff has to be float or integer, input #{diff} invalid.") unless (diff.is_a? Integer or diff.is_a? Float)
|
|
111
|
+
|
|
112
|
+
if overlap.is_a? Integer and overlap.zero?
|
|
113
|
+
overlap = {
|
|
114
|
+
overlap_size: 0,
|
|
115
|
+
r1_overlapped: 0...0,
|
|
116
|
+
r2_overlapped: 0...0,
|
|
117
|
+
before_overlap: {
|
|
118
|
+
region: :r1,
|
|
119
|
+
range: 0..-1,
|
|
120
|
+
} ,
|
|
121
|
+
after_overlap: {
|
|
122
|
+
region: :r2,
|
|
123
|
+
range: 0..-1
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
elsif overlap.is_a? Integer
|
|
127
|
+
overlap = {
|
|
128
|
+
overlap_size: overlap,
|
|
129
|
+
r1_overlapped: -overlap..-1,
|
|
130
|
+
r2_overlapped: 0..(overlap - 1),
|
|
131
|
+
before_overlap: {
|
|
132
|
+
region: :r1,
|
|
133
|
+
range: 0..(-overlap - 1),
|
|
134
|
+
} ,
|
|
135
|
+
after_overlap: {
|
|
136
|
+
region: :r2,
|
|
137
|
+
range: overlap..-1
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
seq_pair_hash = self.dna_hash
|
|
112
143
|
joined_seq = {}
|
|
113
144
|
seq_pair_hash.each do |seq_name,seq_pair|
|
|
114
145
|
r1_seq = seq_pair[0]
|
|
115
146
|
r2_seq = seq_pair[1]
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
147
|
+
|
|
148
|
+
r1_overlap = r1_seq[overlap[:r1_overlapped]]
|
|
149
|
+
r2_overlap = r2_seq[overlap[:r2_overlapped]]
|
|
150
|
+
|
|
151
|
+
overlap_size = overlap[:overlap_size]
|
|
152
|
+
|
|
153
|
+
if (diff.zero? and r1_overlap == r2_overlap) or (!diff.zero? and r1_overlap.compare_with(r2_overlap) <= (overlap_size.abs * diff))
|
|
154
|
+
if overlap[:before_overlap][:region] == :r1
|
|
155
|
+
before_overlap_seq = r1_seq[overlap[:before_overlap][:range]]
|
|
156
|
+
elsif overlap[:before_overlap][:region] == :r2
|
|
157
|
+
before_overlap_seq = r2_seq[overlap[:before_overlap][:range]]
|
|
121
158
|
end
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
159
|
+
|
|
160
|
+
if overlap[:after_overlap][:region] == :r1
|
|
161
|
+
after_overlap_seq = r1_seq[overlap[:after_overlap][:range]]
|
|
162
|
+
elsif overlap[:after_overlap][:region] == :r2
|
|
163
|
+
after_overlap_seq = r2_seq[overlap[:after_overlap][:range]]
|
|
164
|
+
end
|
|
165
|
+
joined_sequence = before_overlap_seq + r1_overlap + after_overlap_seq
|
|
126
166
|
end
|
|
167
|
+
|
|
127
168
|
joined_seq[seq_name] = joined_sequence if joined_sequence
|
|
128
169
|
end
|
|
129
170
|
|
|
@@ -164,18 +205,35 @@ module ViralSeq
|
|
|
164
205
|
elsif model == :indiv
|
|
165
206
|
joined_seq = {}
|
|
166
207
|
seq_pair_hash.each do |seq_name, seq_pair|
|
|
208
|
+
r1_seq = seq_pair[0]
|
|
209
|
+
r2_seq = seq_pair[1]
|
|
167
210
|
overlap_list = []
|
|
168
|
-
|
|
169
|
-
|
|
211
|
+
|
|
212
|
+
overlap_matrix(r1_seq, r2_seq).each do |overlap1, diff_nt|
|
|
213
|
+
cut_off_base = overlap1[:overlap_size] * diff
|
|
170
214
|
overlap_list << overlap1 if diff_nt <= cut_off_base
|
|
171
215
|
end
|
|
216
|
+
|
|
172
217
|
if overlap_list.empty?
|
|
173
|
-
joined_seq[seq_name]
|
|
218
|
+
joined_seq[seq_name] = seq_pair[0] + seq_pair[1]
|
|
174
219
|
else
|
|
175
|
-
|
|
176
|
-
|
|
220
|
+
overlap_to_use = overlap_list.sort_by{|k| k[:overlap_size].abs}.reverse[0]
|
|
221
|
+
|
|
222
|
+
if overlap_to_use[:before_overlap][:region] == :r1
|
|
223
|
+
before_overlap_seq = r1_seq[overlap_to_use[:before_overlap][:range]]
|
|
224
|
+
elsif overlap_to_use[:before_overlap][:region] == :r2
|
|
225
|
+
before_overlap_seq = r2_seq[overlap_to_use[:before_overlap][:range]]
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
if overlap_to_use[:after_overlap][:region] == :r1
|
|
229
|
+
after_overlap_seq = r1_seq[overlap_to_use[:after_overlap][:range]]
|
|
230
|
+
elsif overlap_to_use[:after_overlap][:region] == :r2
|
|
231
|
+
after_overlap_seq = r2_seq[overlap_to_use[:after_overlap][:range]]
|
|
232
|
+
end
|
|
233
|
+
joined_seq[seq_name] = before_overlap_seq + r1_seq[overlap_to_use[:r1_overlapped]] + after_overlap_seq
|
|
177
234
|
end
|
|
178
235
|
end
|
|
236
|
+
|
|
179
237
|
joined_seq_hash = ViralSeq::SeqHash.new
|
|
180
238
|
joined_seq_hash.dna_hash = joined_seq
|
|
181
239
|
joined_seq_hash.title = self.title + "_joined"
|
|
@@ -197,35 +255,104 @@ module ViralSeq
|
|
|
197
255
|
seq_pair_hash.each do |_seq_name, seq_pair|
|
|
198
256
|
overlap_list = []
|
|
199
257
|
matrix = overlap_matrix(seq_pair[0], seq_pair[1])
|
|
200
|
-
matrix.each do |
|
|
258
|
+
matrix.each do |overlap_positions, diff_nt|
|
|
259
|
+
overlap = overlap_positions[:overlap_size].abs
|
|
201
260
|
cut_off_base = overlap * diff
|
|
202
|
-
overlap_list <<
|
|
261
|
+
overlap_list << overlap_positions if diff_nt <= cut_off_base
|
|
203
262
|
end
|
|
263
|
+
|
|
204
264
|
if overlap_list.empty?
|
|
205
|
-
overlaps <<
|
|
265
|
+
overlaps << {
|
|
266
|
+
overlap_size: 0,
|
|
267
|
+
r1_overlapped: 0...0,
|
|
268
|
+
r2_overlapped: 0...0,
|
|
269
|
+
before_overlap: {
|
|
270
|
+
region: :r1,
|
|
271
|
+
range: 0..-1,
|
|
272
|
+
} ,
|
|
273
|
+
after_overlap: {
|
|
274
|
+
region: :r2,
|
|
275
|
+
range: 0..-1
|
|
276
|
+
}
|
|
277
|
+
}
|
|
206
278
|
else
|
|
207
|
-
overlaps << overlap_list.
|
|
279
|
+
overlaps << overlap_list.sort_by{|k| k[:overlap_size].abs}.reverse[0]
|
|
208
280
|
end
|
|
281
|
+
|
|
209
282
|
end
|
|
210
283
|
count_overlaps = overlaps.count_freq
|
|
211
284
|
max_value = count_overlaps.values.max
|
|
212
285
|
max_overlap_list = []
|
|
213
286
|
count_overlaps.each {|overlap, counts| max_overlap_list << overlap if counts == max_value}
|
|
214
|
-
max_overlap_list.
|
|
287
|
+
max_overlap_list.sort_by{|k| k[:overlap_size].abs}.reverse[0]
|
|
215
288
|
end # end of determine_overlap_pid_pair
|
|
216
289
|
|
|
217
290
|
# Score every candidate overlap between a pair of sequences.
# @param sequence1 [String] the first (R1) sequence
# @param sequence2 [String] the second (R2) sequence
# @return [Hash] one entry per candidate produced by overlap_list for the two
#   sequence lengths; the key is the candidate overlap Hash itself and the
#   value is what String#compare_with returns for the two overlapped
#   substrings (used by callers as the number of differing positions).
def overlap_matrix(sequence1, sequence2)
  candidates = overlap_list(sequence1.size, sequence2.size)
  candidates.each_with_object({}) do |candidate, scores|
    r1_part = sequence1[candidate[:r1_overlapped]]
    r2_part = sequence2[candidate[:r2_overlapped]]
    scores[candidate] = r1_part.compare_with(r2_part)
  end
end
|
|
303
|
+
|
|
304
|
+
# Given the lengths of two sequences, return all candidate overlapping
# arrangements in an [Array].
#
# @param l1 [Integer] length of the first (R1) sequence
# @param l2 [Integer] length of the second (R2) sequence
# @return [Array<Hash>] one Hash per candidate, with keys
#   :overlap_size   [Integer] number of overlapped positions,
#   :r1_overlapped  [Range]   overlapped positions on R1,
#   :r2_overlapped  [Range]   overlapped positions on R2,
#   :before_overlap [Hash]    {region: :r1 or :r2, range: Range} for the part
#                             that precedes the overlap,
#   :after_overlap  [Hash]    {region: :r1 or :r2, range: Range} for the part
#                             that follows the overlap.
#   Every Range is meant to be used as a String index on the named sequence.
def overlap_list(l1, l2)
  return_list = []
  min_overlap = 4
  max_overlap = [l1, l2].min
  diff = (l1 - l2).abs
  max_reverse = l1/2

  # 1. End of R1 overlaps the start of R2, from the minimal overlap up to the
  #    full length of the shorter sequence.
  (min_overlap..max_overlap).each do |overlap|
    return_list << {
      overlap_size: overlap,
      r1_overlapped: (l1-overlap)..(l1-1),
      r2_overlapped: 0..(overlap -1),
      # Exclusive range on purpose: when overlap == l1 nothing precedes the
      # overlap, and the inclusive form 0..-1 would select ALL of R1.
      before_overlap: {region: :r1, range: 0...(l1 - overlap)},
      after_overlap: {region: :r2, range: overlap..(l2-1)}
    }
  end

  # 2. The shorter sequence lies entirely inside the longer one; one candidate
  #    per possible offset (none when both lengths are equal).
  if l1 >= l2
    (1..diff).each do |overlap|
      return_list << {
        overlap_size: max_overlap,
        r1_overlapped: (diff - overlap)..(l1-1-overlap),
        r2_overlapped: 0..(l2-1),
        before_overlap: {region: :r1, range: 0...(diff - overlap)},
        after_overlap: {region: :r1, range: (l1-overlap)...l1},
      }
    end
  else
    (1..diff).each do |overlap|
      return_list << {
        overlap_size: max_overlap,
        r1_overlapped: 0..(l1-1),
        r2_overlapped: overlap..(max_overlap + overlap - 1),
        before_overlap: {region: :r2, range: 0...overlap},
        after_overlap: {region: :r2, range: (max_overlap + overlap)...l2},
      }
    end
  end

  # 3. Start of R1 overlaps the end of R2, listed from the largest overlap
  #    down to half the length of R1.
  (max_reverse..(max_overlap-1)).reverse_each do |overlap|
    return_list << {
      overlap_size: overlap,
      r1_overlapped: 0..(overlap -1),
      r2_overlapped: (l2-overlap)..(l2-1),
      before_overlap: {region: :r2, range: 0..(l2-overlap-1)},
      after_overlap: {region: :r1, range: overlap..(l1-1)},
    }
  end

  return_list
end # end of overlap_list
|
|
229
356
|
|
|
230
357
|
end # end of SeqHashPair
|
|
231
358
|
end # end of ViralSeq
|