viral_seq 1.6.1 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +26 -1
- data/bin/tcs +23 -5
- data/bin/tcs_sdrm +32 -2
- data/docs/dr.json +15 -0
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/muscle.rb +8 -2
- data/lib/viral_seq/seq_hash.rb +19 -11
- data/lib/viral_seq/sequence.rb +12 -13
- data/lib/viral_seq/tcs_dr.rb +20 -5
- data/lib/viral_seq/version.rb +2 -2
- data/viral_seq.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
|
4
|
+
data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
|
7
|
+
data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.6.
|
4
|
+
viral_seq (1.6.5)
|
5
5
|
colorize (~> 0.1)
|
6
6
|
combine_pdf (~> 1.0, >= 1.0.0)
|
7
|
-
muscle_bio (~> 0.
|
7
|
+
muscle_bio (~> 0.5)
|
8
8
|
prawn (~> 2.3, >= 2.3.0)
|
9
9
|
prawn-table (~> 0.2, >= 0.2.0)
|
10
10
|
|
@@ -15,7 +15,7 @@ GEM
|
|
15
15
|
combine_pdf (1.0.21)
|
16
16
|
ruby-rc4 (>= 0.1.5)
|
17
17
|
diff-lcs (1.3)
|
18
|
-
muscle_bio (0.
|
18
|
+
muscle_bio (0.5.0)
|
19
19
|
pdf-core (0.9.0)
|
20
20
|
prawn (2.4.0)
|
21
21
|
pdf-core (~> 0.9.0)
|
data/README.md
CHANGED
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
|
10
10
|
|
11
11
|
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
12
12
|
|
13
|
+
The CLI tools `tcs`, `tcs_sdrm`, `tcs_log` and `locator` are included in the gem.
|
14
|
+
|
13
15
|
#### tcs web app - https://primer-id.org/
|
14
16
|
|
15
17
|
## Illustration for the Primer ID Sequencing
|
@@ -22,6 +24,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
22
24
|
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
23
25
|
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
24
26
|
|
27
|
+
## Requirements
|
28
|
+
|
29
|
+
Required Ruby Version: >= 2.5
|
30
|
+
|
31
|
+
Required RubyGems version: >= 1.3.6
|
32
|
+
|
25
33
|
## Install
|
26
34
|
|
27
35
|
```bash
|
@@ -179,10 +187,27 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
179
187
|
|
180
188
|
## Updates
|
181
189
|
|
190
|
+
### Version-1.7.0-08242022
|
191
|
+
|
192
|
+
1. Added a warning if the `tcs` pipeline is executed from source instead of being installed through `gem`.
|
193
|
+
2. Optimized the `ViralSeq::SeqHash#a3g` hypermut algorithm. It now allows an external reference other than the sample reference.
|
194
|
+
|
195
|
+
### Version-1.6.4-07182022
|
196
|
+
|
197
|
+
1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
|
198
|
+
2. Loosened the locator params for the "V1V3" end region to handle rare alignment issues. The default "V1V3" region now ends at positions 7205 to 7210 instead of 7208.
|
199
|
+
3. `tcs_sdrm` now analyzes the "P17" region for pairwise diversity.
|
200
|
+
|
201
|
+
### Version-1.6.3-02052022
|
202
|
+
|
203
|
+
1. Updated the `ViralSeq::Muscle` module to follow the update of `muscle` from version 3.8.1 to 5.1.
|
204
|
+
2. Optimized the `locator` algorithm based on `muscle` v5.1.
|
205
|
+
3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
|
206
|
+
|
182
207
|
### Version-1.6.1-02022022
|
183
208
|
|
184
209
|
1. Fixed the `nav bar` in tcs_log html file.
|
185
|
-
2. Fixed a typo in `tcs`.
|
210
|
+
2. Fixed a typo in `tcs`.
|
186
211
|
|
187
212
|
### Version 1.6.0-01042022
|
188
213
|
|
data/bin/tcs
CHANGED
@@ -22,20 +22,38 @@
|
|
22
22
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23
23
|
# THE SOFTWARE.
|
24
24
|
|
25
|
+
# Install using `gem install viral_seq`
|
25
26
|
# Use JSON file as the run param
|
26
27
|
# run `tcs -j` to generate param json file.
|
27
28
|
|
28
|
-
|
29
|
+
def gem_installed?(gem_name)
|
30
|
+
found_gem = false
|
31
|
+
begin
|
32
|
+
found_gem = Gem::Specification.find_by_name(gem_name)
|
33
|
+
rescue Gem::LoadError
|
34
|
+
return false
|
35
|
+
else
|
36
|
+
return true
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
if gem_installed?('viral_seq')
|
41
|
+
require 'viral_seq'
|
42
|
+
else
|
43
|
+
printf "\n****************************************************\n"
|
44
|
+
printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
|
45
|
+
printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
|
46
|
+
printf "****************************************************\n\n"
|
47
|
+
exit 1
|
48
|
+
end
|
49
|
+
|
50
|
+
|
29
51
|
require 'json'
|
30
52
|
require 'colorize'
|
31
53
|
require 'optparse'
|
32
54
|
|
33
55
|
options = {}
|
34
56
|
|
35
|
-
# banner = '-'*50 + "\n" +
|
36
|
-
# '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |' + "\n" +
|
37
|
-
# '-'*50 + "\n"
|
38
|
-
|
39
57
|
banner = "\n" +
|
40
58
|
"████████ ██████ ███████ ██████ ██ ██████ ███████ ██ ██ ███ ██ ███████\n".light_red +
|
41
59
|
" ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██\n".light_yellow +
|
data/bin/tcs_sdrm
CHANGED
@@ -172,6 +172,25 @@ libs.each do |lib|
|
|
172
172
|
linkage_list += sdrm[1]
|
173
173
|
aa_report_list += sdrm[2]
|
174
174
|
|
175
|
+
elsif seq_basename =~/P17/i
|
176
|
+
a3g_check = seqs.a3g
|
177
|
+
a3g_seqs = a3g_check[:a3g_seq]
|
178
|
+
a3g_filtered_seqs = a3g_check[:filtered_seq]
|
179
|
+
stop_codon_check = a3g_filtered_seqs.stop_codon(2)
|
180
|
+
stop_codon_seqs = stop_codon_check[:with_stop_codon]
|
181
|
+
filtered_seqs = stop_codon_check[:without_stop_codon]
|
182
|
+
poisson_minority_cutoff = filtered_seqs.pm
|
183
|
+
fdr_hash = filtered_seqs.fdr
|
184
|
+
summary_hash[:P17] = [
|
185
|
+
seqs.size.to_s,
|
186
|
+
a3g_seqs.size.to_s,
|
187
|
+
stop_codon_seqs.size.to_s,
|
188
|
+
filtered_seqs.size.to_s,
|
189
|
+
poisson_minority_cutoff.to_s
|
190
|
+
].join(',')
|
191
|
+
next if filtered_seqs.size < 3
|
192
|
+
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
193
|
+
|
175
194
|
elsif seq_basename =~/RT/i
|
176
195
|
rt_seq1 = {}
|
177
196
|
rt_seq2 = {}
|
@@ -229,7 +248,7 @@ libs.each do |lib|
|
|
229
248
|
filtered_seq_files.each do |seq_file|
|
230
249
|
filtered_sh = ViralSeq::SeqHash.fa(seq_file)
|
231
250
|
next if filtered_sh.size < 3
|
232
|
-
aligned_sh = filtered_sh.random_select(1000).align
|
251
|
+
aligned_sh = filtered_sh.random_select(1000).align(:Super5)
|
233
252
|
aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
|
234
253
|
end
|
235
254
|
|
@@ -249,7 +268,7 @@ libs.each do |lib|
|
|
249
268
|
tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
|
250
269
|
summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
|
251
270
|
end
|
252
|
-
[:PR, :RT, :IN, :V1V3].each do |regions|
|
271
|
+
[:PR, :RT, :IN, :V1V3, :P17].each do |regions|
|
253
272
|
next unless summary_hash[regions]
|
254
273
|
seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
|
255
274
|
end
|
@@ -270,10 +289,13 @@ libs.each do |lib|
|
|
270
289
|
tcs_RT = 0
|
271
290
|
tcs_IN = 0
|
272
291
|
tcs_V1V3 = 0
|
292
|
+
tcs_P17 = 0
|
273
293
|
pi_RT = 0.0
|
274
294
|
pi_V1V3 = 0.0
|
295
|
+
pi_P17 = 0.0
|
275
296
|
dist20_RT = 0.0
|
276
297
|
dist20_V1V3 = 0.0
|
298
|
+
dist20_P17 = 0.0
|
277
299
|
summary_lines.each do |line|
|
278
300
|
data = line.chomp.split(",")
|
279
301
|
if data[0] == "PR"
|
@@ -288,6 +310,10 @@ libs.each do |lib|
|
|
288
310
|
tcs_V1V3 = data[1].to_i
|
289
311
|
pi_V1V3 = data[6].to_f
|
290
312
|
dist20_V1V3 = data[7].to_f
|
313
|
+
elsif data[0] == "P17"
|
314
|
+
tcs_P17 = data[4].to_i
|
315
|
+
pi_P17 = data[6].to_f
|
316
|
+
dist20_P17 = data[7].to_f
|
291
317
|
end
|
292
318
|
end
|
293
319
|
|
@@ -323,9 +349,13 @@ libs.each do |lib|
|
|
323
349
|
tcs_RT: tcs_RT,
|
324
350
|
tcs_IN: tcs_IN,
|
325
351
|
tcs_V1V3: tcs_V1V3,
|
352
|
+
tcs_P17: tcs_P17,
|
326
353
|
pi_RT: pi_RT,
|
354
|
+
pi_V1V3: pi_V1V3,
|
355
|
+
pi_P17: pi_P17,
|
327
356
|
dist20_RT: dist20_RT,
|
328
357
|
dist20_V1V3: dist20_V1V3,
|
358
|
+
dist20_P17: dist20_P17,
|
329
359
|
recency: recency,
|
330
360
|
sdrm_PR: sdrm_PR,
|
331
361
|
sdrm_RT: sdrm_RT,
|
data/docs/dr.json
CHANGED
@@ -62,6 +62,21 @@
|
|
62
62
|
"ref_end": 7208,
|
63
63
|
"indel": true,
|
64
64
|
"trim": false
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"region": "P17",
|
68
|
+
"cdna": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
69
|
+
"forward": "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
|
70
|
+
"majority": 0.5,
|
71
|
+
"end_join": true,
|
72
|
+
"end_join_option": 1,
|
73
|
+
"overlap": 0,
|
74
|
+
"TCS_QC": true,
|
75
|
+
"ref_genome": "HXB2",
|
76
|
+
"ref_start": 1196,
|
77
|
+
"ref_end": 1725,
|
78
|
+
"indel": true,
|
79
|
+
"trim": false
|
65
80
|
}
|
66
81
|
]
|
67
82
|
}
|
data/lib/viral_seq/hivdr.rb
CHANGED
@@ -5,7 +5,7 @@ module ViralSeq
|
|
5
5
|
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
|
6
6
|
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
7
7
|
# PR codon 1-99
|
8
|
-
# RT codon 34-122 (HXB2
|
8
|
+
# RT codon 34-122 (HXB2 2649-2914) and 152-236(3001-3257)
|
9
9
|
# IN codon 53-174 (HXB2 4384-4751)
|
10
10
|
# @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
|
11
11
|
# can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
|
data/lib/viral_seq/muscle.rb
CHANGED
@@ -28,6 +28,8 @@ module ViralSeq
|
|
28
28
|
# align a sequence with reference sequence Strings
|
29
29
|
# @param ref_seq [String] reference sequence
|
30
30
|
# @param test_seq [String] test sequence
|
31
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
32
|
+
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
31
33
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
32
34
|
# @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
|
33
35
|
# if it cannot find the MUSCLE executable
|
@@ -37,7 +39,7 @@ module ViralSeq
|
|
37
39
|
# aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
|
38
40
|
# => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
|
39
41
|
|
40
|
-
def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
|
42
|
+
def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
|
41
43
|
temp_dir = Dir.home
|
42
44
|
temp_name = "_" + SecureRandom.alphanumeric
|
43
45
|
temp_file = File.join(temp_dir, temp_name)
|
@@ -56,7 +58,11 @@ module ViralSeq
|
|
56
58
|
end
|
57
59
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
58
60
|
else
|
59
|
-
MuscleBio.
|
61
|
+
if MuscleBio::VERSION.to_f < 0.5
|
62
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
63
|
+
else
|
64
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
65
|
+
end
|
60
66
|
end
|
61
67
|
aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
|
62
68
|
File.unlink(temp_file)
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -223,7 +223,7 @@ module ViralSeq
|
|
223
223
|
|
224
224
|
# check the size range of the DNA sequences of the SeqHash object
|
225
225
|
# @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
|
226
|
-
|
226
|
+
|
227
227
|
def check_nt_size
|
228
228
|
dna_hash = self.dna_hash
|
229
229
|
size_array = []
|
@@ -450,7 +450,7 @@ module ViralSeq
|
|
450
450
|
# function to determine if the sequences have APOBEC3g/f hypermutation.
|
451
451
|
# # APOBEC3G/F pattern: GRD -> ARD
|
452
452
|
# # control pattern: G[YN|RC] -> A[YN|RC]
|
453
|
-
# # use the sample consensus to determine potential a3g sites
|
453
|
+
# # use the sample consensus to determine potential a3g sites (default) or provide external reference sequences as a `String`
|
454
454
|
# # Two criteria to identify hypermutation
|
455
455
|
# # 1. Fisher's exact test on the frequencies of G to A mutation at A3G positions vs. non-A3G positions
|
456
456
|
# # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
|
@@ -486,7 +486,7 @@ module ViralSeq
|
|
486
486
|
# # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
|
487
487
|
# @see https://www.hiv.lanl.gov/content/sequence/HYPERMUT/hypermut.html LANL Hypermut
|
488
488
|
|
489
|
-
def a3g_hypermut
|
489
|
+
def a3g_hypermut(ref = nil)
|
490
490
|
# mut_hash number of apobec3g/f mutations per sequence
|
491
491
|
mut_hash = {}
|
492
492
|
hm_hash = {}
|
@@ -495,8 +495,10 @@ module ViralSeq
|
|
495
495
|
# total G->A mutations at apobec3g/f positions.
|
496
496
|
total = 0
|
497
497
|
|
498
|
-
|
499
|
-
|
498
|
+
unless ref
|
499
|
+
# make consensus sequence for the input sequence hash
|
500
|
+
ref = self.consensus
|
501
|
+
end
|
500
502
|
|
501
503
|
# obtain apobec3g positions and control positions
|
502
504
|
apobec = apobec3gf(ref)
|
@@ -509,7 +511,6 @@ module ViralSeq
|
|
509
511
|
c = 0 # control muts
|
510
512
|
d = 0 # potenrial controls
|
511
513
|
mut.each do |n|
|
512
|
-
next if v[n] == "-"
|
513
514
|
if v[n] == "A"
|
514
515
|
a += 1
|
515
516
|
b += 1
|
@@ -521,7 +522,6 @@ module ViralSeq
|
|
521
522
|
total += a
|
522
523
|
|
523
524
|
control.each do |n|
|
524
|
-
next if v[n] == "-"
|
525
525
|
if v[n] == "A"
|
526
526
|
c += 1
|
527
527
|
d += 1
|
@@ -544,7 +544,7 @@ module ViralSeq
|
|
544
544
|
end
|
545
545
|
end
|
546
546
|
|
547
|
-
if self.dna_hash.size >
|
547
|
+
if self.dna_hash.size > 200
|
548
548
|
rate = total.to_f/(self.dna_hash.size)
|
549
549
|
count_mut = mut_hash.values.count_freq
|
550
550
|
maxi_count = count_mut.values.max
|
@@ -566,10 +566,12 @@ module ViralSeq
|
|
566
566
|
end
|
567
567
|
end
|
568
568
|
end
|
569
|
+
|
569
570
|
hm_seq_hash = ViralSeq::SeqHash.new
|
570
571
|
hm_hash.each do |k,_v|
|
571
572
|
hm_seq_hash.dna_hash[k] = self.dna_hash[k]
|
572
573
|
end
|
574
|
+
|
573
575
|
hm_seq_hash.title = self.title + "_hypermut"
|
574
576
|
hm_seq_hash.file = self.file
|
575
577
|
filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
|
@@ -711,10 +713,11 @@ module ViralSeq
|
|
711
713
|
|
712
714
|
|
713
715
|
# align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
|
716
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
714
717
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
715
718
|
# @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
|
716
719
|
|
717
|
-
def align(path_to_muscle = false)
|
720
|
+
def align(algorithm = :PPP, path_to_muscle = false)
|
718
721
|
seq_hash = self.dna_hash
|
719
722
|
if self.file.size > 0
|
720
723
|
temp_dir = File.dirname(self.file)
|
@@ -732,7 +735,11 @@ module ViralSeq
|
|
732
735
|
end
|
733
736
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
734
737
|
else
|
735
|
-
MuscleBio.
|
738
|
+
if MuscleBio::VERSION.to_f < 0.5
|
739
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
740
|
+
else
|
741
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
742
|
+
end
|
736
743
|
end
|
737
744
|
out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
|
738
745
|
out_seq_hash.title = self.title + "_aligned"
|
@@ -1351,7 +1358,7 @@ module ViralSeq
|
|
1351
1358
|
# APOBEC3G/F pattern: GRD -> ARD,
|
1352
1359
|
# control pattern: G[YN|RC] -> A[YN|RC],
|
1353
1360
|
def apobec3gf(seq = '')
|
1354
|
-
seq.tr!("-", "")
|
1361
|
+
#seq.tr!("-", "")
|
1355
1362
|
seq_length = seq.size
|
1356
1363
|
apobec_position = []
|
1357
1364
|
control_position = []
|
@@ -1363,6 +1370,7 @@ module ViralSeq
|
|
1363
1370
|
control_position << n
|
1364
1371
|
end
|
1365
1372
|
end
|
1373
|
+
|
1366
1374
|
return [apobec_position,control_position]
|
1367
1375
|
end # end of #apobec3gf
|
1368
1376
|
|
data/lib/viral_seq/sequence.rb
CHANGED
@@ -180,7 +180,7 @@ module ViralSeq
|
|
180
180
|
l1 = 0
|
181
181
|
l2 = 0
|
182
182
|
|
183
|
-
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
|
183
|
+
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
|
184
184
|
aln_test = aln_seq[1]
|
185
185
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
186
186
|
gap_begin = $1.size
|
@@ -214,7 +214,7 @@ module ViralSeq
|
|
214
214
|
l2 = l2 + (post_aln - b2)
|
215
215
|
end
|
216
216
|
|
217
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
217
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
218
218
|
aln_test = aln_seq[1]
|
219
219
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
220
220
|
gap_begin = $1.size
|
@@ -240,22 +240,22 @@ module ViralSeq
|
|
240
240
|
repeat = 0
|
241
241
|
|
242
242
|
if g1 == g2 and (s1 + g1 + s2) == ref.size
|
243
|
-
if s1 > s2 and g2
|
243
|
+
if s1 > s2 and g2 >= s2
|
244
244
|
ref = ref[0..(-g2-1)]
|
245
245
|
repeat = 1
|
246
246
|
l2 = l2 + g2
|
247
|
-
elsif s1 < s2 and g1
|
247
|
+
elsif s1 < s2 and g1 >= s1
|
248
248
|
ref = ref[g1..-1]
|
249
249
|
repeat = 1
|
250
250
|
l1 = l1 + g1
|
251
251
|
end
|
252
252
|
else
|
253
|
-
if g1
|
253
|
+
if g1 >= s1
|
254
254
|
ref = ref[g1..-1]
|
255
255
|
repeat = 1
|
256
256
|
l1 = l1 + g1
|
257
257
|
end
|
258
|
-
if g2
|
258
|
+
if g2 >= s2
|
259
259
|
ref = ref[0..(-g2 - 1)]
|
260
260
|
repeat = 1
|
261
261
|
l2 = l2 + g2
|
@@ -263,7 +263,7 @@ module ViralSeq
|
|
263
263
|
end
|
264
264
|
|
265
265
|
while repeat == 1
|
266
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
266
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
267
267
|
aln_test = aln_seq[1]
|
268
268
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
269
269
|
gap_begin = $1.size
|
@@ -280,12 +280,12 @@ module ViralSeq
|
|
280
280
|
l1 = l1 + gap_begin
|
281
281
|
l2 = l2 + gap_end
|
282
282
|
repeat = 0
|
283
|
-
if g1
|
283
|
+
if g1 >= s1
|
284
284
|
ref = ref[g1..-1]
|
285
285
|
repeat = 1
|
286
286
|
l1 = l1 + g1
|
287
287
|
end
|
288
|
-
if g2
|
288
|
+
if g2 >= s2
|
289
289
|
ref = ref[0..(-g2 - 1)]
|
290
290
|
repeat = 1
|
291
291
|
l2 = l2 + g2
|
@@ -293,8 +293,7 @@ module ViralSeq
|
|
293
293
|
end
|
294
294
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
295
295
|
|
296
|
-
|
297
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
296
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
298
297
|
aln_test = aln_seq[1]
|
299
298
|
ref = aln_seq[0]
|
300
299
|
|
@@ -303,12 +302,12 @@ module ViralSeq
|
|
303
302
|
if ref =~ /^(\-+)/
|
304
303
|
l1 = l1 - $1.size
|
305
304
|
elsif ref =~ /(\-+)$/
|
306
|
-
l2 = l2
|
305
|
+
l2 = l2 - $1.size
|
307
306
|
end
|
308
307
|
|
309
308
|
if (ori_ref_l - l2 - 1) >= l1
|
310
309
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
311
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
310
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
312
311
|
aln_test = aln_seq[1]
|
313
312
|
ref = aln_seq[0]
|
314
313
|
|
data/lib/viral_seq/tcs_dr.rb
CHANGED
@@ -16,7 +16,7 @@ module ViralSeq
|
|
16
16
|
:ref_genome=>"HXB2",
|
17
17
|
:ref_start=>2648,
|
18
18
|
:ref_end=>3257,
|
19
|
-
:indel=>
|
19
|
+
:indel=>true,
|
20
20
|
:trim=>false},
|
21
21
|
{:region=>"PR",
|
22
22
|
:cdna=>
|
@@ -41,7 +41,7 @@ module ViralSeq
|
|
41
41
|
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
42
42
|
:majority=>0,
|
43
43
|
:end_join=>true,
|
44
|
-
:end_join_option=>
|
44
|
+
:end_join_option=>2,
|
45
45
|
:overlap=>171,
|
46
46
|
:TCS_QC=>true,
|
47
47
|
:ref_genome=>"HXB2",
|
@@ -61,11 +61,26 @@ module ViralSeq
|
|
61
61
|
:TCS_QC=>true,
|
62
62
|
:ref_genome=>"HXB2",
|
63
63
|
:ref_start=>6585,
|
64
|
-
:ref_end=>
|
64
|
+
:ref_end=>7205..7210,
|
65
65
|
:indel=>true,
|
66
|
-
:trim=>false}
|
66
|
+
:trim=>false},
|
67
|
+
{:region=>"P17",
|
68
|
+
:cdna=>
|
69
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
70
|
+
:forward=>
|
71
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
|
72
|
+
:majority=>0,
|
73
|
+
:end_join=>true,
|
74
|
+
:end_join_option=>1,
|
75
|
+
:overlap=>0,
|
76
|
+
:TCS_QC=>true,
|
77
|
+
:ref_genome=>"HXB2",
|
78
|
+
:ref_start=>1196,
|
79
|
+
:ref_end=>1725,
|
80
|
+
:indel=>true,
|
81
|
+
:trim=>false}
|
67
82
|
]
|
68
|
-
|
83
|
+
}
|
69
84
|
end
|
70
85
|
|
71
86
|
end
|
data/lib/viral_seq/version.rb
CHANGED
data/viral_seq.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.required_rubygems_version = '>= 1.3.6'
|
36
36
|
|
37
37
|
# muscle_bio gem required
|
38
|
-
spec.add_runtime_dependency "muscle_bio", "~> 0.
|
38
|
+
spec.add_runtime_dependency "muscle_bio", "~> 0.5"
|
39
39
|
|
40
40
|
# colorize gem required
|
41
41
|
spec.add_runtime_dependency "colorize", "~> 0.1"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-08-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0.
|
62
|
+
version: '0.5'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0.
|
69
|
+
version: '0.5'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: colorize
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|