viral_seq 1.6.1 → 1.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +7 -1
- data/bin/tcs_sdrm +2 -1
- data/lib/viral_seq/muscle.rb +8 -2
- data/lib/viral_seq/seq_hash.rb +8 -3
- data/lib/viral_seq/sequence.rb +12 -13
- data/lib/viral_seq/tcs_dr.rb +2 -2
- data/lib/viral_seq/version.rb +1 -1
- data/viral_seq.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed86467ec77cdce9b1175604528937233fb2d3e91081a294b8a9aed2e5ca2b68
|
4
|
+
data.tar.gz: 3c9eef94e3252e5f486ee82b69b45e584810281d50c7d7ceaad0d51ec87ba890
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf23bedc3bc4be082eeb1e1a512fa5ae4777ca2d7b7d425348e2f95cc934fe6a1aed5875998fd5236f1485ed86797d3b2fed1d416a4cca6442e4dfa35967dd3b
|
7
|
+
data.tar.gz: 4958b976b7d97d1c7f052b0341bdbf9eee66db86cabf6aada972afb081a4766d20af3cc0c13e0c14a01b4a1e831c62c49e03ffd6131a21f8215b66fc19cee61b
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.6.
|
4
|
+
viral_seq (1.6.2)
|
5
5
|
colorize (~> 0.1)
|
6
6
|
combine_pdf (~> 1.0, >= 1.0.0)
|
7
|
-
muscle_bio (~> 0.
|
7
|
+
muscle_bio (~> 0.5)
|
8
8
|
prawn (~> 2.3, >= 2.3.0)
|
9
9
|
prawn-table (~> 0.2, >= 0.2.0)
|
10
10
|
|
@@ -15,7 +15,7 @@ GEM
|
|
15
15
|
combine_pdf (1.0.21)
|
16
16
|
ruby-rc4 (>= 0.1.5)
|
17
17
|
diff-lcs (1.3)
|
18
|
-
muscle_bio (0.
|
18
|
+
muscle_bio (0.5.0)
|
19
19
|
pdf-core (0.9.0)
|
20
20
|
prawn (2.4.0)
|
21
21
|
pdf-core (~> 0.9.0)
|
data/README.md
CHANGED
@@ -179,10 +179,16 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
179
179
|
|
180
180
|
## Updates
|
181
181
|
|
182
|
+
### Version-1.6.3-02052022
|
183
|
+
|
184
|
+
1. Updated the `ViralSeq::Muscle` module to accompany the upgrade of `muscle` from version 3.8.1 to 5.1.
|
185
|
+
2. Optimized the `locator` algorithm based on `muscle` v5.1.
|
186
|
+
3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
|
187
|
+
|
182
188
|
### Version-1.6.1-02022022
|
183
189
|
|
184
190
|
1. Fixed the `nav bar` in tcs_log html file.
|
185
|
-
2. Fixed a typo in `tcs`.
|
191
|
+
2. Fixed a typo in `tcs`.
|
186
192
|
|
187
193
|
### Version 1.6.0-01042022
|
188
194
|
|
data/bin/tcs_sdrm
CHANGED
@@ -229,8 +229,9 @@ libs.each do |lib|
|
|
229
229
|
filtered_seq_files.each do |seq_file|
|
230
230
|
filtered_sh = ViralSeq::SeqHash.fa(seq_file)
|
231
231
|
next if filtered_sh.size < 3
|
232
|
-
aligned_sh = filtered_sh.random_select(1000).align
|
232
|
+
aligned_sh = filtered_sh.random_select(1000).align(:Super5)
|
233
233
|
aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
|
234
|
+
puts 'aligned sequence written.'
|
234
235
|
end
|
235
236
|
|
236
237
|
r_script.gsub!(/PATH_TO_FASTA/,aln_seq_dir)
|
data/lib/viral_seq/muscle.rb
CHANGED
@@ -28,6 +28,8 @@ module ViralSeq
|
|
28
28
|
# align a sequence with reference sequence Strings
|
29
29
|
# @param ref_seq [String] reference sequence
|
30
30
|
# @param test_seq [String] test sequence
|
31
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
32
|
+
# @param path_to_muscle [String], path to the MUSCLE executable. If not provided (the default), RubyGem::MuscleBio is used
|
31
33
|
# @param path_to_muscle [String], path to the MUSCLE executable. If not provided (the default), RubyGem::MuscleBio is used
|
32
34
|
# @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
|
33
35
|
# if it cannot find the MUSCLE executable
|
@@ -37,7 +39,7 @@ module ViralSeq
|
|
37
39
|
# aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
|
38
40
|
# => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
|
39
41
|
|
40
|
-
def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
|
42
|
+
def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
|
41
43
|
temp_dir = Dir.home
|
42
44
|
temp_name = "_" + SecureRandom.alphanumeric
|
43
45
|
temp_file = File.join(temp_dir, temp_name)
|
@@ -56,7 +58,11 @@ module ViralSeq
|
|
56
58
|
end
|
57
59
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
58
60
|
else
|
59
|
-
MuscleBio.
|
61
|
+
if MuscleBio::VERSION.to_f < 0.5
|
62
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
63
|
+
else
|
64
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
65
|
+
end
|
60
66
|
end
|
61
67
|
aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
|
62
68
|
File.unlink(temp_file)
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -223,7 +223,7 @@ module ViralSeq
|
|
223
223
|
|
224
224
|
# check the size range of the DNA sequences of the SeqHash object
|
225
225
|
# @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
|
226
|
-
|
226
|
+
|
227
227
|
def check_nt_size
|
228
228
|
dna_hash = self.dna_hash
|
229
229
|
size_array = []
|
@@ -711,10 +711,11 @@ module ViralSeq
|
|
711
711
|
|
712
712
|
|
713
713
|
# align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
|
714
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
714
715
|
# @param path_to_muscle [String], path to the MUSCLE executable. If not provided (the default), RubyGem::MuscleBio is used
|
715
716
|
# @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
|
716
717
|
|
717
|
-
def align(path_to_muscle = false)
|
718
|
+
def align(algorithm = :PPP, path_to_muscle = false)
|
718
719
|
seq_hash = self.dna_hash
|
719
720
|
if self.file.size > 0
|
720
721
|
temp_dir = File.dirname(self.file)
|
@@ -732,7 +733,11 @@ module ViralSeq
|
|
732
733
|
end
|
733
734
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
734
735
|
else
|
735
|
-
MuscleBio.
|
736
|
+
if MuscleBio::VERSION.to_f < 0.5
|
737
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
738
|
+
else
|
739
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
740
|
+
end
|
736
741
|
end
|
737
742
|
out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
|
738
743
|
out_seq_hash.title = self.title + "_aligned"
|
data/lib/viral_seq/sequence.rb
CHANGED
@@ -180,7 +180,7 @@ module ViralSeq
|
|
180
180
|
l1 = 0
|
181
181
|
l2 = 0
|
182
182
|
|
183
|
-
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
|
183
|
+
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
|
184
184
|
aln_test = aln_seq[1]
|
185
185
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
186
186
|
gap_begin = $1.size
|
@@ -214,7 +214,7 @@ module ViralSeq
|
|
214
214
|
l2 = l2 + (post_aln - b2)
|
215
215
|
end
|
216
216
|
|
217
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
217
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
218
218
|
aln_test = aln_seq[1]
|
219
219
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
220
220
|
gap_begin = $1.size
|
@@ -240,22 +240,22 @@ module ViralSeq
|
|
240
240
|
repeat = 0
|
241
241
|
|
242
242
|
if g1 == g2 and (s1 + g1 + s2) == ref.size
|
243
|
-
if s1 > s2 and g2
|
243
|
+
if s1 > s2 and g2 >= s2
|
244
244
|
ref = ref[0..(-g2-1)]
|
245
245
|
repeat = 1
|
246
246
|
l2 = l2 + g2
|
247
|
-
elsif s1 < s2 and g1
|
247
|
+
elsif s1 < s2 and g1 >= s1
|
248
248
|
ref = ref[g1..-1]
|
249
249
|
repeat = 1
|
250
250
|
l1 = l1 + g1
|
251
251
|
end
|
252
252
|
else
|
253
|
-
if g1
|
253
|
+
if g1 >= s1
|
254
254
|
ref = ref[g1..-1]
|
255
255
|
repeat = 1
|
256
256
|
l1 = l1 + g1
|
257
257
|
end
|
258
|
-
if g2
|
258
|
+
if g2 >= s2
|
259
259
|
ref = ref[0..(-g2 - 1)]
|
260
260
|
repeat = 1
|
261
261
|
l2 = l2 + g2
|
@@ -263,7 +263,7 @@ module ViralSeq
|
|
263
263
|
end
|
264
264
|
|
265
265
|
while repeat == 1
|
266
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
266
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
267
267
|
aln_test = aln_seq[1]
|
268
268
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
269
269
|
gap_begin = $1.size
|
@@ -280,12 +280,12 @@ module ViralSeq
|
|
280
280
|
l1 = l1 + gap_begin
|
281
281
|
l2 = l2 + gap_end
|
282
282
|
repeat = 0
|
283
|
-
if g1
|
283
|
+
if g1 >= s1
|
284
284
|
ref = ref[g1..-1]
|
285
285
|
repeat = 1
|
286
286
|
l1 = l1 + g1
|
287
287
|
end
|
288
|
-
if g2
|
288
|
+
if g2 >= s2
|
289
289
|
ref = ref[0..(-g2 - 1)]
|
290
290
|
repeat = 1
|
291
291
|
l2 = l2 + g2
|
@@ -293,8 +293,7 @@ module ViralSeq
|
|
293
293
|
end
|
294
294
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
295
295
|
|
296
|
-
|
297
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
296
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
298
297
|
aln_test = aln_seq[1]
|
299
298
|
ref = aln_seq[0]
|
300
299
|
|
@@ -303,12 +302,12 @@ module ViralSeq
|
|
303
302
|
if ref =~ /^(\-+)/
|
304
303
|
l1 = l1 - $1.size
|
305
304
|
elsif ref =~ /(\-+)$/
|
306
|
-
l2 = l2
|
305
|
+
l2 = l2 - $1.size
|
307
306
|
end
|
308
307
|
|
309
308
|
if (ori_ref_l - l2 - 1) >= l1
|
310
309
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
311
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
310
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
312
311
|
aln_test = aln_seq[1]
|
313
312
|
ref = aln_seq[0]
|
314
313
|
|
data/lib/viral_seq/tcs_dr.rb
CHANGED
@@ -16,7 +16,7 @@ module ViralSeq
|
|
16
16
|
:ref_genome=>"HXB2",
|
17
17
|
:ref_start=>2648,
|
18
18
|
:ref_end=>3257,
|
19
|
-
:indel=>
|
19
|
+
:indel=>true,
|
20
20
|
:trim=>false},
|
21
21
|
{:region=>"PR",
|
22
22
|
:cdna=>
|
@@ -41,7 +41,7 @@ module ViralSeq
|
|
41
41
|
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
42
42
|
:majority=>0,
|
43
43
|
:end_join=>true,
|
44
|
-
:end_join_option=>
|
44
|
+
:end_join_option=>2,
|
45
45
|
:overlap=>171,
|
46
46
|
:TCS_QC=>true,
|
47
47
|
:ref_genome=>"HXB2",
|
data/lib/viral_seq/version.rb
CHANGED
data/viral_seq.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.required_rubygems_version = '>= 1.3.6'
|
36
36
|
|
37
37
|
# muscle_bio gem required
|
38
|
-
spec.add_runtime_dependency "muscle_bio", "~> 0.
|
38
|
+
spec.add_runtime_dependency "muscle_bio", "~> 0.5"
|
39
39
|
|
40
40
|
# colorize gem required
|
41
41
|
spec.add_runtime_dependency "colorize", "~> 0.1"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.
|
4
|
+
version: 1.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-02-
|
12
|
+
date: 2022-02-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0.
|
62
|
+
version: '0.5'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0.
|
69
|
+
version: '0.5'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: colorize
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|