viral_seq 1.6.1 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +7 -1
- data/bin/tcs_sdrm +2 -1
- data/lib/viral_seq/muscle.rb +8 -2
- data/lib/viral_seq/seq_hash.rb +8 -3
- data/lib/viral_seq/sequence.rb +12 -13
- data/lib/viral_seq/tcs_dr.rb +2 -2
- data/lib/viral_seq/version.rb +1 -1
- data/viral_seq.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed86467ec77cdce9b1175604528937233fb2d3e91081a294b8a9aed2e5ca2b68
|
4
|
+
data.tar.gz: 3c9eef94e3252e5f486ee82b69b45e584810281d50c7d7ceaad0d51ec87ba890
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf23bedc3bc4be082eeb1e1a512fa5ae4777ca2d7b7d425348e2f95cc934fe6a1aed5875998fd5236f1485ed86797d3b2fed1d416a4cca6442e4dfa35967dd3b
|
7
|
+
data.tar.gz: 4958b976b7d97d1c7f052b0341bdbf9eee66db86cabf6aada972afb081a4766d20af3cc0c13e0c14a01b4a1e831c62c49e03ffd6131a21f8215b66fc19cee61b
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.6.
|
4
|
+
viral_seq (1.6.2)
|
5
5
|
colorize (~> 0.1)
|
6
6
|
combine_pdf (~> 1.0, >= 1.0.0)
|
7
|
-
muscle_bio (~> 0.
|
7
|
+
muscle_bio (~> 0.5)
|
8
8
|
prawn (~> 2.3, >= 2.3.0)
|
9
9
|
prawn-table (~> 0.2, >= 0.2.0)
|
10
10
|
|
@@ -15,7 +15,7 @@ GEM
|
|
15
15
|
combine_pdf (1.0.21)
|
16
16
|
ruby-rc4 (>= 0.1.5)
|
17
17
|
diff-lcs (1.3)
|
18
|
-
muscle_bio (0.
|
18
|
+
muscle_bio (0.5.0)
|
19
19
|
pdf-core (0.9.0)
|
20
20
|
prawn (2.4.0)
|
21
21
|
pdf-core (~> 0.9.0)
|
data/README.md
CHANGED
@@ -179,10 +179,16 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
179
179
|
|
180
180
|
## Updates
|
181
181
|
|
182
|
+
### Version-1.6.3-02052022
|
183
|
+
|
184
|
+
1. Updated the `ViralSeq::Muscle` module to accompany the upgrade of `muscle` from version 3.8.1 to 5.1.
|
185
|
+
2. Optimized the `locator` algorithm based on `muscle` v5.1.
|
186
|
+
3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
|
187
|
+
|
182
188
|
### Version-1.6.1-02022022
|
183
189
|
|
184
190
|
1. Fixed the `nav bar` in tcs_log html file.
|
185
|
-
2. Fixed a typo in `tcs`.
|
191
|
+
2. Fixed a typo in `tcs`.
|
186
192
|
|
187
193
|
### Version 1.6.0-01042022
|
188
194
|
|
data/bin/tcs_sdrm
CHANGED
@@ -229,8 +229,9 @@ libs.each do |lib|
|
|
229
229
|
filtered_seq_files.each do |seq_file|
|
230
230
|
filtered_sh = ViralSeq::SeqHash.fa(seq_file)
|
231
231
|
next if filtered_sh.size < 3
|
232
|
-
aligned_sh = filtered_sh.random_select(1000).align
|
232
|
+
aligned_sh = filtered_sh.random_select(1000).align(:Super5)
|
233
233
|
aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
|
234
|
+
puts 'aligned sequence written.'
|
234
235
|
end
|
235
236
|
|
236
237
|
r_script.gsub!(/PATH_TO_FASTA/,aln_seq_dir)
|
data/lib/viral_seq/muscle.rb
CHANGED
@@ -28,6 +28,8 @@ module ViralSeq
|
|
28
28
|
# align a sequence with reference sequence Strings
|
29
29
|
# @param ref_seq [String] reference sequence
|
30
30
|
# @param test_seq [String] test sequence
|
31
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
32
|
+
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
31
33
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
32
34
|
# @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
|
33
35
|
# if it cannot find the MUSCLE executable
|
@@ -37,7 +39,7 @@ module ViralSeq
|
|
37
39
|
# aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
|
38
40
|
# => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
|
39
41
|
|
40
|
-
def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
|
42
|
+
def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
|
41
43
|
temp_dir = Dir.home
|
42
44
|
temp_name = "_" + SecureRandom.alphanumeric
|
43
45
|
temp_file = File.join(temp_dir, temp_name)
|
@@ -56,7 +58,11 @@ module ViralSeq
|
|
56
58
|
end
|
57
59
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
58
60
|
else
|
59
|
-
MuscleBio.
|
61
|
+
if MuscleBio::VERSION.to_f < 0.5
|
62
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
63
|
+
else
|
64
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
65
|
+
end
|
60
66
|
end
|
61
67
|
aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
|
62
68
|
File.unlink(temp_file)
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -223,7 +223,7 @@ module ViralSeq
|
|
223
223
|
|
224
224
|
# check the size range of the DNA sequences of the SeqHash object
|
225
225
|
# @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
|
226
|
-
|
226
|
+
|
227
227
|
def check_nt_size
|
228
228
|
dna_hash = self.dna_hash
|
229
229
|
size_array = []
|
@@ -711,10 +711,11 @@ module ViralSeq
|
|
711
711
|
|
712
712
|
|
713
713
|
# align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
|
714
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
714
715
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
715
716
|
# @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
|
716
717
|
|
717
|
-
def align(path_to_muscle = false)
|
718
|
+
def align(algorithm = :PPP, path_to_muscle = false)
|
718
719
|
seq_hash = self.dna_hash
|
719
720
|
if self.file.size > 0
|
720
721
|
temp_dir = File.dirname(self.file)
|
@@ -732,7 +733,11 @@ module ViralSeq
|
|
732
733
|
end
|
733
734
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
734
735
|
else
|
735
|
-
MuscleBio.
|
736
|
+
if MuscleBio::VERSION.to_f < 0.5
|
737
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
738
|
+
else
|
739
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
740
|
+
end
|
736
741
|
end
|
737
742
|
out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
|
738
743
|
out_seq_hash.title = self.title + "_aligned"
|
data/lib/viral_seq/sequence.rb
CHANGED
@@ -180,7 +180,7 @@ module ViralSeq
|
|
180
180
|
l1 = 0
|
181
181
|
l2 = 0
|
182
182
|
|
183
|
-
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
|
183
|
+
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
|
184
184
|
aln_test = aln_seq[1]
|
185
185
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
186
186
|
gap_begin = $1.size
|
@@ -214,7 +214,7 @@ module ViralSeq
|
|
214
214
|
l2 = l2 + (post_aln - b2)
|
215
215
|
end
|
216
216
|
|
217
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
217
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
218
218
|
aln_test = aln_seq[1]
|
219
219
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
220
220
|
gap_begin = $1.size
|
@@ -240,22 +240,22 @@ module ViralSeq
|
|
240
240
|
repeat = 0
|
241
241
|
|
242
242
|
if g1 == g2 and (s1 + g1 + s2) == ref.size
|
243
|
-
if s1 > s2 and g2
|
243
|
+
if s1 > s2 and g2 >= s2
|
244
244
|
ref = ref[0..(-g2-1)]
|
245
245
|
repeat = 1
|
246
246
|
l2 = l2 + g2
|
247
|
-
elsif s1 < s2 and g1
|
247
|
+
elsif s1 < s2 and g1 >= s1
|
248
248
|
ref = ref[g1..-1]
|
249
249
|
repeat = 1
|
250
250
|
l1 = l1 + g1
|
251
251
|
end
|
252
252
|
else
|
253
|
-
if g1
|
253
|
+
if g1 >= s1
|
254
254
|
ref = ref[g1..-1]
|
255
255
|
repeat = 1
|
256
256
|
l1 = l1 + g1
|
257
257
|
end
|
258
|
-
if g2
|
258
|
+
if g2 >= s2
|
259
259
|
ref = ref[0..(-g2 - 1)]
|
260
260
|
repeat = 1
|
261
261
|
l2 = l2 + g2
|
@@ -263,7 +263,7 @@ module ViralSeq
|
|
263
263
|
end
|
264
264
|
|
265
265
|
while repeat == 1
|
266
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
266
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
267
267
|
aln_test = aln_seq[1]
|
268
268
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
269
269
|
gap_begin = $1.size
|
@@ -280,12 +280,12 @@ module ViralSeq
|
|
280
280
|
l1 = l1 + gap_begin
|
281
281
|
l2 = l2 + gap_end
|
282
282
|
repeat = 0
|
283
|
-
if g1
|
283
|
+
if g1 >= s1
|
284
284
|
ref = ref[g1..-1]
|
285
285
|
repeat = 1
|
286
286
|
l1 = l1 + g1
|
287
287
|
end
|
288
|
-
if g2
|
288
|
+
if g2 >= s2
|
289
289
|
ref = ref[0..(-g2 - 1)]
|
290
290
|
repeat = 1
|
291
291
|
l2 = l2 + g2
|
@@ -293,8 +293,7 @@ module ViralSeq
|
|
293
293
|
end
|
294
294
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
295
295
|
|
296
|
-
|
297
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
296
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
298
297
|
aln_test = aln_seq[1]
|
299
298
|
ref = aln_seq[0]
|
300
299
|
|
@@ -303,12 +302,12 @@ module ViralSeq
|
|
303
302
|
if ref =~ /^(\-+)/
|
304
303
|
l1 = l1 - $1.size
|
305
304
|
elsif ref =~ /(\-+)$/
|
306
|
-
l2 = l2
|
305
|
+
l2 = l2 - $1.size
|
307
306
|
end
|
308
307
|
|
309
308
|
if (ori_ref_l - l2 - 1) >= l1
|
310
309
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
311
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
310
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
312
311
|
aln_test = aln_seq[1]
|
313
312
|
ref = aln_seq[0]
|
314
313
|
|
data/lib/viral_seq/tcs_dr.rb
CHANGED
@@ -16,7 +16,7 @@ module ViralSeq
|
|
16
16
|
:ref_genome=>"HXB2",
|
17
17
|
:ref_start=>2648,
|
18
18
|
:ref_end=>3257,
|
19
|
-
:indel=>
|
19
|
+
:indel=>true,
|
20
20
|
:trim=>false},
|
21
21
|
{:region=>"PR",
|
22
22
|
:cdna=>
|
@@ -41,7 +41,7 @@ module ViralSeq
|
|
41
41
|
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
42
42
|
:majority=>0,
|
43
43
|
:end_join=>true,
|
44
|
-
:end_join_option=>
|
44
|
+
:end_join_option=>2,
|
45
45
|
:overlap=>171,
|
46
46
|
:TCS_QC=>true,
|
47
47
|
:ref_genome=>"HXB2",
|
data/lib/viral_seq/version.rb
CHANGED
data/viral_seq.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.required_rubygems_version = '>= 1.3.6'
|
36
36
|
|
37
37
|
# muscle_bio gem required
|
38
|
-
spec.add_runtime_dependency "muscle_bio", "~> 0.
|
38
|
+
spec.add_runtime_dependency "muscle_bio", "~> 0.5"
|
39
39
|
|
40
40
|
# colorize gem required
|
41
41
|
spec.add_runtime_dependency "colorize", "~> 0.1"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.
|
4
|
+
version: 1.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-02-
|
12
|
+
date: 2022-02-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0.
|
62
|
+
version: '0.5'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0.
|
69
|
+
version: '0.5'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: colorize
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|