viral_seq 1.6.1 → 1.6.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53c276c8975ad7b714e69906210e78950a3a6efa593cc56565fcf1b80d41d5db
4
- data.tar.gz: 6171d3fb2594d2c214b6b94420dee99915e59941e4aeb74cd723bbc9a1d42ab8
3
+ metadata.gz: ed86467ec77cdce9b1175604528937233fb2d3e91081a294b8a9aed2e5ca2b68
4
+ data.tar.gz: 3c9eef94e3252e5f486ee82b69b45e584810281d50c7d7ceaad0d51ec87ba890
5
5
  SHA512:
6
- metadata.gz: 802ba6aff173d1fdae22f110e75228e2ed8d66ae4147c0e7c12ac2cad0334ca588e3ab23de57bda04bf7dfb33c778b5132c97f2eb58991c82584419ae29b5051
7
- data.tar.gz: eb40f1364222f459392edc0dd00eba03b417d2236c4c0d2ea75b88b3153f6bf209aaf9ff0e5b28309de4abc33464cc9e9607d8d4b8bef16a9a1965a850a835ef
6
+ metadata.gz: cf23bedc3bc4be082eeb1e1a512fa5ae4777ca2d7b7d425348e2f95cc934fe6a1aed5875998fd5236f1485ed86797d3b2fed1d416a4cca6442e4dfa35967dd3b
7
+ data.tar.gz: 4958b976b7d97d1c7f052b0341bdbf9eee66db86cabf6aada972afb081a4766d20af3cc0c13e0c14a01b4a1e831c62c49e03ffd6131a21f8215b66fc19cee61b
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.6.0)
4
+ viral_seq (1.6.2)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
- muscle_bio (~> 0.4)
7
+ muscle_bio (~> 0.5)
8
8
  prawn (~> 2.3, >= 2.3.0)
9
9
  prawn-table (~> 0.2, >= 0.2.0)
10
10
 
@@ -15,7 +15,7 @@ GEM
15
15
  combine_pdf (1.0.21)
16
16
  ruby-rc4 (>= 0.1.5)
17
17
  diff-lcs (1.3)
18
- muscle_bio (0.4.0)
18
+ muscle_bio (0.5.0)
19
19
  pdf-core (0.9.0)
20
20
  prawn (2.4.0)
21
21
  pdf-core (~> 0.9.0)
data/README.md CHANGED
@@ -179,10 +179,16 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
179
179
 
180
180
  ## Updates
181
181
 
182
+ ### Version-1.6.3-02052022
183
+
184
+ 1. Updated the `ViralSeq::Muscle` module to accompany the upgrade of `muscle` from version 3.8.1 to 5.1.
185
+ 2. Optimized the `locator` algorithm based on `muscle` v5.1.
186
+ 3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
187
+
182
188
  ### Version-1.6.1-02022022
183
189
 
184
190
  1. Fixed the `nav bar` in tcs_log html file.
185
- 2. Fixed a typo in `tcs`.
191
+ 2. Fixed a typo in `tcs`.
186
192
 
187
193
  ### Version 1.6.0-01042022
188
194
 
data/bin/tcs_sdrm CHANGED
@@ -229,8 +229,9 @@ libs.each do |lib|
229
229
  filtered_seq_files.each do |seq_file|
230
230
  filtered_sh = ViralSeq::SeqHash.fa(seq_file)
231
231
  next if filtered_sh.size < 3
232
- aligned_sh = filtered_sh.random_select(1000).align
232
+ aligned_sh = filtered_sh.random_select(1000).align(:Super5)
233
233
  aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
234
+ puts 'aligned sequence written.'
234
235
  end
235
236
 
236
237
  r_script.gsub!(/PATH_TO_FASTA/,aln_seq_dir)
@@ -28,6 +28,8 @@ module ViralSeq
28
28
  # align a sequence with reference sequence Strings
29
29
  # @param ref_seq [String] reference sequence
30
30
  # @param test_seq [String] test sequence
31
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
32
+ # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
31
33
  # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
32
34
  # @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
33
35
  # if it cannot find the MUSCLE executable
@@ -37,7 +39,7 @@ module ViralSeq
37
39
  # aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
38
40
  # => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
39
41
 
40
- def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
42
+ def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
41
43
  temp_dir = Dir.home
42
44
  temp_name = "_" + SecureRandom.alphanumeric
43
45
  temp_file = File.join(temp_dir, temp_name)
@@ -56,7 +58,11 @@ module ViralSeq
56
58
  end
57
59
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
58
60
  else
59
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
61
+ if MuscleBio::VERSION.to_f < 0.5
62
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
63
+ else
64
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
65
+ end
60
66
  end
61
67
  aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
62
68
  File.unlink(temp_file)
@@ -223,7 +223,7 @@ module ViralSeq
223
223
 
224
224
  # check the size range of the DNA sequences of the SeqHash object
225
225
  # @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
226
-
226
+
227
227
  def check_nt_size
228
228
  dna_hash = self.dna_hash
229
229
  size_array = []
@@ -711,10 +711,11 @@ module ViralSeq
711
711
 
712
712
 
713
713
  # align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
714
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
714
715
  # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
715
716
  # @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
716
717
 
717
- def align(path_to_muscle = false)
718
+ def align(algorithm = :PPP, path_to_muscle = false)
718
719
  seq_hash = self.dna_hash
719
720
  if self.file.size > 0
720
721
  temp_dir = File.dirname(self.file)
@@ -732,7 +733,11 @@ module ViralSeq
732
733
  end
733
734
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
734
735
  else
735
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
736
+ if MuscleBio::VERSION.to_f < 0.5
737
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
738
+ else
739
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
740
+ end
736
741
  end
737
742
  out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
738
743
  out_seq_hash.title = self.title + "_aligned"
@@ -180,7 +180,7 @@ module ViralSeq
180
180
  l1 = 0
181
181
  l2 = 0
182
182
 
183
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
183
+ aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
184
184
  aln_test = aln_seq[1]
185
185
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
186
186
  gap_begin = $1.size
@@ -214,7 +214,7 @@ module ViralSeq
214
214
  l2 = l2 + (post_aln - b2)
215
215
  end
216
216
 
217
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
217
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
218
218
  aln_test = aln_seq[1]
219
219
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
220
220
  gap_begin = $1.size
@@ -240,22 +240,22 @@ module ViralSeq
240
240
  repeat = 0
241
241
 
242
242
  if g1 == g2 and (s1 + g1 + s2) == ref.size
243
- if s1 > s2 and g2 > 2*s2
243
+ if s1 > s2 and g2 >= s2
244
244
  ref = ref[0..(-g2-1)]
245
245
  repeat = 1
246
246
  l2 = l2 + g2
247
- elsif s1 < s2 and g1 > 2*s1
247
+ elsif s1 < s2 and g1 >= s1
248
248
  ref = ref[g1..-1]
249
249
  repeat = 1
250
250
  l1 = l1 + g1
251
251
  end
252
252
  else
253
- if g1 > 2*s1
253
+ if g1 >= s1
254
254
  ref = ref[g1..-1]
255
255
  repeat = 1
256
256
  l1 = l1 + g1
257
257
  end
258
- if g2 > 2*s2
258
+ if g2 >= s2
259
259
  ref = ref[0..(-g2 - 1)]
260
260
  repeat = 1
261
261
  l2 = l2 + g2
@@ -263,7 +263,7 @@ module ViralSeq
263
263
  end
264
264
 
265
265
  while repeat == 1
266
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
266
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
267
267
  aln_test = aln_seq[1]
268
268
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
269
269
  gap_begin = $1.size
@@ -280,12 +280,12 @@ module ViralSeq
280
280
  l1 = l1 + gap_begin
281
281
  l2 = l2 + gap_end
282
282
  repeat = 0
283
- if g1 > 2*s1
283
+ if g1 >= s1
284
284
  ref = ref[g1..-1]
285
285
  repeat = 1
286
286
  l1 = l1 + g1
287
287
  end
288
- if g2 > 2*s2
288
+ if g2 >= s2
289
289
  ref = ref[0..(-g2 - 1)]
290
290
  repeat = 1
291
291
  l2 = l2 + g2
@@ -293,8 +293,7 @@ module ViralSeq
293
293
  end
294
294
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
295
295
 
296
-
297
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
296
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
298
297
  aln_test = aln_seq[1]
299
298
  ref = aln_seq[0]
300
299
 
@@ -303,12 +302,12 @@ module ViralSeq
303
302
  if ref =~ /^(\-+)/
304
303
  l1 = l1 - $1.size
305
304
  elsif ref =~ /(\-+)$/
306
- l2 = l2 + $1.size
305
+ l2 = l2 - $1.size
307
306
  end
308
307
 
309
308
  if (ori_ref_l - l2 - 1) >= l1
310
309
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
311
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
310
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
312
311
  aln_test = aln_seq[1]
313
312
  ref = aln_seq[0]
314
313
 
@@ -16,7 +16,7 @@ module ViralSeq
16
16
  :ref_genome=>"HXB2",
17
17
  :ref_start=>2648,
18
18
  :ref_end=>3257,
19
- :indel=>false,
19
+ :indel=>true,
20
20
  :trim=>false},
21
21
  {:region=>"PR",
22
22
  :cdna=>
@@ -41,7 +41,7 @@ module ViralSeq
41
41
  :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
42
  :majority=>0,
43
43
  :end_join=>true,
44
- :end_join_option=>3,
44
+ :end_join_option=>2,
45
45
  :overlap=>171,
46
46
  :TCS_QC=>true,
47
47
  :ref_genome=>"HXB2",
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.6.1"
5
+ VERSION = "1.6.3"
6
6
  TCS_VERSION = "2.5.0"
7
7
  end
data/viral_seq.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.required_rubygems_version = '>= 1.3.6'
36
36
 
37
37
  # muscle_bio gem required
38
- spec.add_runtime_dependency "muscle_bio", "~> 0.4"
38
+ spec.add_runtime_dependency "muscle_bio", "~> 0.5"
39
39
 
40
40
  # colorize gem required
41
41
  spec.add_runtime_dependency "colorize", "~> 0.1"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-02-02 00:00:00.000000000 Z
12
+ date: 2022-02-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0.4'
62
+ version: '0.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0.4'
69
+ version: '0.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: colorize
72
72
  requirement: !ruby/object:Gem::Requirement