viral_seq 1.6.1 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53c276c8975ad7b714e69906210e78950a3a6efa593cc56565fcf1b80d41d5db
4
- data.tar.gz: 6171d3fb2594d2c214b6b94420dee99915e59941e4aeb74cd723bbc9a1d42ab8
3
+ metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
4
+ data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
5
5
  SHA512:
6
- metadata.gz: 802ba6aff173d1fdae22f110e75228e2ed8d66ae4147c0e7c12ac2cad0334ca588e3ab23de57bda04bf7dfb33c778b5132c97f2eb58991c82584419ae29b5051
7
- data.tar.gz: eb40f1364222f459392edc0dd00eba03b417d2236c4c0d2ea75b88b3153f6bf209aaf9ff0e5b28309de4abc33464cc9e9607d8d4b8bef16a9a1965a850a835ef
6
+ metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
7
+ data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.6.0)
4
+ viral_seq (1.6.5)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
- muscle_bio (~> 0.4)
7
+ muscle_bio (~> 0.5)
8
8
  prawn (~> 2.3, >= 2.3.0)
9
9
  prawn-table (~> 0.2, >= 0.2.0)
10
10
 
@@ -15,7 +15,7 @@ GEM
15
15
  combine_pdf (1.0.21)
16
16
  ruby-rc4 (>= 0.1.5)
17
17
  diff-lcs (1.3)
18
- muscle_bio (0.4.0)
18
+ muscle_bio (0.5.0)
19
19
  pdf-core (0.9.0)
20
20
  prawn (2.4.0)
21
21
  pdf-core (~> 0.9.0)
data/README.md CHANGED
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
10
10
 
11
11
  Specifically for Primer ID sequencing and HIV drug resistance analysis.
12
12
 
13
+ CLI tools `tcs`, `tcs_sdrm`, `tcs_log` and `locator` are included in the gem.
14
+
13
15
  #### tcs web app - https://primer-id.org/
14
16
 
15
17
  ## Illustration for the Primer ID Sequencing
@@ -22,6 +24,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
22
24
  [Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
23
25
  [Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
24
26
 
27
+ ## Requirements
28
+
29
+ Required Ruby Version: >= 2.5
30
+
31
+ Required RubyGems version: >= 1.3.6
32
+
25
33
  ## Install
26
34
 
27
35
  ```bash
@@ -179,10 +187,27 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
179
187
 
180
188
  ## Updates
181
189
 
190
+ ### Version-1.7.0-08242022
191
+
192
+ 1. Added a warning if the `tcs` pipeline is executed from source instead of being installed via `gem`.
193
+ 2. Optimized the `ViralSeq::SeqHash#a3g` hypermut algorithm. It now accepts an external reference in place of the sample consensus.
194
+
195
+ ### Version-1.6.4-07182022
196
+
197
+ 1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
198
+ 2. Loosened the locator params for the "V1V3" end region to handle rare alignment issues. The default "V1V3" region now ends at any position from 7205 to 7210 instead of exactly 7208.
199
+ 3. `tcs_sdrm` now analyses the "P17" region for pairwise diversity.
200
+
201
+ ### Version-1.6.3-02052022
202
+
203
+ 1. Updated the `ViralSeq::Muscle` module to accompany the upgrade of `muscle` from version 3.8.1 to 5.1.
204
+ 2. Optimized the `locator` algorithm based on `muscle` v5.1.
205
+ 3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
206
+
182
207
  ### Version-1.6.1-02022022
183
208
 
184
209
  1. Fixed the `nav bar` in tcs_log html file.
185
- 2. Fixed a typo in `tcs`.
210
+ 2. Fixed a typo in `tcs`.
186
211
 
187
212
  ### Version 1.6.0-01042022
188
213
 
data/bin/tcs CHANGED
@@ -22,20 +22,38 @@
22
22
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
23
  # THE SOFTWARE.
24
24
 
25
+ # Install using `gem install viral_seq`
25
26
  # Use JSON file as the run param
26
27
  # run `tcs -j` to generate param json file.
27
28
 
28
- require 'viral_seq'
29
+ def gem_installed?(gem_name)
30
+ found_gem = false
31
+ begin
32
+ found_gem = Gem::Specification.find_by_name(gem_name)
33
+ rescue Gem::LoadError
34
+ return false
35
+ else
36
+ return true
37
+ end
38
+ end
39
+
40
+ if gem_installed?('viral_seq')
41
+ require 'viral_seq'
42
+ else
43
+ printf "\n****************************************************\n"
44
+ printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
45
+ printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
46
+ printf "****************************************************\n\n"
47
+ exit 1
48
+ end
49
+
50
+
29
51
  require 'json'
30
52
  require 'colorize'
31
53
  require 'optparse'
32
54
 
33
55
  options = {}
34
56
 
35
- # banner = '-'*50 + "\n" +
36
- # '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |' + "\n" +
37
- # '-'*50 + "\n"
38
-
39
57
  banner = "\n" +
40
58
  "████████ ██████ ███████ ██████ ██ ██████ ███████ ██ ██ ███ ██ ███████\n".light_red +
41
59
  " ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██\n".light_yellow +
data/bin/tcs_sdrm CHANGED
@@ -172,6 +172,25 @@ libs.each do |lib|
172
172
  linkage_list += sdrm[1]
173
173
  aa_report_list += sdrm[2]
174
174
 
175
+ elsif seq_basename =~/P17/i
176
+ a3g_check = seqs.a3g
177
+ a3g_seqs = a3g_check[:a3g_seq]
178
+ a3g_filtered_seqs = a3g_check[:filtered_seq]
179
+ stop_codon_check = a3g_filtered_seqs.stop_codon(2)
180
+ stop_codon_seqs = stop_codon_check[:with_stop_codon]
181
+ filtered_seqs = stop_codon_check[:without_stop_codon]
182
+ poisson_minority_cutoff = filtered_seqs.pm
183
+ fdr_hash = filtered_seqs.fdr
184
+ summary_hash[:P17] = [
185
+ seqs.size.to_s,
186
+ a3g_seqs.size.to_s,
187
+ stop_codon_seqs.size.to_s,
188
+ filtered_seqs.size.to_s,
189
+ poisson_minority_cutoff.to_s
190
+ ].join(',')
191
+ next if filtered_seqs.size < 3
192
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
193
+
175
194
  elsif seq_basename =~/RT/i
176
195
  rt_seq1 = {}
177
196
  rt_seq2 = {}
@@ -229,7 +248,7 @@ libs.each do |lib|
229
248
  filtered_seq_files.each do |seq_file|
230
249
  filtered_sh = ViralSeq::SeqHash.fa(seq_file)
231
250
  next if filtered_sh.size < 3
232
- aligned_sh = filtered_sh.random_select(1000).align
251
+ aligned_sh = filtered_sh.random_select(1000).align(:Super5)
233
252
  aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
234
253
  end
235
254
 
@@ -249,7 +268,7 @@ libs.each do |lib|
249
268
  tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
250
269
  summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
251
270
  end
252
- [:PR, :RT, :IN, :V1V3].each do |regions|
271
+ [:PR, :RT, :IN, :V1V3, :P17].each do |regions|
253
272
  next unless summary_hash[regions]
254
273
  seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
255
274
  end
@@ -270,10 +289,13 @@ libs.each do |lib|
270
289
  tcs_RT = 0
271
290
  tcs_IN = 0
272
291
  tcs_V1V3 = 0
292
+ tcs_P17 = 0
273
293
  pi_RT = 0.0
274
294
  pi_V1V3 = 0.0
295
+ pi_P17 = 0.0
275
296
  dist20_RT = 0.0
276
297
  dist20_V1V3 = 0.0
298
+ dist20_P17 = 0.0
277
299
  summary_lines.each do |line|
278
300
  data = line.chomp.split(",")
279
301
  if data[0] == "PR"
@@ -288,6 +310,10 @@ libs.each do |lib|
288
310
  tcs_V1V3 = data[1].to_i
289
311
  pi_V1V3 = data[6].to_f
290
312
  dist20_V1V3 = data[7].to_f
313
+ elsif data[0] == "P17"
314
+ tcs_P17 = data[4].to_i
315
+ pi_P17 = data[6].to_f
316
+ dist20_P17 = data[7].to_f
291
317
  end
292
318
  end
293
319
 
@@ -323,9 +349,13 @@ libs.each do |lib|
323
349
  tcs_RT: tcs_RT,
324
350
  tcs_IN: tcs_IN,
325
351
  tcs_V1V3: tcs_V1V3,
352
+ tcs_P17: tcs_P17,
326
353
  pi_RT: pi_RT,
354
+ pi_V1V3: pi_V1V3,
355
+ pi_P17: pi_P17,
327
356
  dist20_RT: dist20_RT,
328
357
  dist20_V1V3: dist20_V1V3,
358
+ dist20_P17: dist20_P17,
329
359
  recency: recency,
330
360
  sdrm_PR: sdrm_PR,
331
361
  sdrm_RT: sdrm_RT,
data/docs/dr.json CHANGED
@@ -62,6 +62,21 @@
62
62
  "ref_end": 7208,
63
63
  "indel": true,
64
64
  "trim": false
65
+ },
66
+ {
67
+ "region": "P17",
68
+ "cdna": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
69
+ "forward": "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
70
+ "majority": 0.5,
71
+ "end_join": true,
72
+ "end_join_option": 1,
73
+ "overlap": 0,
74
+ "TCS_QC": true,
75
+ "ref_genome": "HXB2",
76
+ "ref_start": 1196,
77
+ "ref_end": 1725,
78
+ "indel": true,
79
+ "trim": false
65
80
  }
66
81
  ]
67
82
  }
@@ -5,7 +5,7 @@ module ViralSeq
5
5
  # functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
6
6
  # works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
7
7
  # PR codon 1-99
8
- # RT codon 34-122 (HXB2 2650-2914) and 152-236(3001-3257)
8
+ # RT codon 34-122 (HXB2 2649-2914) and 152-236(3001-3257)
9
9
  # IN codon 53-174 (HXB2 4384-4751)
10
10
  # @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
11
11
  # can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
@@ -28,6 +28,8 @@ module ViralSeq
28
28
  # align a sequence with reference sequence Strings
29
29
  # @param ref_seq [String] reference sequence
30
30
  # @param test_seq [String] test sequence
31
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
32
+ # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
31
33
  # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
32
34
  # @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
33
35
  # if it cannot find the MUSCLE executable
@@ -37,7 +39,7 @@ module ViralSeq
37
39
  # aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
38
40
  # => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
39
41
 
40
- def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
42
+ def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
41
43
  temp_dir = Dir.home
42
44
  temp_name = "_" + SecureRandom.alphanumeric
43
45
  temp_file = File.join(temp_dir, temp_name)
@@ -56,7 +58,11 @@ module ViralSeq
56
58
  end
57
59
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
58
60
  else
59
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
61
+ if MuscleBio::VERSION.to_f < 0.5
62
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
63
+ else
64
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
65
+ end
60
66
  end
61
67
  aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
62
68
  File.unlink(temp_file)
@@ -223,7 +223,7 @@ module ViralSeq
223
223
 
224
224
  # check the size range of the DNA sequences of the SeqHash object
225
225
  # @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
226
-
226
+
227
227
  def check_nt_size
228
228
  dna_hash = self.dna_hash
229
229
  size_array = []
@@ -450,7 +450,7 @@ module ViralSeq
450
450
  # function to determine if the sequences have APOBEC3g/f hypermutation.
451
451
  # # APOBEC3G/F pattern: GRD -> ARD
452
452
  # # control pattern: G[YN|RC] -> A[YN|RC]
453
- # # use the sample consensus to determine potential a3g sites
453
+ # # use the sample consensus to determine potential a3g sites (default) or provide external reference sequences as a `String`
454
454
  # # Two criteria to identify hypermutation
455
455
  # # 1. Fisher's exact test on the frequencies of G to A mutation at A3G positions vs. non-A3G positions
456
456
  # # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
@@ -486,7 +486,7 @@ module ViralSeq
486
486
  # # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
487
487
  # @see https://www.hiv.lanl.gov/content/sequence/HYPERMUT/hypermut.html LANL Hypermut
488
488
 
489
- def a3g_hypermut
489
+ def a3g_hypermut(ref = nil)
490
490
  # mut_hash number of apobec3g/f mutations per sequence
491
491
  mut_hash = {}
492
492
  hm_hash = {}
@@ -495,8 +495,10 @@ module ViralSeq
495
495
  # total G->A mutations at apobec3g/f positions.
496
496
  total = 0
497
497
 
498
- # make consensus sequence for the input sequence hash
499
- ref = self.consensus
498
+ unless ref
499
+ # make consensus sequence for the input sequence hash
500
+ ref = self.consensus
501
+ end
500
502
 
501
503
  # obtain apobec3g positions and control positions
502
504
  apobec = apobec3gf(ref)
@@ -509,7 +511,6 @@ module ViralSeq
509
511
  c = 0 # control muts
510
512
  d = 0 # potenrial controls
511
513
  mut.each do |n|
512
- next if v[n] == "-"
513
514
  if v[n] == "A"
514
515
  a += 1
515
516
  b += 1
@@ -521,7 +522,6 @@ module ViralSeq
521
522
  total += a
522
523
 
523
524
  control.each do |n|
524
- next if v[n] == "-"
525
525
  if v[n] == "A"
526
526
  c += 1
527
527
  d += 1
@@ -544,7 +544,7 @@ module ViralSeq
544
544
  end
545
545
  end
546
546
 
547
- if self.dna_hash.size > 20
547
+ if self.dna_hash.size > 200
548
548
  rate = total.to_f/(self.dna_hash.size)
549
549
  count_mut = mut_hash.values.count_freq
550
550
  maxi_count = count_mut.values.max
@@ -566,10 +566,12 @@ module ViralSeq
566
566
  end
567
567
  end
568
568
  end
569
+
569
570
  hm_seq_hash = ViralSeq::SeqHash.new
570
571
  hm_hash.each do |k,_v|
571
572
  hm_seq_hash.dna_hash[k] = self.dna_hash[k]
572
573
  end
574
+
573
575
  hm_seq_hash.title = self.title + "_hypermut"
574
576
  hm_seq_hash.file = self.file
575
577
  filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
@@ -711,10 +713,11 @@ module ViralSeq
711
713
 
712
714
 
713
715
  # align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
716
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
714
717
  # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
715
718
  # @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
716
719
 
717
- def align(path_to_muscle = false)
720
+ def align(algorithm = :PPP, path_to_muscle = false)
718
721
  seq_hash = self.dna_hash
719
722
  if self.file.size > 0
720
723
  temp_dir = File.dirname(self.file)
@@ -732,7 +735,11 @@ module ViralSeq
732
735
  end
733
736
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
734
737
  else
735
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
738
+ if MuscleBio::VERSION.to_f < 0.5
739
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
740
+ else
741
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
742
+ end
736
743
  end
737
744
  out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
738
745
  out_seq_hash.title = self.title + "_aligned"
@@ -1351,7 +1358,7 @@ module ViralSeq
1351
1358
  # APOBEC3G/F pattern: GRD -> ARD,
1352
1359
  # control pattern: G[YN|RC] -> A[YN|RC],
1353
1360
  def apobec3gf(seq = '')
1354
- seq.tr!("-", "")
1361
+ #seq.tr!("-", "")
1355
1362
  seq_length = seq.size
1356
1363
  apobec_position = []
1357
1364
  control_position = []
@@ -1363,6 +1370,7 @@ module ViralSeq
1363
1370
  control_position << n
1364
1371
  end
1365
1372
  end
1373
+
1366
1374
  return [apobec_position,control_position]
1367
1375
  end # end of #apobec3gf
1368
1376
 
@@ -180,7 +180,7 @@ module ViralSeq
180
180
  l1 = 0
181
181
  l2 = 0
182
182
 
183
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
183
+ aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
184
184
  aln_test = aln_seq[1]
185
185
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
186
186
  gap_begin = $1.size
@@ -214,7 +214,7 @@ module ViralSeq
214
214
  l2 = l2 + (post_aln - b2)
215
215
  end
216
216
 
217
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
217
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
218
218
  aln_test = aln_seq[1]
219
219
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
220
220
  gap_begin = $1.size
@@ -240,22 +240,22 @@ module ViralSeq
240
240
  repeat = 0
241
241
 
242
242
  if g1 == g2 and (s1 + g1 + s2) == ref.size
243
- if s1 > s2 and g2 > 2*s2
243
+ if s1 > s2 and g2 >= s2
244
244
  ref = ref[0..(-g2-1)]
245
245
  repeat = 1
246
246
  l2 = l2 + g2
247
- elsif s1 < s2 and g1 > 2*s1
247
+ elsif s1 < s2 and g1 >= s1
248
248
  ref = ref[g1..-1]
249
249
  repeat = 1
250
250
  l1 = l1 + g1
251
251
  end
252
252
  else
253
- if g1 > 2*s1
253
+ if g1 >= s1
254
254
  ref = ref[g1..-1]
255
255
  repeat = 1
256
256
  l1 = l1 + g1
257
257
  end
258
- if g2 > 2*s2
258
+ if g2 >= s2
259
259
  ref = ref[0..(-g2 - 1)]
260
260
  repeat = 1
261
261
  l2 = l2 + g2
@@ -263,7 +263,7 @@ module ViralSeq
263
263
  end
264
264
 
265
265
  while repeat == 1
266
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
266
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
267
267
  aln_test = aln_seq[1]
268
268
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
269
269
  gap_begin = $1.size
@@ -280,12 +280,12 @@ module ViralSeq
280
280
  l1 = l1 + gap_begin
281
281
  l2 = l2 + gap_end
282
282
  repeat = 0
283
- if g1 > 2*s1
283
+ if g1 >= s1
284
284
  ref = ref[g1..-1]
285
285
  repeat = 1
286
286
  l1 = l1 + g1
287
287
  end
288
- if g2 > 2*s2
288
+ if g2 >= s2
289
289
  ref = ref[0..(-g2 - 1)]
290
290
  repeat = 1
291
291
  l2 = l2 + g2
@@ -293,8 +293,7 @@ module ViralSeq
293
293
  end
294
294
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
295
295
 
296
-
297
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
296
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
298
297
  aln_test = aln_seq[1]
299
298
  ref = aln_seq[0]
300
299
 
@@ -303,12 +302,12 @@ module ViralSeq
303
302
  if ref =~ /^(\-+)/
304
303
  l1 = l1 - $1.size
305
304
  elsif ref =~ /(\-+)$/
306
- l2 = l2 + $1.size
305
+ l2 = l2 - $1.size
307
306
  end
308
307
 
309
308
  if (ori_ref_l - l2 - 1) >= l1
310
309
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
311
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
310
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
312
311
  aln_test = aln_seq[1]
313
312
  ref = aln_seq[0]
314
313
 
@@ -16,7 +16,7 @@ module ViralSeq
16
16
  :ref_genome=>"HXB2",
17
17
  :ref_start=>2648,
18
18
  :ref_end=>3257,
19
- :indel=>false,
19
+ :indel=>true,
20
20
  :trim=>false},
21
21
  {:region=>"PR",
22
22
  :cdna=>
@@ -41,7 +41,7 @@ module ViralSeq
41
41
  :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
42
  :majority=>0,
43
43
  :end_join=>true,
44
- :end_join_option=>3,
44
+ :end_join_option=>2,
45
45
  :overlap=>171,
46
46
  :TCS_QC=>true,
47
47
  :ref_genome=>"HXB2",
@@ -61,11 +61,26 @@ module ViralSeq
61
61
  :TCS_QC=>true,
62
62
  :ref_genome=>"HXB2",
63
63
  :ref_start=>6585,
64
- :ref_end=>7208,
64
+ :ref_end=>7205..7210,
65
65
  :indel=>true,
66
- :trim=>false}
66
+ :trim=>false},
67
+ {:region=>"P17",
68
+ :cdna=>
69
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
70
+ :forward=>
71
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
72
+ :majority=>0,
73
+ :end_join=>true,
74
+ :end_join_option=>1,
75
+ :overlap=>0,
76
+ :TCS_QC=>true,
77
+ :ref_genome=>"HXB2",
78
+ :ref_start=>1196,
79
+ :ref_end=>1725,
80
+ :indel=>true,
81
+ :trim=>false}
67
82
  ]
68
- }
83
+ }
69
84
  end
70
85
 
71
86
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.6.1"
6
- TCS_VERSION = "2.5.0"
5
+ VERSION = "1.7.0"
6
+ TCS_VERSION = "2.5.1"
7
7
  end
data/viral_seq.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.required_rubygems_version = '>= 1.3.6'
36
36
 
37
37
  # muscle_bio gem required
38
- spec.add_runtime_dependency "muscle_bio", "~> 0.4"
38
+ spec.add_runtime_dependency "muscle_bio", "~> 0.5"
39
39
 
40
40
  # colorize gem required
41
41
  spec.add_runtime_dependency "colorize", "~> 0.1"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-02-02 00:00:00.000000000 Z
12
+ date: 2022-08-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0.4'
62
+ version: '0.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0.4'
69
+ version: '0.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: colorize
72
72
  requirement: !ruby/object:Gem::Requirement