viral_seq 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53c276c8975ad7b714e69906210e78950a3a6efa593cc56565fcf1b80d41d5db
4
- data.tar.gz: 6171d3fb2594d2c214b6b94420dee99915e59941e4aeb74cd723bbc9a1d42ab8
3
+ metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
4
+ data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
5
5
  SHA512:
6
- metadata.gz: 802ba6aff173d1fdae22f110e75228e2ed8d66ae4147c0e7c12ac2cad0334ca588e3ab23de57bda04bf7dfb33c778b5132c97f2eb58991c82584419ae29b5051
7
- data.tar.gz: eb40f1364222f459392edc0dd00eba03b417d2236c4c0d2ea75b88b3153f6bf209aaf9ff0e5b28309de4abc33464cc9e9607d8d4b8bef16a9a1965a850a835ef
6
+ metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
7
+ data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.6.0)
4
+ viral_seq (1.6.5)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
- muscle_bio (~> 0.4)
7
+ muscle_bio (~> 0.5)
8
8
  prawn (~> 2.3, >= 2.3.0)
9
9
  prawn-table (~> 0.2, >= 0.2.0)
10
10
 
@@ -15,7 +15,7 @@ GEM
15
15
  combine_pdf (1.0.21)
16
16
  ruby-rc4 (>= 0.1.5)
17
17
  diff-lcs (1.3)
18
- muscle_bio (0.4.0)
18
+ muscle_bio (0.5.0)
19
19
  pdf-core (0.9.0)
20
20
  prawn (2.4.0)
21
21
  pdf-core (~> 0.9.0)
data/README.md CHANGED
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
10
10
 
11
11
  Specifically for Primer ID sequencing and HIV drug resistance analysis.
12
12
 
13
+ CLI tools `tcs`, `tcs_sdrm`, `tcs_log` and `locator` included in the gem.
14
+
13
15
  #### tcs web app - https://primer-id.org/
14
16
 
15
17
  ## Illustration for the Primer ID Sequencing
@@ -22,6 +24,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
22
24
  [Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
23
25
  [Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
24
26
 
27
+ ## Requirements
28
+
29
+ Required Ruby Version: >= 2.5
30
+
31
+ Required RubyGems version: >= 1.3.6
32
+
25
33
  ## Install
26
34
 
27
35
  ```bash
@@ -179,10 +187,27 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
179
187
 
180
188
  ## Updates
181
189
 
190
+ ### Version-1.7.0-08242022
191
+
192
+ 1. Added a warning when the `tcs` pipeline is executed from source instead of from the installed `gem`.
193
+ 2. Optimized the `ViralSeq::SeqHash#a3g` hypermut algorithm. It now allows an external reference to be used instead of the sample consensus.
194
+
195
+ ### Version-1.6.4-07182022
196
+
197
+ 1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
198
+ 2. Loosened the locator params for the end of the "V1V3" region to handle rare alignment issues. The default "V1V3" region now ends anywhere from position 7205 to 7210 instead of exactly 7208.
199
+ 3. `tcs_sdrm` now analyses the "P17" region for pairwise diversity.
200
+
201
+ ### Version-1.6.3-02052022
202
+
203
+ 1. Updated the `ViralSeq::Muscle` module to match the upgrade of `muscle` from version 3.8.1 to 5.1.
204
+ 2. Optimized the `locator` algorithm based on `muscle` v5.1.
205
+ 3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
206
+
182
207
  ### Version-1.6.1-02022022
183
208
 
184
209
  1. Fixed the `nav bar` in tcs_log html file.
185
- 2. Fixed a typo in `tcs`.
210
+ 2. Fixed a typo in `tcs`.
186
211
 
187
212
  ### Version 1.6.0-01042022
188
213
 
data/bin/tcs CHANGED
@@ -22,20 +22,38 @@
22
22
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
23
  # THE SOFTWARE.
24
24
 
25
+ # Install using `gem install viral_seq`
25
26
  # Use JSON file as the run param
26
27
  # run `tcs -j` to generate param json file.
27
28
 
28
- require 'viral_seq'
29
+ def gem_installed?(gem_name)
30
+ found_gem = false
31
+ begin
32
+ found_gem = Gem::Specification.find_by_name(gem_name)
33
+ rescue Gem::LoadError
34
+ return false
35
+ else
36
+ return true
37
+ end
38
+ end
39
+
40
+ if gem_installed?('viral_seq')
41
+ require 'viral_seq'
42
+ else
43
+ printf "\n****************************************************\n"
44
+ printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
45
+ printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
46
+ printf "****************************************************\n\n"
47
+ exit 1
48
+ end
49
+
50
+
29
51
  require 'json'
30
52
  require 'colorize'
31
53
  require 'optparse'
32
54
 
33
55
  options = {}
34
56
 
35
- # banner = '-'*50 + "\n" +
36
- # '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |' + "\n" +
37
- # '-'*50 + "\n"
38
-
39
57
  banner = "\n" +
40
58
  "████████ ██████ ███████ ██████ ██ ██████ ███████ ██ ██ ███ ██ ███████\n".light_red +
41
59
  " ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██\n".light_yellow +
data/bin/tcs_sdrm CHANGED
@@ -172,6 +172,25 @@ libs.each do |lib|
172
172
  linkage_list += sdrm[1]
173
173
  aa_report_list += sdrm[2]
174
174
 
175
+ elsif seq_basename =~/P17/i
176
+ a3g_check = seqs.a3g
177
+ a3g_seqs = a3g_check[:a3g_seq]
178
+ a3g_filtered_seqs = a3g_check[:filtered_seq]
179
+ stop_codon_check = a3g_filtered_seqs.stop_codon(2)
180
+ stop_codon_seqs = stop_codon_check[:with_stop_codon]
181
+ filtered_seqs = stop_codon_check[:without_stop_codon]
182
+ poisson_minority_cutoff = filtered_seqs.pm
183
+ fdr_hash = filtered_seqs.fdr
184
+ summary_hash[:P17] = [
185
+ seqs.size.to_s,
186
+ a3g_seqs.size.to_s,
187
+ stop_codon_seqs.size.to_s,
188
+ filtered_seqs.size.to_s,
189
+ poisson_minority_cutoff.to_s
190
+ ].join(',')
191
+ next if filtered_seqs.size < 3
192
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
193
+
175
194
  elsif seq_basename =~/RT/i
176
195
  rt_seq1 = {}
177
196
  rt_seq2 = {}
@@ -229,7 +248,7 @@ libs.each do |lib|
229
248
  filtered_seq_files.each do |seq_file|
230
249
  filtered_sh = ViralSeq::SeqHash.fa(seq_file)
231
250
  next if filtered_sh.size < 3
232
- aligned_sh = filtered_sh.random_select(1000).align
251
+ aligned_sh = filtered_sh.random_select(1000).align(:Super5)
233
252
  aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
234
253
  end
235
254
 
@@ -249,7 +268,7 @@ libs.each do |lib|
249
268
  tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
250
269
  summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
251
270
  end
252
- [:PR, :RT, :IN, :V1V3].each do |regions|
271
+ [:PR, :RT, :IN, :V1V3, :P17].each do |regions|
253
272
  next unless summary_hash[regions]
254
273
  seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
255
274
  end
@@ -270,10 +289,13 @@ libs.each do |lib|
270
289
  tcs_RT = 0
271
290
  tcs_IN = 0
272
291
  tcs_V1V3 = 0
292
+ tcs_P17 = 0
273
293
  pi_RT = 0.0
274
294
  pi_V1V3 = 0.0
295
+ pi_P17 = 0.0
275
296
  dist20_RT = 0.0
276
297
  dist20_V1V3 = 0.0
298
+ dist20_P17 = 0.0
277
299
  summary_lines.each do |line|
278
300
  data = line.chomp.split(",")
279
301
  if data[0] == "PR"
@@ -288,6 +310,10 @@ libs.each do |lib|
288
310
  tcs_V1V3 = data[1].to_i
289
311
  pi_V1V3 = data[6].to_f
290
312
  dist20_V1V3 = data[7].to_f
313
+ elsif data[0] == "P17"
314
+ tcs_P17 = data[4].to_i
315
+ pi_P17 = data[6].to_f
316
+ dist20_P17 = data[7].to_f
291
317
  end
292
318
  end
293
319
 
@@ -323,9 +349,13 @@ libs.each do |lib|
323
349
  tcs_RT: tcs_RT,
324
350
  tcs_IN: tcs_IN,
325
351
  tcs_V1V3: tcs_V1V3,
352
+ tcs_P17: tcs_P17,
326
353
  pi_RT: pi_RT,
354
+ pi_V1V3: pi_V1V3,
355
+ pi_P17: pi_P17,
327
356
  dist20_RT: dist20_RT,
328
357
  dist20_V1V3: dist20_V1V3,
358
+ dist20_P17: dist20_P17,
329
359
  recency: recency,
330
360
  sdrm_PR: sdrm_PR,
331
361
  sdrm_RT: sdrm_RT,
data/docs/dr.json CHANGED
@@ -62,6 +62,21 @@
62
62
  "ref_end": 7208,
63
63
  "indel": true,
64
64
  "trim": false
65
+ },
66
+ {
67
+ "region": "P17",
68
+ "cdna": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
69
+ "forward": "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
70
+ "majority": 0.5,
71
+ "end_join": true,
72
+ "end_join_option": 1,
73
+ "overlap": 0,
74
+ "TCS_QC": true,
75
+ "ref_genome": "HXB2",
76
+ "ref_start": 1196,
77
+ "ref_end": 1725,
78
+ "indel": true,
79
+ "trim": false
65
80
  }
66
81
  ]
67
82
  }
@@ -5,7 +5,7 @@ module ViralSeq
5
5
  # functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
6
6
  # works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
7
7
  # PR codon 1-99
8
- # RT codon 34-122 (HXB2 2650-2914) and 152-236(3001-3257)
8
+ # RT codon 34-122 (HXB2 2649-2914) and 152-236(3001-3257)
9
9
  # IN codon 53-174 (HXB2 4384-4751)
10
10
  # @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
11
11
  # can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
@@ -28,6 +28,8 @@ module ViralSeq
28
28
  # align a sequence with reference sequence Strings
29
29
  # @param ref_seq [String] reference sequence
30
30
  # @param test_seq [String] test sequence
31
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
32
+ # @param path_to_muscle [String], path to the MUSCLE executable. If not provided (the default), it will use RubyGem::MuscleBio
31
33
  # @param path_to_muscle [String], path to the MUSCLE executable. If not provided (the default), it will use RubyGem::MuscleBio
32
34
  # @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
33
35
  # if it cannot find the MUSCLE executable
@@ -37,7 +39,7 @@ module ViralSeq
37
39
  # aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
38
40
  # => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
39
41
 
40
- def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
42
+ def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
41
43
  temp_dir = Dir.home
42
44
  temp_name = "_" + SecureRandom.alphanumeric
43
45
  temp_file = File.join(temp_dir, temp_name)
@@ -56,7 +58,11 @@ module ViralSeq
56
58
  end
57
59
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
58
60
  else
59
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
61
+ if MuscleBio::VERSION.to_f < 0.5
62
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
63
+ else
64
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
65
+ end
60
66
  end
61
67
  aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
62
68
  File.unlink(temp_file)
@@ -223,7 +223,7 @@ module ViralSeq
223
223
 
224
224
  # check the size range of the DNA sequences of the SeqHash object
225
225
  # @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
226
-
226
+
227
227
  def check_nt_size
228
228
  dna_hash = self.dna_hash
229
229
  size_array = []
@@ -450,7 +450,7 @@ module ViralSeq
450
450
  # function to determine if the sequences have APOBEC3g/f hypermutation.
451
451
  # # APOBEC3G/F pattern: GRD -> ARD
452
452
  # # control pattern: G[YN|RC] -> A[YN|RC]
453
- # # use the sample consensus to determine potential a3g sites
453
+ # # use the sample consensus to determine potential a3g sites (default) or provide external reference sequences as a `String`
454
454
  # # Two criteria to identify hypermutation
455
455
  # # 1. Fisher's exact test on the frequencies of G to A mutation at A3G positions vs. non-A3G positions
456
456
  # # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
@@ -486,7 +486,7 @@ module ViralSeq
486
486
  # # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
487
487
  # @see https://www.hiv.lanl.gov/content/sequence/HYPERMUT/hypermut.html LANL Hypermut
488
488
 
489
- def a3g_hypermut
489
+ def a3g_hypermut(ref = nil)
490
490
  # mut_hash number of apobec3g/f mutations per sequence
491
491
  mut_hash = {}
492
492
  hm_hash = {}
@@ -495,8 +495,10 @@ module ViralSeq
495
495
  # total G->A mutations at apobec3g/f positions.
496
496
  total = 0
497
497
 
498
- # make consensus sequence for the input sequence hash
499
- ref = self.consensus
498
+ unless ref
499
+ # make consensus sequence for the input sequence hash
500
+ ref = self.consensus
501
+ end
500
502
 
501
503
  # obtain apobec3g positions and control positions
502
504
  apobec = apobec3gf(ref)
@@ -509,7 +511,6 @@ module ViralSeq
509
511
  c = 0 # control muts
510
512
  d = 0 # potenrial controls
511
513
  mut.each do |n|
512
- next if v[n] == "-"
513
514
  if v[n] == "A"
514
515
  a += 1
515
516
  b += 1
@@ -521,7 +522,6 @@ module ViralSeq
521
522
  total += a
522
523
 
523
524
  control.each do |n|
524
- next if v[n] == "-"
525
525
  if v[n] == "A"
526
526
  c += 1
527
527
  d += 1
@@ -544,7 +544,7 @@ module ViralSeq
544
544
  end
545
545
  end
546
546
 
547
- if self.dna_hash.size > 20
547
+ if self.dna_hash.size > 200
548
548
  rate = total.to_f/(self.dna_hash.size)
549
549
  count_mut = mut_hash.values.count_freq
550
550
  maxi_count = count_mut.values.max
@@ -566,10 +566,12 @@ module ViralSeq
566
566
  end
567
567
  end
568
568
  end
569
+
569
570
  hm_seq_hash = ViralSeq::SeqHash.new
570
571
  hm_hash.each do |k,_v|
571
572
  hm_seq_hash.dna_hash[k] = self.dna_hash[k]
572
573
  end
574
+
573
575
  hm_seq_hash.title = self.title + "_hypermut"
574
576
  hm_seq_hash.file = self.file
575
577
  filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
@@ -711,10 +713,11 @@ module ViralSeq
711
713
 
712
714
 
713
715
  # align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
716
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
714
717
  # @param path_to_muscle [String], path to the MUSCLE executable. If not provided (the default), it will use RubyGem::MuscleBio
715
718
  # @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
716
719
 
717
- def align(path_to_muscle = false)
720
+ def align(algorithm = :PPP, path_to_muscle = false)
718
721
  seq_hash = self.dna_hash
719
722
  if self.file.size > 0
720
723
  temp_dir = File.dirname(self.file)
@@ -732,7 +735,11 @@ module ViralSeq
732
735
  end
733
736
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
734
737
  else
735
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
738
+ if MuscleBio::VERSION.to_f < 0.5
739
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
740
+ else
741
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
742
+ end
736
743
  end
737
744
  out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
738
745
  out_seq_hash.title = self.title + "_aligned"
@@ -1351,7 +1358,7 @@ module ViralSeq
1351
1358
  # APOBEC3G/F pattern: GRD -> ARD,
1352
1359
  # control pattern: G[YN|RC] -> A[YN|RC],
1353
1360
  def apobec3gf(seq = '')
1354
- seq.tr!("-", "")
1361
+ #seq.tr!("-", "")
1355
1362
  seq_length = seq.size
1356
1363
  apobec_position = []
1357
1364
  control_position = []
@@ -1363,6 +1370,7 @@ module ViralSeq
1363
1370
  control_position << n
1364
1371
  end
1365
1372
  end
1373
+
1366
1374
  return [apobec_position,control_position]
1367
1375
  end # end of #apobec3gf
1368
1376
 
@@ -180,7 +180,7 @@ module ViralSeq
180
180
  l1 = 0
181
181
  l2 = 0
182
182
 
183
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
183
+ aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
184
184
  aln_test = aln_seq[1]
185
185
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
186
186
  gap_begin = $1.size
@@ -214,7 +214,7 @@ module ViralSeq
214
214
  l2 = l2 + (post_aln - b2)
215
215
  end
216
216
 
217
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
217
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
218
218
  aln_test = aln_seq[1]
219
219
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
220
220
  gap_begin = $1.size
@@ -240,22 +240,22 @@ module ViralSeq
240
240
  repeat = 0
241
241
 
242
242
  if g1 == g2 and (s1 + g1 + s2) == ref.size
243
- if s1 > s2 and g2 > 2*s2
243
+ if s1 > s2 and g2 >= s2
244
244
  ref = ref[0..(-g2-1)]
245
245
  repeat = 1
246
246
  l2 = l2 + g2
247
- elsif s1 < s2 and g1 > 2*s1
247
+ elsif s1 < s2 and g1 >= s1
248
248
  ref = ref[g1..-1]
249
249
  repeat = 1
250
250
  l1 = l1 + g1
251
251
  end
252
252
  else
253
- if g1 > 2*s1
253
+ if g1 >= s1
254
254
  ref = ref[g1..-1]
255
255
  repeat = 1
256
256
  l1 = l1 + g1
257
257
  end
258
- if g2 > 2*s2
258
+ if g2 >= s2
259
259
  ref = ref[0..(-g2 - 1)]
260
260
  repeat = 1
261
261
  l2 = l2 + g2
@@ -263,7 +263,7 @@ module ViralSeq
263
263
  end
264
264
 
265
265
  while repeat == 1
266
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
266
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
267
267
  aln_test = aln_seq[1]
268
268
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
269
269
  gap_begin = $1.size
@@ -280,12 +280,12 @@ module ViralSeq
280
280
  l1 = l1 + gap_begin
281
281
  l2 = l2 + gap_end
282
282
  repeat = 0
283
- if g1 > 2*s1
283
+ if g1 >= s1
284
284
  ref = ref[g1..-1]
285
285
  repeat = 1
286
286
  l1 = l1 + g1
287
287
  end
288
- if g2 > 2*s2
288
+ if g2 >= s2
289
289
  ref = ref[0..(-g2 - 1)]
290
290
  repeat = 1
291
291
  l2 = l2 + g2
@@ -293,8 +293,7 @@ module ViralSeq
293
293
  end
294
294
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
295
295
 
296
-
297
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
296
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
298
297
  aln_test = aln_seq[1]
299
298
  ref = aln_seq[0]
300
299
 
@@ -303,12 +302,12 @@ module ViralSeq
303
302
  if ref =~ /^(\-+)/
304
303
  l1 = l1 - $1.size
305
304
  elsif ref =~ /(\-+)$/
306
- l2 = l2 + $1.size
305
+ l2 = l2 - $1.size
307
306
  end
308
307
 
309
308
  if (ori_ref_l - l2 - 1) >= l1
310
309
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
311
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
310
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
312
311
  aln_test = aln_seq[1]
313
312
  ref = aln_seq[0]
314
313
 
@@ -16,7 +16,7 @@ module ViralSeq
16
16
  :ref_genome=>"HXB2",
17
17
  :ref_start=>2648,
18
18
  :ref_end=>3257,
19
- :indel=>false,
19
+ :indel=>true,
20
20
  :trim=>false},
21
21
  {:region=>"PR",
22
22
  :cdna=>
@@ -41,7 +41,7 @@ module ViralSeq
41
41
  :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
42
  :majority=>0,
43
43
  :end_join=>true,
44
- :end_join_option=>3,
44
+ :end_join_option=>2,
45
45
  :overlap=>171,
46
46
  :TCS_QC=>true,
47
47
  :ref_genome=>"HXB2",
@@ -61,11 +61,26 @@ module ViralSeq
61
61
  :TCS_QC=>true,
62
62
  :ref_genome=>"HXB2",
63
63
  :ref_start=>6585,
64
- :ref_end=>7208,
64
+ :ref_end=>7205..7210,
65
65
  :indel=>true,
66
- :trim=>false}
66
+ :trim=>false},
67
+ {:region=>"P17",
68
+ :cdna=>
69
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
70
+ :forward=>
71
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
72
+ :majority=>0,
73
+ :end_join=>true,
74
+ :end_join_option=>1,
75
+ :overlap=>0,
76
+ :TCS_QC=>true,
77
+ :ref_genome=>"HXB2",
78
+ :ref_start=>1196,
79
+ :ref_end=>1725,
80
+ :indel=>true,
81
+ :trim=>false}
67
82
  ]
68
- }
83
+ }
69
84
  end
70
85
 
71
86
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.6.1"
6
- TCS_VERSION = "2.5.0"
5
+ VERSION = "1.7.0"
6
+ TCS_VERSION = "2.5.1"
7
7
  end
data/viral_seq.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.required_rubygems_version = '>= 1.3.6'
36
36
 
37
37
  # muscle_bio gem required
38
- spec.add_runtime_dependency "muscle_bio", "~> 0.4"
38
+ spec.add_runtime_dependency "muscle_bio", "~> 0.5"
39
39
 
40
40
  # colorize gem required
41
41
  spec.add_runtime_dependency "colorize", "~> 0.1"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-02-02 00:00:00.000000000 Z
12
+ date: 2022-08-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0.4'
62
+ version: '0.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0.4'
69
+ version: '0.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: colorize
72
72
  requirement: !ruby/object:Gem::Requirement