viral_seq 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/viral_seq/pid.rb CHANGED
@@ -14,10 +14,7 @@ module ViralSeq
14
14
  nt = ['A','T','C','G']
15
15
  pid_pool = ['A','T','C','G']
16
16
  (l-1).times do
17
- pid_pool = pid_pool.product(nt)
18
- pid_pool.collect! do |v|
19
- v.join("")
20
- end
17
+ pid_pool = pid_pool.product(nt).map(&:join)
21
18
  end
22
19
  return pid_pool
23
20
  end # end of .generate_primer_id_pool
@@ -656,7 +656,7 @@ module ViralSeq
656
656
 
657
657
  def nt_variants
658
658
  return_obj = {}
659
- nt_hash = self.dna_hash
659
+
660
660
  tcs_number = self.size
661
661
  dl = ViralSeq::TcsCore.detection_limit(tcs_number)
662
662
  fdr_hash = self.fdr
@@ -869,7 +869,7 @@ module ViralSeq
869
869
  # @param start_nt [Integer,Range,Array] start nt position(s) on the reference genome, can be single number (Integer) or a range of Integers (Range), or an Array
870
870
  # @param end_nt [Integer,Range,Array] end nt position(s) on the reference genome, can be single number (Integer) or a range of Integers (Range), or an Array
871
871
  # @param indel [Boolean] allow indels or not, `true` or `false`
872
- # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
872
+ # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:SIVmm239`
873
873
  # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
874
874
  # @return [ViralSeq::SeqHash] a new ViralSeq::SeqHash object with only the sequences that meet the QC criteria
875
875
  # @example QC for sequences in a FASTA file
@@ -880,17 +880,19 @@ module ViralSeq
880
880
  # filtered_seqhash.dna_hash.size
881
881
  # => 4
882
882
 
883
- def hiv_seq_qc(start_nt, end_nt, indel=true, ref_option = :HXB2, path_to_muscle = false)
884
- start_nt = start_nt..start_nt if start_nt.is_a?(Integer)
885
- end_nt = end_nt..end_nt if end_nt.is_a?(Integer)
883
+ def hiv_seq_qc(start_nt, end_nt, indel=true, ref_option = :HXB2)
884
+ start_nt = position_helper(start_nt)
885
+ end_nt = position_helper(end_nt)
886
+
886
887
  seq_hash = self.dna_hash.dup
887
888
  seq_hash_unique = seq_hash.values.uniq
888
889
  seq_hash_unique_pass = []
889
890
 
890
- seq_hash_unique.each do |seq|
891
- next if seq.nil?
892
- loc = ViralSeq::Sequence.new('', seq).locator(ref_option, path_to_muscle)
893
- next unless loc # if locator tool fails, skip this seq.
891
+ batch_locator = VirustLocator::Locator.exec(seq_hash_unique.join("\s"), "nt", 1, ref_option).split("\n")
892
+ seq_hash_unique.each_with_index do |seq, i|
893
+ loc = batch_locator[i]
894
+ loc = locator_helper(loc)
895
+ next unless loc
894
896
  if start_nt.include?(loc[0]) && end_nt.include?(loc[1])
895
897
  if indel
896
898
  seq_hash_unique_pass << seq
@@ -898,8 +900,11 @@ module ViralSeq
898
900
  seq_hash_unique_pass << seq
899
901
  end
900
902
  end
903
+
901
904
  end
905
+
902
906
  seq_pass = []
907
+
903
908
  seq_hash_unique_pass.each do |seq|
904
909
  seq_hash.each do |seq_name, orginal_seq|
905
910
  if orginal_seq == seq
@@ -909,10 +914,10 @@ module ViralSeq
909
914
  end
910
915
  end
911
916
  self.sub(seq_pass)
912
- end # end of #hiv_seq_qc
917
+ end # end of #hiv_seq_qc # end of #hiv_seq_qc
913
918
 
914
919
  # sequence locator for SeqHash object, resembling HIV Sequence Locator from LANL
915
- # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
920
+ # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:SIVmm239`
916
921
  # @return [Array] two dimensional array `[[],[],[],...]` for each sequence, including the following information:
917
922
  #
918
923
  # title of the SeqHash object (String)
@@ -1341,7 +1346,7 @@ module ViralSeq
1341
1346
  seq_hash_unique = seq_hash.uniq_hash
1342
1347
  trimmed_seq_hash = {}
1343
1348
  seq_hash_unique.each do |seq, names|
1344
- trimmed_seq = ViralSeq::Sequence.new('', seq).sequence_clip(start_nt, end_nt, ref_option, path_to_muscle).dna
1349
+ trimmed_seq = ViralSeq::Sequence.new('', seq).sequence_clip(start_nt, end_nt, ref_option).dna
1345
1350
  names.each do |name|
1346
1351
  trimmed_seq_hash[name] = trimmed_seq
1347
1352
  end
@@ -1431,6 +1436,37 @@ module ViralSeq
1431
1436
  var_count.sort_by{|key,_value|key}.to_h
1432
1437
  end # end of #varaint_for_poisson
1433
1438
 
1439
+ # helper for start/end position for #hiv_seq_qc
1440
+ def position_helper(position)
1441
+ if position.is_a?(Range)
1442
+ return position
1443
+ elsif position.is_a?(Integer)
1444
+ return position..position
1445
+ elsif position.is_a?(String)
1446
+ return position.to_i..position.to_i
1447
+ elsif position.is_a?(Array)
1448
+ return position[0].to_i..position[1].to_i
1449
+ else
1450
+ raise "Position #{position} not recognized"
1451
+ end
1452
+ end # position_helper
1453
+
1454
+ # helper for batch locator
1455
+ # @param loc [String] the output of batch locator
1456
+ # @return [Array] the locator information in an array
1457
+ def locator_helper(loc)
1458
+ loc = loc.split("\t")
1459
+ loc[0] = loc[0].to_i
1460
+ loc[1] = loc[1].to_i
1461
+ loc[2] = loc[2].to_f.round(1)
1462
+ if loc[3].to_s.downcase == "true"
1463
+ loc[3] = true
1464
+ else
1465
+ loc[3] = false
1466
+ end
1467
+ return loc
1468
+ end
1469
+
1434
1470
  end # end of SeqHash
1435
1471
 
1436
1472
  end # end of ViralSeq
@@ -165,7 +165,7 @@ module ViralSeq
165
165
 
166
166
  # HIV sequence locator function, resembling HIV Sequence Locator from LANL
167
167
  # # current version only supports nucleotide sequence, not for amino acid sequence.
168
- # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
168
+ # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:SIVmm239`
169
169
  # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
170
170
  # @return [Array] an array of the following info:
171
171
  #
@@ -181,182 +181,32 @@ module ViralSeq
181
181
  #
182
182
  # aligned_reference_sequence (String)
183
183
  #
184
- # @example identify the location of the input sequence on the NL43 genome
184
+ # @example identify the location of the input sequence on the HXB2 genome
185
185
  # sequence = 'AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC'
186
186
  # s = ViralSeq::Sequence.new('my_sequence', sequence)
187
- # loc = s.locator(:NL43)
188
- # h = ViralSeq::SeqHash.new; h.dna_hash['NL43'] = loc[5]; h.dna_hash[s.name] = loc[4]
187
+ # loc = s.locator(:HXB2)
188
+ # h = ViralSeq::SeqHash.new; h.dna_hash['HXB2'] = loc[5]; h.dna_hash[s.name] = loc[4]
189
189
  # rs_string = h.to_rsphylip.split("\n")[1..-1].join("\n") # get a relaxed phylip format string for display of alignment.
190
- # puts "The input sequence \"#{s.name}\" is located on the NL43 nt sequence from #{loc[0].to_s} to #{loc[1].to_s}.\nIt is #{loc[2].to_s}% similar to the reference.\nIt #{loc[3]? "does" : "does not"} have indels.\nThe alignment is\n#{rs_string}"
191
- # => The input sequence "my_sequence" is located on the NL43 nt sequence from 2333 to 2433.
192
- # => It is 98.0% similar to the reference.
190
+ # puts "The input sequence \"#{s.name}\" is located on the HXB2 nt sequence from #{loc[0].to_s} to #{loc[1].to_s}.\nIt is #{loc[2].round(1).to_s}% similar to the reference.\nIt #{loc[3]? "does" : "does not"} have indels.\nThe alignment is\n#{rs_string}"
191
+ # => The input sequence "my_sequence" is located on the HXB2 nt sequence from 2333 to 2433.
192
+ # => It is 97.0% similar to the reference.
193
193
  # => It does not have indels.
194
194
  # => The alignment is
195
- # => NL43 AGCAGATGAT ACAGTATTAG AAGAAATGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT GGAGGTTTTA TCAAAGTAAG ACAGTATGAT C
195
+ # => HXB2 AGCAGATGAT ACAGTATTAG AAGAAATGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT GGAGGTTTTA TCAAAGTAAG ACAGTATGAT C
196
196
  # => my_sequence AGCAGATGAT ACAGTATTAG AAGAAATAAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT GGAGGTTTTA TCAAAGTAAG ACAATATGAT C
197
197
  # @see https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html LANL Sequence Locator
198
-
199
- def locator(ref_option = :HXB2, path_to_muscle = false)
198
+ def locator(ref_option = :HXB2, algorithm = 1)
200
199
  seq = self.dna
201
- ori_ref = ViralSeq::RefSeq.get(ref_option)
202
-
200
+ ref = ref_option.to_s
203
201
  begin
204
- ori_ref_l = ori_ref.size
205
- l1 = 0
206
- l2 = 0
207
-
208
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :Super5, path_to_muscle)
209
- aln_test = aln_seq[1]
210
- aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
211
- gap_begin = $1.size
212
- gap_end = $3.size
213
- aln_test2 = $2
214
- ref = aln_seq[0]
215
- ref = ref[gap_begin..(-gap_end-1)]
216
- ref_size = ref.size
217
- if ref_size > 1.3*(seq.size)
218
- l1 = l1 + gap_begin
219
- l2 = l2 + gap_end
220
- max_seq = aln_test2.scan(/[ACGT]+/).max_by(&:length)
221
- aln_test2 =~ /#{max_seq}/
222
- before_aln_seq = $`
223
- before_aln = $`.size
224
- post_aln_seq = $'
225
- post_aln = $'.size
226
- before_aln_seq_size = before_aln_seq.scan(/[ACGT]+/).join("").size
227
- b1 = (1.3 * before_aln_seq_size).to_i
228
- post_aln_seq_size = post_aln_seq.scan(/[ACGT]+/).join("").size
229
- b2 = (1.3 * post_aln_seq_size).to_i
230
- if (before_aln > seq.size) and (post_aln <= seq.size)
231
- ref = ref[(before_aln - b1)..(ref_size - post_aln - 1)]
232
- l1 = l1 + (before_aln - b1)
233
- elsif (post_aln > seq.size) and (before_aln <= seq.size)
234
- ref = ref[before_aln..(ref_size - post_aln - 1 + b2)]
235
- l2 = l2 + post_aln - b2
236
- elsif (post_aln > seq.size) and (before_aln > seq.size)
237
- ref = ref[(before_aln - b1)..(ref_size - post_aln - 1 + b2)]
238
- l1 = l1 + (before_aln - b1)
239
- l2 = l2 + (post_aln - b2)
240
- end
241
-
242
- aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
243
- aln_test = aln_seq[1]
244
- aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
245
- gap_begin = $1.size
246
- gap_end = $3.size
247
- ref = aln_seq[0]
248
- ref = ref[gap_begin..(-gap_end-1)]
249
- end
250
-
251
- aln_test = aln_seq[1]
252
- aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
253
- gap_begin = $1.size
254
- gap_end = $3.size
255
- aln_test = $2
256
- aln_test =~ /^(\w+)(\-*)\w/
257
- s1 = $1.size
258
- g1 = $2.size
259
- aln_test =~ /\w(\-*)(\w+)$/
260
- s2 = $2.size
261
- g2 = $1.size
262
-
263
- l1 = l1 + gap_begin
264
- l2 = l2 + gap_end
265
- repeat = 0
266
-
267
- if g1 == g2 and (s1 + g1 + s2) == ref.size
268
- if s1 > s2 and g2 >= s2
269
- ref = ref[0..(-g2-1)]
270
- repeat = 1
271
- l2 = l2 + g2
272
- elsif s1 < s2 and g1 >= s1
273
- ref = ref[g1..-1]
274
- repeat = 1
275
- l1 = l1 + g1
276
- end
277
- else
278
- if g1 >= s1
279
- ref = ref[g1..-1]
280
- repeat = 1
281
- l1 = l1 + g1
282
- end
283
- if g2 >= s2
284
- ref = ref[0..(-g2 - 1)]
285
- repeat = 1
286
- l2 = l2 + g2
287
- end
288
- end
289
-
290
- while repeat == 1
291
- aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
292
- aln_test = aln_seq[1]
293
- aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
294
- gap_begin = $1.size
295
- gap_end = $3.size
296
- aln_test = $2
297
- aln_test =~ /^(\w+)(\-*)\w/
298
- s1 = $1.size
299
- g1 = $2.size
300
- aln_test =~ /\w(\-*)(\w+)$/
301
- s2 = $2.size
302
- g2 = $1.size
303
- ref = aln_seq[0]
304
- ref = ref[gap_begin..(-gap_end-1)]
305
- l1 = l1 + gap_begin
306
- l2 = l2 + gap_end
307
- repeat = 0
308
- if g1 >= s1
309
- ref = ref[g1..-1]
310
- repeat = 1
311
- l1 = l1 + g1
312
- end
313
- if g2 >= s2
314
- ref = ref[0..(-g2 - 1)]
315
- repeat = 1
316
- l2 = l2 + g2
317
- end
318
- end
319
- ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
320
-
321
- aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
322
- aln_test = aln_seq[1]
323
- ref = aln_seq[0]
324
-
325
- #refine alignment
326
-
327
- if ref =~ /^(\-+)/
328
- l1 = l1 - $1.size
329
- elsif ref =~ /(\-+)$/
330
- l2 = l2 - $1.size
331
- end
332
-
333
- if (ori_ref_l - l2 - 1) >= l1
334
- ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
335
- aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
336
- aln_test = aln_seq[1]
337
- ref = aln_seq[0]
338
-
339
- ref_size = ref.size
340
- sim_count = 0
341
- (0..(ref_size-1)).each do |n|
342
- ref_base = ref[n]
343
- test_base = aln_test[n]
344
- sim_count += 1 if ref_base == test_base
345
- end
346
- similarity = (sim_count/ref_size.to_f*100).round(1)
347
-
348
- loc_p1 = l1 + 1
349
- loc_p2 = ori_ref_l - l2
350
- if seq.size != (loc_p2 - loc_p1 + 1)
351
- indel = true
352
- elsif aln_test.include?("-")
353
- indel = true
354
- else
355
- indel = false
356
- end
357
- return [loc_p1,loc_p2,similarity,indel,aln_test,ref]
202
+ loc = VirustLocator::Locator.exec(seq, "nt", algorithm, ref).split("\t")
203
+ loc[0] = loc[0].to_i
204
+ loc[1] = loc[1].to_i
205
+ loc[2] = loc[2].to_f.round(1)
206
+ if loc[3].to_s.downcase == "true"
207
+ loc[3] = true
358
208
  else
359
- return [0,0,0,0,0,0,0]
209
+ loc[3] = false
360
210
  end
361
211
  rescue => e
362
212
  puts "Unexpected error occured."
@@ -366,12 +216,13 @@ module ViralSeq
366
216
  puts "ViralSeq.sequence_locator returns nil"
367
217
  return nil
368
218
  end
369
- end # end of locator
219
+ return loc
220
+ end #end of locator
370
221
 
371
222
  # Given start and end positions on the reference genome, return a sub-sequence of the target sequence in that range
372
223
  # @param p1 [Integer] start position number on the reference genome
373
224
  # @param p2 [Integer] end position number on the reference genome
374
- # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
225
+ # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:SIVmm239`
375
226
  # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
376
227
  # @return [ViralSeq::Sequence, nil] a new ViralSeq::Sequence object that of input range on the reference genome or nil
377
228
  # if either the start or end position is beyond the range of the target sequence.
@@ -381,8 +232,8 @@ module ViralSeq
381
232
  # s.sequence_clip(2333, 2433, :HXB2).dna
382
233
  # => "AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC"
383
234
 
384
- def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false)
385
- loc = self.locator(ref_option, path_to_muscle)
235
+ def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2)
236
+ loc = self.locator(ref_option)
386
237
  l1 = loc[0]
387
238
  l2 = loc[1]
388
239
  if (p1 >= l1) & (p2 <= l2)
@@ -56,13 +56,13 @@ class String
56
56
  Regexp.new match
57
57
  end
58
58
 
59
- # parse the nucleotide sequences as an Array of Array
59
+ # parse the nucleotide sequences as an Array of Array
60
60
  # @return [Array] Array of Array at each position
61
61
  # @example parse a sequence with ambiguities to Array of Array
62
62
  # "ATRWCG".nt_to_array
63
63
  # => [["A"], ["T"], ["A", "G"], ["A", "T"], ["C"], ["G"]]
64
-
65
- def nt_to_array
64
+
65
+ def nt_to_array
66
66
  return_array = []
67
67
  self.each_char.each do |base|
68
68
  base_array = base.to_list
@@ -75,9 +75,6 @@ class String
75
75
  # compare the given nt sequence string with the ref sequence string
76
76
  # @param ref [String] the ref sequence string to compare with
77
77
  # @return [Integer] Number of differences
78
- # @example parse a sequence with ambiguities to Array of Array
79
- # "ATRWCG".nt_to_array
80
- # => [["A"], ["T"], ["A", "G"], ["A", "T"], ["C"], ["G"]]
81
78
 
82
79
  def nt_diff(ref)
83
80
  count_diff = 0
@@ -331,6 +331,10 @@ module ViralSeq
331
331
  return false
332
332
  elsif seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
333
333
  return false
334
+ elsif seq =~ /G{11}/ # a string of poly-G indicates poor quanlity in 2-color chemistry
335
+ return false
336
+ elsif seq =~ /C{11}/ # a string of poly-C indicates poor quanlity in 2-color chemistry
337
+ return false
334
338
  elsif seq =~ /A{11}/ # a string of poly-A indicates adaptor sequence
335
339
  return false
336
340
  elsif seq =~ /T{11}/ # a string of poly-T indicates adaptor sequence
@@ -186,7 +186,7 @@ module ViralSeq
186
186
  :trim=>false},
187
187
  {:region=>"PR",
188
188
  :cdna=>
189
- "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNTTAACCTTTGGGCCATCCATTCC",
189
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
190
190
  :forward=>
191
191
  "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
192
192
  :majority=>0,
@@ -247,6 +247,87 @@ module ViralSeq
247
247
  ]
248
248
  },
249
249
 
250
+ "v4" => {:platform_error_rate=>0.01,
251
+ :primer_pairs=>
252
+ [{:region=>"RT",
253
+ :cdna=>
254
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTAAGGAATGGAGGTTCTTTCTGATG",
255
+ :forward=>
256
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
257
+ :majority=>0,
258
+ :end_join=>true,
259
+ :end_join_option=>1,
260
+ :overlap=>0,
261
+ :TCS_QC=>true,
262
+ :ref_genome=>"HXB2",
263
+ :ref_start=>2648,
264
+ :ref_end=>3209,
265
+ :indel=>true,
266
+ :trim=>false},
267
+ {:region=>"PR",
268
+ :cdna=>
269
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
270
+ :forward=>
271
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
272
+ :majority=>0,
273
+ :end_join=>true,
274
+ :end_join_option=>3,
275
+ :TCS_QC=>true,
276
+ :ref_genome=>"HXB2",
277
+ :ref_start=>0,
278
+ :ref_end=>2591,
279
+ :indel=>true,
280
+ :trim=>true,
281
+ :trim_ref=>"HXB2",
282
+ :trim_ref_start=>2253,
283
+ :trim_ref_end=>2549},
284
+ {:region=>"IN",
285
+ :cdna=>
286
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCATCACCTGCCATCTGTTTTCCAT",
287
+ :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGCAGAAGTTATYCCAGCAGAAACA",
288
+ :majority=>0,
289
+ :end_join=>true,
290
+ :end_join_option=>2,
291
+ :overlap=>3,
292
+ :TCS_QC=>true,
293
+ :ref_genome=>"HXB2",
294
+ :ref_start=>4509,
295
+ :ref_end=>5040,
296
+ :indel=>true,
297
+ :trim=>false},
298
+ {:region=>"V1V3",
299
+ :cdna=>
300
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
301
+ :forward=>
302
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
303
+ :majority=>0,
304
+ :end_join=>true,
305
+ :end_join_option=>1,
306
+ :overlap=>0,
307
+ :TCS_QC=>true,
308
+ :ref_genome=>"HXB2",
309
+ :ref_start=>6585,
310
+ :ref_end=>7205..7210,
311
+ :indel=>true,
312
+ :trim=>false},
313
+ {:region=>"CA",
314
+ :cdna=>
315
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
316
+ :forward=>
317
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
318
+ :majority=>0,
319
+ :end_join=>true,
320
+ :end_join_option=>1,
321
+ :overlap=>0,
322
+ :TCS_QC=>true,
323
+ :ref_genome=>"HXB2",
324
+ :ref_start=>1196,
325
+ :ref_end=>1725,
326
+ :indel=>true,
327
+ :trim=>false}
328
+ ]
329
+ },
330
+
250
331
 
251
332
  }
252
333
 
@@ -54,6 +54,58 @@
54
54
  }
55
55
  }
56
56
 
57
+ },
58
+ {
59
+ "version": "v4",
60
+ "DRM_range": {
61
+ "CAI": [56,57, 66, 67, 70, 74, 105, 107],
62
+ "PI": [23, 24, 30, 32, 46, 47, 48, 50, 53, 54, 73, 76, 82, 83, 84, 88, 90],
63
+ "NRTI": [41, 65, 67, 69, 70, 74, 75, 77, 115, 116, 151, 184, 210, 215, 219],
64
+ "NNRTI": [98, 100, 101, 103, 106, 138, 179, 181, 188, 190],
65
+ "INSTI": [95, 97, 121, 140, 143, 147, 148, 155, 263]
66
+ },
67
+ "seq_coord": {
68
+ "CA": {
69
+ "minimum": 1196,
70
+ "maximum": 1725,
71
+ "gap": {
72
+ "minimum": 1466,
73
+ "maximum": 1471
74
+ }
75
+ },
76
+ "PR": {
77
+ "minimum": 2253,
78
+ "maximum": 2549
79
+ },
80
+ "RT": {
81
+ "minimum": 2648,
82
+ "maximum": 3209,
83
+ "gap": {
84
+ "minimum": 2915,
85
+ "maximum": 2949
86
+ }
87
+ },
88
+ "IN": {
89
+ "minimum": 4509,
90
+ "maximum": 5040
91
+ }
92
+ },
93
+ "seq_drm_correlation": {
94
+ "CA": ["CAI"],
95
+ "RT": ["NRTI", "NNRTI"],
96
+ "PR": ["PI"],
97
+ "IN": ["INSTI"]
98
+ },
99
+ "ref_info": {
100
+ "ref_type": "HXB2",
101
+ "ref_coord": {
102
+ "CA": [1186,1878],
103
+ "PR": [2253,2549],
104
+ "RT": [2550,3869],
105
+ "IN": [4230,5096]
106
+ }
107
+ }
108
+
57
109
  },
58
110
  {
59
111
  "version": "v1",
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.9.0"
6
- TCS_VERSION = "2.7.0"
5
+ VERSION = "1.10.0"
6
+ TCS_VERSION = "2.7.2"
7
7
  end
data/lib/viral_seq.rb CHANGED
@@ -53,3 +53,5 @@ require "json"
53
53
  require "securerandom"
54
54
  require "prawn"
55
55
  require "colorize"
56
+ require "virust_locator"
57
+ require "shellwords"
data/viral_seq.gemspec CHANGED
@@ -37,6 +37,9 @@ Gem::Specification.new do |spec|
37
37
  # muscle_bio gem required
38
38
  spec.add_runtime_dependency "muscle_bio", "= 0.4"
39
39
 
40
+ # virust-locator-ruby required
41
+ spec.add_runtime_dependency "virust-locator-ruby", "~> 0.3"
42
+
40
43
  # colorize gem required
41
44
  spec.add_runtime_dependency "colorize", "~> 0.1"
42
45
 
@@ -47,4 +50,6 @@ Gem::Specification.new do |spec|
47
50
  spec.add_runtime_dependency "combine_pdf", "~> 1.0", '>= 1.0.0'
48
51
 
49
52
  spec.requirements << 'R required for some functions'
53
+
54
+ spec.add_dependency "shellwords", "~> 0.2"
50
55
  end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
8
8
  - Michael Clark
9
- autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2024-11-13 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
@@ -67,6 +66,20 @@ dependencies:
67
66
  - - '='
68
67
  - !ruby/object:Gem::Version
69
68
  version: '0.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: virust-locator-ruby
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.3'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.3'
70
83
  - !ruby/object:Gem::Dependency
71
84
  name: colorize
72
85
  requirement: !ruby/object:Gem::Requirement
@@ -141,6 +154,20 @@ dependencies:
141
154
  - - ">="
142
155
  - !ruby/object:Gem::Version
143
156
  version: 1.0.0
157
+ - !ruby/object:Gem::Dependency
158
+ name: shellwords
159
+ requirement: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - "~>"
162
+ - !ruby/object:Gem::Version
163
+ version: '0.2'
164
+ type: :runtime
165
+ prerelease: false
166
+ version_requirements: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - "~>"
169
+ - !ruby/object:Gem::Version
170
+ version: '0.2'
144
171
  description: |-
145
172
  A Ruby Gem with bioinformatics tools for processing viral NGS data.
146
173
  Specifically for Primer-ID sequencing and HIV drug resistance analysis.
@@ -226,8 +253,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
226
253
  version: 1.3.6
227
254
  requirements:
228
255
  - R required for some functions
229
- rubygems_version: 3.5.11
230
- signing_key:
256
+ rubygems_version: 3.6.7
231
257
  specification_version: 4
232
258
  summary: A Ruby Gem containing bioinformatics tools for processing viral NGS data.
233
259
  test_files: []