RubyGems - viral_seq - Versions diffs - 1.0.8 → 1.0.9 - Mend

viral_seq 1.0.8 → 1.0.9

Files changed (9) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8d79f0676fb23cdc25fb3b0161b5665ecfe082e2401f40a1de3a782d9fb3d52a
-  data.tar.gz: 01a09f4cfca1274bfb1b870cdad62614def01fdaded727ce9100eec377962401
+  metadata.gz: 4921d3609d6ffc7fd6fbafd7a4a86e5818d47ed855393addd68b20f28b9d214f
+  data.tar.gz: a9e18c01b287885f8f6238343d9633a52d4ae5ea061347e73bd4f3e86788b2a4
 SHA512:
-  metadata.gz: 042f11da57209003bc84b0f7c764a9953f0ca6c1fcd00a5e943be531162bc06c9d54e3c4ceb1305c91fe5795894e3da394a196899a4f1df83d97b826c5582411
-  data.tar.gz: b2b2bfb9a8e6d023f610b19311a1a1ea331fbaa804cf20aebc3a34f6b049240ec43fe10e92b9f00feef3fd78e922fe0ed39281146693358998020036b9553504
+  metadata.gz: dd21b57e17751f6c3e475f05b7a565d295ac7592b7c02f8d89ed49192834bee444f08ee9ebf48e41922c8caaf37a03651d5d0c9aa89d97ccc2edb9aad8224d5f
+  data.tar.gz: d1162424ea877d9839c179cacc330c81cd3508fcff07b64a1e753c7c706485d1dcb9a6b60aec9ce02ed33b91bbd4386ed58329c17e247ba086e7d81ed107bfd4

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    viral_seq (1.0.8)
+    viral_seq (1.0.9)
       colorize (~> 0.1)
       muscle_bio (~> 0.4)
@@ -11,7 +11,7 @@ GEM
     colorize (0.8.1)
     diff-lcs (1.3)
     muscle_bio (0.4.0)
-    rake (10.5.0)
+    rake (13.0.1)
     rspec (3.8.0)
       rspec-core (~> 3.8.0)
       rspec-expectations (~> 3.8.0)
@@ -31,7 +31,7 @@ PLATFORMS
 DEPENDENCIES
   bundler (~> 2.0)
-  rake (~> 10.0)
+  rake (~> 13.0)
   rspec (~> 3.0)
   viral_seq!

data/README.md CHANGED

@@ -12,101 +12,133 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
 #### Load all ViralSeq classes by requiring 'viral_seq.rb'
-    #!/usr/bin/env ruby
-    require 'viral_seq'
+```ruby
+#!/usr/bin/env ruby
+require 'viral_seq'
+```
 #### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
     $ locator -i sequence.fasta -o sequence.fasta.csv
+#### Use executable `tcs` pipeline to process Primer ID MiSeq sequencing data. Parameter json file can be generated using `tcs_json_generator` or at https://tcs-dr-dept-tcs.cloudapps.unc.edu/generator.php
+    $ tcs params.json
+#### Use executable `tcs_json_generator` to generate params .json file for the `tcs` pipeline.
+    $ tcs_json_generator
 ## Some Examples
 #### Load nucleotide sequences from a FASTA format sequence file
-    my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
+```ruby
+my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
+```
 #### Make an alignment (using MUSCLE)
-    aligned_seqhash = my_seqhash.align
+```ruby
+aligned_seqhash = my_seqhash.align
+```
 #### Filter nucleotide sequences with the reference coordinates (HIV Protease)
-    qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
+```ruby
+qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
+```
 #### Further filter out sequences with Apobec3g/f hypermutations
-    qc_seqhash = qc_seqhash.a3g
+```ruby
+qc_seqhash = qc_seqhash.a3g
+```
 #### Calculate nucleotide diversity π
-    qc_seqhash.pi
+```ruby
+qc_seqhash.pi
+```
 #### Calculate cut-off for minority variants based on Poisson model
-    cut_off = qc_seqhash.pm
+```ruby
+cut_off = qc_seqhash.pm
+```
 #### Examine for drug resistance mutations for HIV PR region
-    qc_seqhash.sdrm_hiv_pr(cut_off)
+```ruby
+qc_seqhash.sdrm_hiv_pr(cut_off)
+```
 ## Updates
+Version 1.0.9-07182020:
+  1. Change ViralSeq::SeqHash#stop_codon and ViralSeq::SeqHash#a3g_hypermut return value to hash object.
+  2. TCS pipeline updated to version 2.0.1. Added an optional `export_raw: TRUE/FALSE` field in the json params. If `export_raw` is `TRUE`, raw sequence reads (those that pass the quality filters) will be exported along with the TCS reads.
 Version 1.0.8-02282020:
-    1. TCS pipeline added as executable.
-        tcs  -  main TCS pipeline script.
-        tcs_json_generator  -  step-by-step script to generate json file for tcs pipeline.
+  1. TCS pipeline (version 2.0.0) added as executable.
+      tcs  -  main TCS pipeline script.
+      tcs_json_generator  -  step-by-step script to generate json file for tcs pipeline.
-    2. Methods added:
-        ViralSeq::SeqHash#trim
+  2. Methods added:
+      ViralSeq::SeqHash#trim
-    3. Bug fix for several methods.
+  3. Bug fix for several methods.
 Version 1.0.7-01282020:
-    1. Several methods added, including
-        ViralSeq::SeqHash#error_table
-        ViralSeq::SeqHash#random_select
-    2. Improved performance for several functions.
+  1. Several methods added, including
+      ViralSeq::SeqHash#error_table
+      ViralSeq::SeqHash#random_select
+  2. Improved performance for several functions.
 Version 1.0.6-07232019:
-    1. Several methods added to ViralSeq::SeqHash, including
-        ViralSeq::SeqHash#size
-        ViralSeq::SeqHash#+
-        ViralSeq::SeqHash#write_nt_fa
-        ViralSeq::SeqHash#mutation
-    2. Update documentations and rspec samples.
+  1. Several methods added to ViralSeq::SeqHash, including
+      ViralSeq::SeqHash#size
+      ViralSeq::SeqHash#+
+      ViralSeq::SeqHash#write_nt_fa
+      ViralSeq::SeqHash#mutation
+  2. Update documentations and rspec samples.
 Version 1.0.5-07112019:
-    1. Update ViralSeq::SeqHash#sequence_locator.
-       Program will try to determine the direction (`+` or `-` of the query sequence)
-    2. update executable `locator` to have a column of `direction` in output .csv file
+  1. Update ViralSeq::SeqHash#sequence_locator.
+     Program will try to determine the direction (`+` or `-`) of the query sequence
+  2. update executable `locator` to have a column of `direction` in output .csv file
 Version 1.0.4-07102019:
-    1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
-    2. Fix bugs in bin `locator`
+  1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
+  2. Fix bugs in bin `locator`
 Version 1.0.3-07102019:
-    1. Bug fix.
+  1. Bug fix.
 Version 1.0.2-07102019:
-    1. Fixed a gem loading issue.
+  1. Fixed a gem loading issue.
 Version 1.0.1-07102019:
-    1. Add keyword argument :model to ViralSeq::SeqHashPair#join2.
-    2. Add method ViralSeq::SeqHash#sequence_locator (also: #loc), a function to locate sequences on HIV/SIV reference genomes, as HIV Sequence Locator from LANL.
-    3. Add executable 'locator'. An HIV/SIV sequence locator tool similar to LANL Sequence Locator.
-    4. update documentations
+  1. Add keyword argument :model to ViralSeq::SeqHashPair#join2.
+  2. Add method ViralSeq::SeqHash#sequence_locator (also: #loc), a function to locate sequences on HIV/SIV reference genomes, as HIV Sequence Locator from LANL.
+  3. Add executable 'locator'. An HIV/SIV sequence locator tool similar to LANL Sequence Locator.
+  4. update documentations
 Version 1.0.0-07092019:
-    1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
+  1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
 ## Development

data/bin/tcs CHANGED

@@ -29,69 +29,6 @@ require 'viral_seq'
 require 'json'
 require 'colorize'
-# updated the ViralSeq module. Push with the new version.
-module ViralSeq
-  class SeqHash
-    def self.new_from_fastq(fastq_file)
-      count = 0
-      sequence_a = []
-      quality_a = []
-      count_seq = 0
-      File.open(fastq_file,'r') do |file|
-        file.readlines.collect do |line|
-          count +=1
-          count_m = count % 4
-          if count_m == 1
-            line.tr!('@','>')
-            sequence_a << line.chomp
-            quality_a << line.chomp
-            count_seq += 1
-          elsif count_m == 2
-            sequence_a << line.chomp
-          elsif count_m == 0
-            quality_a << line.chomp
-          end
-        end
-      end
-      sequence_hash = Hash[sequence_a.each_slice(2).to_a]
-      quality_hash = Hash[quality_a.each_slice(2).to_a]
-      seq_hash = ViralSeq::SeqHash.new
-      seq_hash.dna_hash = sequence_hash
-      seq_hash.qc_hash = quality_hash
-      seq_hash.title = File.basename(fastq_file,".*")
-      seq_hash.file = fastq_file
-      return seq_hash
-    end # end of ::new_from_fastq
-    class << self
-      alias_method :fq, :new_from_fastq
-    end
-  end
-end
-module ViralSeq
-  class SeqHash
-    def trim(start_nt, end_nt, ref_option = :HXB2, path_to_muscle = false)
-      seq_hash = self.dna_hash.dup
-      seq_hash_unique = seq_hash.uniq_hash
-      trimmed_seq_hash = {}
-      seq_hash_unique.each do |seq, names|
-        trimmed_seq = ViralSeq::Sequence.new('', seq).sequence_clip(start_nt, end_nt, ref_option, path_to_muscle).dna
-        names.each do |name|
-          trimmed_seq_hash[name] = trimmed_seq
-        end
-      end
-      return_seq_hash = self.dup
-      return_seq_hash.dna_hash = trimmed_seq_hash
-      return return_seq_hash
-    end
-  end
-end
-# end of additonal methods. Delete before publish
 # calculate consensus cutoff
@@ -127,12 +64,9 @@ def calculate_cut_off(m, error_rate = 0.02)
   return n
 end
-TCS_VERSION = "2.0.0"
-puts "\n" + '-'*58
-puts '| JSON Parameter Generator for ' + "TCS #{TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
-puts '-'*58 + "\n"
+puts "\n" + '-'*50
+puts '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
+puts '-'*50 + "\n"
 unless ARGV[0]
   raise "No JSON param file found. Script terminated."
@@ -173,7 +107,7 @@ def unzip_r(indir, f)
 end
 runtime_log_file = File.join(indir,"runtime.log")
 log = File.open(runtime_log_file, "w")
-log.puts "TSC pipeline Version " + TCS_VERSION.to_s
+log.puts "TSC pipeline Version " + ViralSeq::TCS_VERSION.to_s
 log.puts "viral_seq Version " + ViralSeq::VERSION.to_s
 log.puts Time.now.to_s + "\t" + "Start TCS pipeline..."
@@ -224,7 +158,7 @@ end
 primers.each do |primer|
   summary_json = {}
-  summary_json[:tcs_version] = TCS_VERSION
+  summary_json[:tcs_version] = ViralSeq::TCS_VERSION
   summary_json[:viralseq_version] = ViralSeq::VERSION
   summary_json[:runtime] = Time.now.to_s
@@ -233,6 +167,9 @@ primers.each do |primer|
   cdna_primer = primer[:cdna]
   forward_primer = primer[:forward]
+  export_raw = primer[:export_raw]
   unless cdna_primer
     log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
   end
@@ -363,10 +300,30 @@ primers.each do |primer|
   out_dir_consensus = File.join(out_dir_set, "consensus")
   Dir.mkdir(out_dir_consensus) unless File.directory?(out_dir_consensus)
-  outfile_r1 = File.join(out_dir_consensus, 'r1.txt')
-  outfile_r2 = File.join(out_dir_consensus, 'r2.txt')
+  outfile_r1 = File.join(out_dir_consensus, 'r1.fasta')
+  outfile_r2 = File.join(out_dir_consensus, 'r2.fasta')
   outfile_log = File.join(out_dir_set, 'log.json')
+  # if export_raw is true, create dir for raw sequence
+  if export_raw
+    out_dir_raw = File.join(out_dir_set, "raw")
+    Dir.mkdir(out_dir_raw) unless File.directory?(out_dir_raw)
+    outfile_raw_r1 = File.join(out_dir_raw, 'r1.raw.fasta')
+    outfile_raw_r2 = File.join(out_dir_raw, 'r2.raw.fasta')
+    raw_r1_f = File.open(outfile_raw_r1, 'w')
+    raw_r2_f = File.open(outfile_raw_r2, 'w')
+    bio_r1.keys.each do |k|
+      raw_r1_f.puts k + "_r1"
+      raw_r2_f.puts k + "_r2"
+      raw_r1_f.puts bio_r1[k]
+      raw_r2_f.puts bio_r2[k].rc
+    end
+    raw_r1_f.close
+    raw_r2_f.close
+  end
   # create TCS
   pid_seqtag_hash = {}
@@ -456,19 +413,30 @@ primers.each do |primer|
     f.puts JSON.pretty_generate(pid_json)
   end
-  if primer[:end_join]
-    log.puts Time.now.to_s + "\t" +  "Start end-pairing for TCS..."
-    shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
-    case primer[:end_join_option]
+  def end_join(dir, option, overlap)
+    shp = ViralSeq::SeqHashPair.fa(dir)
+    case option
     when 1
-      joined_sh = shp.join1(primer[:overlap])
+      joined_sh = shp.join1()
     when 3
       joined_sh = shp.join2
     when 4
       joined_sh = shp.join2(model: :indiv)
     end
+    return joined_sh
+  end
+  if primer[:end_join]
+    log.puts Time.now.to_s + "\t" +  "Start end-pairing for TCS..."
+    shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
+    joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
     log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
     summary_json[:combined_tcs] = joined_sh.size
+    if export_raw
+      joined_sh_raw = end_join(out_dir_raw, primer[:end_join_option], primer[:overlap])
+    end
   else
     File.open(outfile_log, "w") do |f|
       f.puts JSON.pretty_generate(summary_json)
@@ -501,8 +469,28 @@ primers.each do |primer|
         joined_seq[seq_name] = seq + new_r2_seq[seq_name]
       end
       joined_sh = ViralSeq::SeqHash.new(joined_seq)
+      if export_raw
+        r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
+        r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
+        r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
+        r2_sh_raw = r2_sh_raw.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
+        new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
+        new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
+        joined_seq_raw = {}
+        new_r1_seq_raw.each do |seq_name, seq|
+          next unless seq
+          next unless new_r2_seq_raw[seq_name]
+          joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
+        end
+        joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
+      end
     else
       joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
+      if export_raw
+        joined_sh_raw = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
+      end
     end
     log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
     summary_json[:combined_tcs_after_qc] = joined_sh.size
@@ -512,7 +500,10 @@ primers.each do |primer|
       trim_ref = primer[:trim_ref].to_sym
       joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
     end
-    joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.txt"))
+    joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
+    if export_raw
+      joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.fasta"))
+    end
   end
   File.open(outfile_log, "w") do |f|

data/bin/tcs_json_generator CHANGED

@@ -2,6 +2,7 @@
 # TCS pipeline JSON params generator.
+require 'viral_seq'
 require 'colorize'
 require 'json'
@@ -26,10 +27,8 @@ def get_ref
         end
 end
-TCS_VERSION = "2.0.0"
 puts "\n" + '-'*58
-puts '| JSON Parameter Generator for ' + "TCS #{TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
+puts '| JSON Parameter Generator for ' + "TCS #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
 puts '-'*58 + "\n"
 param = {}
@@ -48,8 +47,8 @@ else
 end
 param[:primer_pairs] = []
-continue = true
-while continue
+loop do
   data = {}
   puts "Enter the name for the sequenced region: "
   print '> '
@@ -147,14 +146,11 @@ while continue
     data[:end_join] = false
   end
+  param[:primer_pairs] << data
   print "Do you wish to conintue? Y/N \n> "
   continue_sig = gets.chomp.rstrip
-  if continue_sig =~ /y|yes/i
-    continue = true
-  else
-    continue = false
-  end
-  param[:primer_pairs] << data
+  break unless continue_sig =~ /y|yes/i
 end
 puts "\nYour JSON string is:"

data/lib/viral_seq/seq_hash.rb CHANGED

@@ -313,22 +313,22 @@ module ViralSeq
     # screen for sequences with stop codons.
     # @param (see #translate)
-    # @return [Array] of two elements [seqhash_stop_codon, seqhash_no_stop_codon],
+    # @return [Hash] of two SeqHash objects {with_stop_codon: seqHash, without_stop_codon: seqHash},
     #
-    #   # seqhash_stop_codon: ViralSeq::SeqHash object with stop codons
-    #   # seqhash_no_stop_codon: ViralSeq::SeqHash object without stop codons
+    #   # :with_stop_codon : ViralSeq::SeqHash object with stop codons
+    #   # :without_stop_codon: ViralSeq::SeqHash object without stop codons
     # @example given a hash of sequences, return a sub-hash containing only the sequences with stop codons
     #   my_seqhash = ViralSeq::SeqHash.fa('my_fasta_file.fasta')
     #   my_seqhash.dna_hash
     #   => {">seq1"=>"ATAAGAACG", ">seq2"=>"ATATGAACG", ">seq3"=>"ATGAGAACG", ">seq4"=>"TATTAGACG", ">seq5"=>"CGCTGAACG"}
-    #   stop_codon_seqhash = my_seqhash.stop_codon[0]
+    #   stop_codon_seqhash = my_seqhash.stop_codon[:with_stop_codon]
     #   stop_codon_seqhash.dna_hash
     #   => {">seq2"=>"ATATGAACG", ">seq4"=>"TATTAGACG", ">seq5"=>"CGCTGAACG"}
     #   stop_codon_seqhash.aa_hash
     #   => {">seq2"=>"I*T", ">seq4"=>"Y*T", ">seq5"=>"R*T"}
     #   stop_codon_seqhash.title
     #   => "my_fasta_file_stop"
-    #   filtered_seqhash = my_seqhash.stop_codon[1]
+    #   filtered_seqhash = my_seqhash.stop_codon[:without_stop_codon]
     #   filtered_seqhash.aa_hash
     #   {">seq1"=>"IRT", ">seq3"=>"MRT"}
@@ -343,7 +343,10 @@ module ViralSeq
       seqhash1.title = self.title + "_stop"
       keys2 = aa_seqs.keys - keys
       seqhash2 = self.sub(keys2)
-      return [seqhash1, seqhash2]
+      return {
+        with_stop_codon: seqhash1,
+        without_stop_codon: seqhash2
+      }
     end #end of #stop_codon
@@ -399,10 +402,10 @@ module ViralSeq
     #   # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
     #   # note:  criteria 2 only applies on a sequence file containing more than 20 sequences,
     #   #        b/c Poisson model does not do well on small sample size.
-    # @return [Array] three values.
-    #   first value, `array[0]`: a ViralSeq:SeqHash object for sequences with hypermutations
-    #   second value, `array[1]`: a ViralSeq:SeqHash object for sequences without hypermutations
-    #   third value, `array[2]`: a two-demensional array `[[a,b], [c,d]]` for statistic_info, including the following information,
+    # @return [Hash] three pairs.
+    #   :a3g_seq: a ViralSeq::SeqHash object for sequences with hypermutations
+    #   :filtered_seq : a ViralSeq::SeqHash object for sequences without hypermutations
+    #   :stats : a two-dimensional array `[[a,b], [c,d]]` for statistic_info, including the following information,
     #     # sequence tag
     #     # G to A mutation numbers at potential a3g positions
     #     # total potential a3g G positions
@@ -413,17 +416,17 @@ module ViralSeq
     # @example identify apobec3gf mutations from a sequence fasta file
     #   my_seqhash = ViralSeq::SeqHash.fa('spec/sample_files/sample_a3g_sequence1.fasta')
     #   hypermut = my_seqhash.a3g
-    #   hypermut[0].dna_hash.keys
+    #   hypermut[:a3g_seq].dna_hash.keys
     #   => [">Seq7", ">Seq14"]
-    #   hypermut[1].dna_hash.keys
+    #   hypermut[:filtered_seq].dna_hash.keys
     #   => [">Seq1", ">Seq2", ">Seq5"]
-    #   hypermut[2]
+    #   hypermut[:stats]
     #   => [[">Seq7", 23, 68, 1, 54, 18.26, 4.308329383112348e-06], [">Seq14", 45, 68, 9, 54, 3.97, 5.2143571971582974e-08]]
     #
     # @example identify apobec3gf mutations from another sequence fasta file
     #   my_seqhash = ViralSeq::SeqHash.fa('spec/sample_files/sample_a3g_sequence2.fasta')
     #   hypermut = my_seqhash.a3g
-    #   hypermut[2]
+    #   hypermut[:stats]
     #   => [[">CTAACACTCA_134_a3g-sample2", 4, 35, 0, 51, Infinity, 0.02465676660128911], [">ATAGTGCCCA_60_a3g-sample2", 4, 35, 1, 51, 5.83, 0.1534487353839561]]
     #   # notice sequence ">ATAGTGCCCA_60_a3g-sample2" has a p value at 0.15, greater than 0.05,
     #   # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
@@ -516,7 +519,10 @@ module ViralSeq
       hm_seq_hash.title = self.title + "_hypermut"
       hm_seq_hash.file = self.file
       filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
-      return [hm_seq_hash, filtered_seq_hash, hm_hash.values]
+      return { a3g_seq: hm_seq_hash,
+               filtered_seq: filtered_seq_hash,
+               stats: hm_hash.values
+              }
     end #end of #a3g_hypermut
     alias_method :a3g, :a3g_hypermut
@@ -730,6 +736,7 @@ module ViralSeq
       seq_hash_unique.each do |seq|
         loc = ViralSeq::Sequence.new('', seq).locator(ref_option, path_to_muscle)
+        next unless loc # if locator tool fails, skip this seq.
         if start_nt.include?(loc[0]) && end_nt.include?(loc[1])
           if indel
             seq_hash_unique_pass << seq
@@ -1151,7 +1158,7 @@ module ViralSeq
     # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
     # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
     # @return [ViralSeq::SeqHash] a new ViralSeq::SeqHash object with trimmed sequences
     def trim(start_nt, end_nt, ref_option = :HXB2, path_to_muscle = false)
       seq_hash = self.dna_hash.dup
       seq_hash_unique = seq_hash.uniq_hash

data/lib/viral_seq/version.rb CHANGED

@@ -2,6 +2,6 @@
 # version info and history
 module ViralSeq
-  VERSION = "1.0.8"
-  TCS_VERSION = "2.0.0"
+  VERSION = "1.0.9"
+  TCS_VERSION = "2.0.1"
 end

data/viral_seq.gemspec CHANGED

@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
   spec.post_install_message = "Thanks for installing!"
   spec.add_development_dependency "bundler", "~> 2.0"
-  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency "rake", "~> 13.0"
   spec.add_development_dependency "rspec", "~> 3.0"
   # muscle_bio gem required

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: viral_seq
 version: !ruby/object:Gem::Version
-  version: 1.0.8
+  version: 1.0.9
 platform: ruby
 authors:
 - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-02-29 00:00:00.000000000 Z
+date: 2020-07-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -31,14 +31,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '13.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '13.0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement