RubyGems - viral_seq - Versions diffs - 1.0.0 → 1.0.1 - Mend

viral_seq 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/Gemfile.lock +2 -2
data/README.md +37 -0
data/bin/locator +102 -0
data/lib/viral_seq/seq_hash.rb +37 -0
data/lib/viral_seq/seq_hash_pair.rb +2 -2
data/lib/viral_seq/sequence.rb +15 -8
data/lib/viral_seq/version.rb +1 -1
data/viral_seq.gemspec +2 -5
metadata +6 -7
data/bin/console +0 -14
data/bin/setup +0 -8

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9055ee4b893bdff77117a2a9c005166637c177b0ed243a5362488ccf7d893e76
-  data.tar.gz: 87faa7b60c47eecc6f1e3267d4f2a0df549dc70d935d8adabaf54994e60b8ab4
+  metadata.gz: 0c704f9231123785d82a46e3bb8eb797564ffce24e23f91817b634b502438ac3
+  data.tar.gz: 2cfa762e094166be0510b5b2e344dbac2913b7da431c922def1802f594d96559
 SHA512:
-  metadata.gz: c5a3d9aab73cd1e8b696527392c6caaa0a4eec485fe0dbf38a7db456ddce115288f2ae735717ec9595cc4f732cb6afee8dca750b0ebfc703112a5df7196230ca
-  data.tar.gz: f0f040bb1c70f3569ae132023f367f945c408ba73d8d495976ceb0cc2538d7104a56f2009f89789eacbfe45921c017e40578fcb4ccd1df489f75d83d7b733a85
+  metadata.gz: 3f5d9b0bd5acada5be9bc20d9264917b7317ef0abbfa70395527483fb2e7e0d256c5423f7a498b3c7add3416aadd2806a797b297360175f6257fae19abfd3122
+  data.tar.gz: f64e177e00a642a090cc07b6ef7150ae6047e58a3f84b6d4186300ca60925bd212e0561edf4046d2467ebc297701c1706cec23f877a4f9724799361c3fa9765e

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    viral_seq (1.0.0)
+    viral_seq (1.0.1)
       muscle_bio (~> 0.4)
 GEM
@@ -34,4 +34,4 @@ DEPENDENCIES
   viral_seq!
 BUNDLED WITH
-   2.0.1
+   2.0.2

data/README.md CHANGED Viewed

@@ -15,8 +15,45 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
     #!/usr/bin/env ruby
     require 'viral_seq'
+## Some Examples
+Load nucleotide sequences from a FASTA format sequence file
+    my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
+Make an alignment (using MUSCLE)
+    aligned_seqhash = my_seqhash.align
+Filter nucleotide sequences with the reference coordinates (HIV Protease)
+    qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
+Further filter out sequences with Apobec3g/f hypermutations
+    qc_seqhash = qc_seqhash.a3g
+Calculate nucleotide diversity π
+    qc_seqhash.pi
+Calculate cut-off for minority variants based on Poisson model
+    cut_off = qc_seqhash.pm
+Examine for drug resistance mutations for HIV PR region
+    qc_seqhash.sdrm_hiv_pr(cut_off)
 ## Updates
+Version 1.0.1-07102019:
+    1. Add keyword argument :model to ViralSeq::SeqHashPair#join2.
+    2. Add method ViralSeq::SeqHash#sequence_locator (also: #loc), a function to locate sequences on HIV/SIV reference genomes, resembling the HIV Sequence Locator from LANL.
+    3. Add executable 'locator'. An HIV/SIV sequence locator tool similar to LANL Sequence Locator.
+    4. Update documentation.
 Version 1.0.0-07092019:
     1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq

data/bin/locator ADDED Viewed

@@ -0,0 +1,102 @@
+#!/usr/bin/env ruby
+require 'viral_seq'
+require 'csv'
+require 'optparse'
+module ViralSeq
+  class SeqHash
+    def sequence_locator(ref_option = :HXB2)
+      out_array = []
+      dna_seq = self.dna_hash
+      title = self.title
+      uniq_dna = dna_seq.uniq_hash
+      uniq_dna.each do |seq,names|
+        s = ViralSeq::Sequence.new('',seq)
+        loc = s.locator(ref_option)
+        names.each do |name|
+          out_array << ([title, name, ref_option.to_s] + loc)
+        end
+      end
+      return out_array
+    end # end of locator
+    alias_method :loc, :sequence_locator
+  end
+end
+def myparser
+  options = {}
+  OptionParser.new do |opts|
+    opts.banner = "Usage: locator -i [nt_sequence_fasta_file] -o [locator_info_csv_file] -r [reference_genome_option]"
+    opts.on('-i', '--infile FASTA_FILE', 'nt sequence file in FASTA format') do |i|
+      options[:infile] = i
+    end
+    opts.on('-o', '--outfile CSV_FILE', 'output .csv file for locator info') do |o|
+      options[:outfile] = o
+    end
+    opts.on('-r', '--ref_option OPTION', 'reference genome option, choose from `HXB2` (default), `NL43`, `MAC239`') do |o|
+      options[:ref_option] = o.to_sym
+    end
+    opts.on("-h", "--help", "Prints this help") do
+      puts opts
+      exit
+    end
+  end.parse!
+  return options
+end
+puts "\nSequence Locator (RubyGem::ViralSeq) v1.0.1 by Shuntai Zhou"
+puts "See details at https://github.com/ViralSeq/viral_seq\n"
+puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
+ARGV << '-h' if ARGV.size == 0
+options = myparser
+begin
+  if options[:infile]
+    seq_file = options[:infile]
+  else
+    raise StandardError.new("Input file sequence file not found")
+  end
+  if options[:outfile]
+    csv_file = options[:outfile]
+  else
+    raise StandardError.new("Please provide path to output csv file")
+  end
+  unless File.exist?(seq_file)
+    raise StandardError.new("Input file sequence file not found")
+  end
+  seqs = ViralSeq::SeqHash.fa(seq_file)
+  opt =  options[:ref_option] ? options[:ref_option] : :HXB2
+  unless [:HXB2, :NL43, :MAC239].include? opt
+    puts "Reference option #{opt} not recognized, using `:HXB2` as the reference genome."
+    opt = :HXB2
+  end
+  locs = seqs.loc(opt)
+  head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
+  locs.unshift(head)
+  data = CSV.generate do |csv|
+    locs.each {|loc| csv << loc}
+  end
+  File.write(csv_file, data)
+rescue StandardError => e
+  puts e.message
+  puts "\n"
+  ARGV.clear
+  ARGV << '-h'
+  myparser
+end

data/lib/viral_seq/seq_hash.rb CHANGED Viewed

@@ -713,6 +713,43 @@ module ViralSeq
       self.sub(seq_pass)
     end # end of #hiv_seq_qc
+    # sequence locator for SeqHash object, resembling HIV Sequence Locator from LANL
+    # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
+    # @return [Array] two dimensional array `[[],[],[],...]` for each sequence, including the following information:
+    #
+    #     title of the SeqHash object (String)
+    #
+    #     sequence taxa (String)
+    #
+    #     start_location (Integer)
+    #
+    #     end_location (Integer)
+    #
+    #     percentage_of_similarity_to_reference_sequence (Float)
+    #
+    #     containing_indel? (Boolean)
+    #
+    #     aligned_input_sequence (String)
+    #
+    #     aligned_reference_sequence (String)
+    # @see https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html LANL Sequence Locator
+    def sequence_locator(ref_option = :HXB2)
+      out_array = []
+      dna_seq = self.dna_hash
+      title = self.title
+      uniq_dna = dna_seq.uniq_hash
+      uniq_dna.each do |seq,names|
+        s = ViralSeq::Sequence.new('',seq)
+        loc = s.locator(ref_option)
+        names.each do |name|
+          out_array << ([title, name, ref_option.to_s] + loc)
+        end
+      end
+      return out_array
+    end # end of locator
+    alias_method :loc, :sequence_locator
     # Remove sequences with residual offspring Primer IDs.
     #   Compare PID with sequences which have identical sequences.

data/lib/viral_seq/seq_hash_pair.rb CHANGED Viewed

@@ -139,10 +139,10 @@ module ViralSeq
     #   my_seqhashpair = ViralSeq::SeqHashPair.new(paired_seq2)
     #   my_seqhashpair.join2.dna_hash
     #   => {">pair4"=>"AAAGGGGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
-    #   my_seqhashpair.join2(:indiv).dna_hash
+    #   my_seqhashpair.join2(model: :indiv).dna_hash
     #   => {">pair4"=>"AAAGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
-    def join2(model = :con, diff = 0.0)
+    def join2(model: :con, diff: 0.0)
       seq_pair_hash = self.dna_hash
       begin
         raise ArgumentError.new(":diff has to be float or integer, input #{diff} invalid.") unless (diff.is_a? Integer or diff.is_a? Float)

data/lib/viral_seq/sequence.rb CHANGED Viewed

@@ -142,13 +142,20 @@ module ViralSeq
     #   # current version only supports nucleotide sequence, not for amino acid sequence.
     # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
     # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
-    # @return [Array] an array of the following info
-    #   #   start_location (Integer)
-    #   #   end_location (Integer)
-    #   #   percentage_of_similarity_to_reference_sequence (Float)
-    #   #   containing_indel? (Boolean)
-    #   #   aligned_input_sequence (String)
-    #   #   aligned_reference_sequence (String)
+    # @return [Array] an array of the following info:
+    #
+    #   start_location (Integer)
+    #
+    #   end_location (Integer)
+    #
+    #   percentage_of_similarity_to_reference_sequence (Float)
+    #
+    #   containing_indel? (Boolean)
+    #
+    #   aligned_input_sequence (String)
+    #
+    #   aligned_reference_sequence (String)
+    #
     # @example identify the location of the input sequence on the NL43 genome
     #   sequence = 'AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC'
     #   s = ViralSeq::Sequence.new('my_sequence', sequence)
@@ -349,7 +356,7 @@ module ViralSeq
     #   s = ViralSeq::Sequence.new('my_seq', seq)
     #   s.sequence_clip(2333, 2433, :HXB2).dna
     #   => "AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC"
     def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false)
       loc = self.locator(ref_option, path_to_muscle)
       l1 = loc[0]

data/lib/viral_seq/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 # version info and history
 module ViralSeq
-  VERSION = "1.0.0"
+  VERSION = "1.0.1"
 end

data/viral_seq.gemspec CHANGED Viewed

@@ -20,8 +20,8 @@ Gem::Specification.new do |spec|
   spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
     `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
   end
-  spec.bindir        = "exe"
-  # spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }\
+  spec.bindir        = "bin"
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
   spec.post_install_message = "Thanks for installing!"
@@ -31,8 +31,5 @@ Gem::Specification.new do |spec|
   # muscle_bio gem required
   spec.add_runtime_dependency "muscle_bio", "~> 0.4"
-  spec.requirements << 'MUSCLE (http://www.drive5.com/muscle) required for some functions'
   spec.requirements << 'R required for some functions'
 end

metadata CHANGED Viewed

@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: viral_seq
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
 platform: ruby
 authors:
 - Shuntai Zhou
 - Michael Clark
 autorequire:
-bindir: exe
+bindir: bin
 cert_chain: []
-date: 2019-07-09 00:00:00.000000000 Z
+date: 2019-07-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -73,7 +73,8 @@ description: |-
 email:
 - shuntai.zhou@gmail.com
 - clarkmu@gmail.com
-executables: []
+executables:
+- locator
 extensions: []
 extra_rdoc_files: []
 files:
@@ -86,8 +87,7 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
-- bin/console
-- bin/setup
+- bin/locator
 - lib/viral_seq.rb
 - lib/viral_seq/Integer.rb
 - lib/viral_seq/constant.rb
@@ -124,7 +124,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements:
-- MUSCLE (http://www.drive5.com/muscle) required for some functions
 - R required for some functions
 rubygems_version: 3.0.3
 signing_key:

data/bin/console DELETED Viewed

@@ -1,14 +0,0 @@
-#!/usr/bin/env ruby
-require "bundler/setup"
-require "viral_seq"
-# You can add fixtures and/or initialization code here to make experimenting
-# with your gem easier. You can also use a different console, if you like.
-# (If you use this, don't forget to add pry to your Gemfile!)
-# require "pry"
-# Pry.start
-require "irb"
-IRB.start(__FILE__)

data/bin/setup DELETED Viewed

@@ -1,8 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-IFS=$'\n\t'
-set -vx
-bundle install
-# Do any other automated setup that you need to do here