viral_seq 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9055ee4b893bdff77117a2a9c005166637c177b0ed243a5362488ccf7d893e76
4
- data.tar.gz: 87faa7b60c47eecc6f1e3267d4f2a0df549dc70d935d8adabaf54994e60b8ab4
3
+ metadata.gz: 0c704f9231123785d82a46e3bb8eb797564ffce24e23f91817b634b502438ac3
4
+ data.tar.gz: 2cfa762e094166be0510b5b2e344dbac2913b7da431c922def1802f594d96559
5
5
  SHA512:
6
- metadata.gz: c5a3d9aab73cd1e8b696527392c6caaa0a4eec485fe0dbf38a7db456ddce115288f2ae735717ec9595cc4f732cb6afee8dca750b0ebfc703112a5df7196230ca
7
- data.tar.gz: f0f040bb1c70f3569ae132023f367f945c408ba73d8d495976ceb0cc2538d7104a56f2009f89789eacbfe45921c017e40578fcb4ccd1df489f75d83d7b733a85
6
+ metadata.gz: 3f5d9b0bd5acada5be9bc20d9264917b7317ef0abbfa70395527483fb2e7e0d256c5423f7a498b3c7add3416aadd2806a797b297360175f6257fae19abfd3122
7
+ data.tar.gz: f64e177e00a642a090cc07b6ef7150ae6047e58a3f84b6d4186300ca60925bd212e0561edf4046d2467ebc297701c1706cec23f877a4f9724799361c3fa9765e
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.0.0)
4
+ viral_seq (1.0.1)
5
5
  muscle_bio (~> 0.4)
6
6
 
7
7
  GEM
@@ -34,4 +34,4 @@ DEPENDENCIES
34
34
  viral_seq!
35
35
 
36
36
  BUNDLED WITH
37
- 2.0.1
37
+ 2.0.2
data/README.md CHANGED
@@ -15,8 +15,45 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
15
15
  #!/usr/bin/env ruby
16
16
  require 'viral_seq'
17
17
 
18
+ ## Some Examples
19
+
20
+ Load nucleotide sequences from a FASTA format sequence file
21
+
22
+ my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
23
+
24
+ Make an alignment (using MUSCLE)
25
+
26
+ aligned_seqhash = my_seqhash.align
27
+
28
+ Filter nucleotide sequences with the reference coordinates (HIV Protease)
29
+
30
+ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
31
+
32
+ Further filter out sequences with Apobec3g/f hypermutations
33
+
34
+ qc_seqhash = qc_seqhash.a3g
35
+
36
+ Calculate nucleotide diversity π
37
+
38
+ qc_seqhash.pi
39
+
40
+ Calculate cut-off for minority variants based on Poisson model
41
+
42
+ cut_off = qc_seqhash.pm
43
+
44
+ Examine drug resistance mutations in the HIV PR region
45
+
46
+ qc_seqhash.sdrm_hiv_pr(cut_off)
47
+
18
48
  ## Updates
19
49
 
50
+ Version 1.0.1-07102019:
51
+
52
+ 1. Add keyword argument :model to ViralSeq::SeqHashPair#join2.
53
+ 2. Add method ViralSeq::SeqHash#sequence_locator (also: #loc), a function to locate sequences on HIV/SIV reference genomes, similar to the HIV Sequence Locator from LANL.
54
+ 3. Add executable 'locator'. An HIV/SIV sequence locator tool similar to LANL Sequence Locator.
55
+ 4. Update documentation.
56
+
20
57
  Version 1.0.0-07092019:
21
58
 
22
59
  1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
data/bin/locator ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'viral_seq'
4
+ require 'csv'
5
+ require 'optparse'
6
+
7
+ module ViralSeq
8
+ class SeqHash
9
+
10
+ def sequence_locator(ref_option = :HXB2)
11
+ out_array = []
12
+ dna_seq = self.dna_hash
13
+ title = self.title
14
+
15
+ uniq_dna = dna_seq.uniq_hash
16
+
17
+ uniq_dna.each do |seq,names|
18
+ s = ViralSeq::Sequence.new('',seq)
19
+ loc = s.locator(ref_option)
20
+ names.each do |name|
21
+ out_array << ([title, name, ref_option.to_s] + loc)
22
+ end
23
+ end
24
+ return out_array
25
+ end # end of locator
26
+ alias_method :loc, :sequence_locator
27
+ end
28
+ end
29
+
30
+ def myparser
31
+ options = {}
32
+ OptionParser.new do |opts|
33
+ opts.banner = "Usage: locator -i [nt_sequence_fasta_file] -o [locator_info_csv_file] -r [reference_genome_option]"
34
+
35
+ opts.on('-i', '--infile FASTA_FILE', 'nt sequence file in FASTA format') do |i|
36
+ options[:infile] = i
37
+ end
38
+
39
+ opts.on('-o', '--outfile CSV_FILE', 'output .csv file for locator info') do |o|
40
+ options[:outfile] = o
41
+ end
42
+
43
+ opts.on('-r', '--ref_option OPTION', 'reference genome option, choose from `HXB2` (default), `NL43`, `MAC239`') do |o|
44
+ options[:ref_option] = o.to_sym
45
+ end
46
+
47
+ opts.on("-h", "--help", "Prints this help") do
48
+ puts opts
49
+ exit
50
+ end
51
+ end.parse!
52
+ return options
53
+ end
54
+
55
+ puts "\nSequence Locator (RubyGem::ViralSeq) v1.0.1 by Shuntai Zhou"
56
+ puts "See details at https://github.com/ViralSeq/viral_seq\n"
57
+ puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
58
+
59
+ ARGV << '-h' if ARGV.size == 0
60
+
61
+ options = myparser
62
+
63
+ begin
64
+ if options[:infile]
65
+ seq_file = options[:infile]
66
+ else
67
+ raise StandardError.new("Input file sequence file not found")
68
+ end
69
+
70
+ if options[:outfile]
71
+ csv_file = options[:outfile]
72
+ else
73
+ raise StandardError.new("Please provide path to output csv file")
74
+ end
75
+
76
+ unless File.exist?(seq_file)
77
+ raise StandardError.new("Input file sequence file not found")
78
+ end
79
+
80
+ seqs = ViralSeq::SeqHash.fa(seq_file)
81
+ opt = options[:ref_option] ? options[:ref_option] : :HXB2
82
+
83
+ unless [:HXB2, :NL43, :MAC239].include? opt
84
+ puts "Reference option #{opt} not recognized, using `:HXB2` as the reference genome."
85
+ opt = :HXB2
86
+ end
87
+
88
+ locs = seqs.loc(opt)
89
+ head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
90
+ locs.unshift(head)
91
+ data = CSV.generate do |csv|
92
+ locs.each {|loc| csv << loc}
93
+ end
94
+
95
+ File.write(csv_file, data)
96
+ rescue StandardError => e
97
+ puts e.message
98
+ puts "\n"
99
+ ARGV.clear
100
+ ARGV << '-h'
101
+ myparser
102
+ end
@@ -713,6 +713,43 @@ module ViralSeq
713
713
  self.sub(seq_pass)
714
714
  end # end of #hiv_seq_qc
715
715
 
716
+ # sequence locator for SeqHash object, resembling HIV Sequence Locator from LANL
717
+ # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
718
+ # @return [Array] two dimensional array `[[],[],[],...]` for each sequence, including the following information:
719
+ #
720
+ # title of the SeqHash object (String)
721
+ #
722
+ # sequence taxa (String)
723
+ #
724
+ # start_location (Integer)
725
+ #
726
+ # end_location (Integer)
727
+ #
728
+ # percentage_of_similarity_to_reference_sequence (Float)
729
+ #
730
+ # containing_indel? (Boolean)
731
+ #
732
+ # aligned_input_sequence (String)
733
+ #
734
+ # aligned_reference_sequence (String)
735
+ # @see https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html LANL Sequence Locator
736
+ def sequence_locator(ref_option = :HXB2)
737
+ out_array = []
738
+ dna_seq = self.dna_hash
739
+ title = self.title
740
+
741
+ uniq_dna = dna_seq.uniq_hash
742
+
743
+ uniq_dna.each do |seq,names|
744
+ s = ViralSeq::Sequence.new('',seq)
745
+ loc = s.locator(ref_option)
746
+ names.each do |name|
747
+ out_array << ([title, name, ref_option.to_s] + loc)
748
+ end
749
+ end
750
+ return out_array
751
+ end # end of locator
752
+ alias_method :loc, :sequence_locator
716
753
 
717
754
  # Remove sequences with residual offspring Primer IDs.
718
755
  # Compare PID with sequences which have identical sequences.
@@ -139,10 +139,10 @@ module ViralSeq
139
139
  # my_seqhashpair = ViralSeq::SeqHashPair.new(paired_seq2)
140
140
  # my_seqhashpair.join2.dna_hash
141
141
  # => {">pair4"=>"AAAGGGGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
142
- # my_seqhashpair.join2(:indiv).dna_hash
142
+ # my_seqhashpair.join2(model: :indiv).dna_hash
143
143
  # => {">pair4"=>"AAAGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
144
144
 
145
- def join2(model = :con, diff = 0.0)
145
+ def join2(model: :con, diff: 0.0)
146
146
  seq_pair_hash = self.dna_hash
147
147
  begin
148
148
  raise ArgumentError.new(":diff has to be float or integer, input #{diff} invalid.") unless (diff.is_a? Integer or diff.is_a? Float)
@@ -142,13 +142,20 @@ module ViralSeq
142
142
  # # current version only supports nucleotide sequence, not for amino acid sequence.
143
143
  # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
144
144
  # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
145
- # @return [Array] an array of the following info
146
- # # start_location (Integer)
147
- # # end_location (Integer)
148
- # # percentage_of_similarity_to_reference_sequence (Float)
149
- # # containing_indel? (Boolean)
150
- # # aligned_input_sequence (String)
151
- # # aligned_reference_sequence (String)
145
+ # @return [Array] an array of the following info:
146
+ #
147
+ # start_location (Integer)
148
+ #
149
+ # end_location (Integer)
150
+ #
151
+ # percentage_of_similarity_to_reference_sequence (Float)
152
+ #
153
+ # containing_indel? (Boolean)
154
+ #
155
+ # aligned_input_sequence (String)
156
+ #
157
+ # aligned_reference_sequence (String)
158
+ #
152
159
  # @example identify the location of the input sequence on the NL43 genome
153
160
  # sequence = 'AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC'
154
161
  # s = ViralSeq::Sequence.new('my_sequence', sequence)
@@ -349,7 +356,7 @@ module ViralSeq
349
356
  # s = ViralSeq::Sequence.new('my_seq', seq)
350
357
  # s.sequence_clip(2333, 2433, :HXB2).dna
351
358
  # => "AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC"
352
-
359
+
353
360
  def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false)
354
361
  loc = self.locator(ref_option, path_to_muscle)
355
362
  l1 = loc[0]
@@ -2,5 +2,5 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.0.0"
5
+ VERSION = "1.0.1"
6
6
  end
data/viral_seq.gemspec CHANGED
@@ -20,8 +20,8 @@ Gem::Specification.new do |spec|
20
20
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
- spec.bindir = "exe"
24
- # spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }\
23
+ spec.bindir = "bin"
24
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
25
25
  spec.require_paths = ["lib"]
26
26
  spec.post_install_message = "Thanks for installing!"
27
27
 
@@ -31,8 +31,5 @@ Gem::Specification.new do |spec|
31
31
 
32
32
  # muscle_bio gem required
33
33
  spec.add_runtime_dependency "muscle_bio", "~> 0.4"
34
-
35
-
36
- spec.requirements << 'MUSCLE (http://www.drive5.com/muscle) required for some functions'
37
34
  spec.requirements << 'R required for some functions'
38
35
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
8
8
  - Michael Clark
9
9
  autorequire:
10
- bindir: exe
10
+ bindir: bin
11
11
  cert_chain: []
12
- date: 2019-07-09 00:00:00.000000000 Z
12
+ date: 2019-07-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -73,7 +73,8 @@ description: |-
73
73
  email:
74
74
  - shuntai.zhou@gmail.com
75
75
  - clarkmu@gmail.com
76
- executables: []
76
+ executables:
77
+ - locator
77
78
  extensions: []
78
79
  extra_rdoc_files: []
79
80
  files:
@@ -86,8 +87,7 @@ files:
86
87
  - LICENSE.txt
87
88
  - README.md
88
89
  - Rakefile
89
- - bin/console
90
- - bin/setup
90
+ - bin/locator
91
91
  - lib/viral_seq.rb
92
92
  - lib/viral_seq/Integer.rb
93
93
  - lib/viral_seq/constant.rb
@@ -124,7 +124,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0'
126
126
  requirements:
127
- - MUSCLE (http://www.drive5.com/muscle) required for some functions
128
127
  - R required for some functions
129
128
  rubygems_version: 3.0.3
130
129
  signing_key:
data/bin/console DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "bundler/setup"
4
- require "viral_seq"
5
-
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
8
-
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require "irb"
14
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here