viral_seq 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9055ee4b893bdff77117a2a9c005166637c177b0ed243a5362488ccf7d893e76
4
- data.tar.gz: 87faa7b60c47eecc6f1e3267d4f2a0df549dc70d935d8adabaf54994e60b8ab4
3
+ metadata.gz: 0c704f9231123785d82a46e3bb8eb797564ffce24e23f91817b634b502438ac3
4
+ data.tar.gz: 2cfa762e094166be0510b5b2e344dbac2913b7da431c922def1802f594d96559
5
5
  SHA512:
6
- metadata.gz: c5a3d9aab73cd1e8b696527392c6caaa0a4eec485fe0dbf38a7db456ddce115288f2ae735717ec9595cc4f732cb6afee8dca750b0ebfc703112a5df7196230ca
7
- data.tar.gz: f0f040bb1c70f3569ae132023f367f945c408ba73d8d495976ceb0cc2538d7104a56f2009f89789eacbfe45921c017e40578fcb4ccd1df489f75d83d7b733a85
6
+ metadata.gz: 3f5d9b0bd5acada5be9bc20d9264917b7317ef0abbfa70395527483fb2e7e0d256c5423f7a498b3c7add3416aadd2806a797b297360175f6257fae19abfd3122
7
+ data.tar.gz: f64e177e00a642a090cc07b6ef7150ae6047e58a3f84b6d4186300ca60925bd212e0561edf4046d2467ebc297701c1706cec23f877a4f9724799361c3fa9765e
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.0.0)
4
+ viral_seq (1.0.1)
5
5
  muscle_bio (~> 0.4)
6
6
 
7
7
  GEM
@@ -34,4 +34,4 @@ DEPENDENCIES
34
34
  viral_seq!
35
35
 
36
36
  BUNDLED WITH
37
- 2.0.1
37
+ 2.0.2
data/README.md CHANGED
@@ -15,8 +15,45 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
15
15
  #!/usr/bin/env ruby
16
16
  require 'viral_seq'
17
17
 
18
+ ## Some Examples
19
+
20
+ Load nucleotide sequences from a FASTA format sequence file
21
+
22
+ my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
23
+
24
+ Make an alignment (using MUSCLE)
25
+
26
+ aligned_seqhash = my_seqhash.align
27
+
28
+ Filter nucleotide sequences with the reference coordinates (HIV Protease)
29
+
30
+ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
31
+
32
+ Further filter out sequences with Apobec3g/f hypermutations
33
+
34
+ qc_seqhash = qc_seqhash.a3g
35
+
36
+ Calculate nucleotide diversity π
37
+
38
+ qc_seqhash.pi
39
+
40
+ Calculate cut-off for minority variants based on Poisson model
41
+
42
+ cut_off = qc_seqhash.pm
43
+
44
+ Examine for drug resistance mutations for HIV PR region
45
+
46
+ qc_seqhash.sdrm_hiv_pr(cut_off)
47
+
18
48
  ## Updates
19
49
 
50
+ Version 1.0.1-07102019:
51
+
52
+ 1. Add keyword argument :model to ViralSeq::SeqHashPair#join2.
53
+ 2. Add method ViralSeq::SeqHash#sequence_locator (also: #loc), a function to locate sequences on HIV/SIV reference genomes, similar to the HIV Sequence Locator from LANL.
54
+ 3. Add executable 'locator'. An HIV/SIV sequence locator tool similar to LANL Sequence Locator.
55
+ 4. Update documentation.
56
+
20
57
  Version 1.0.0-07092019:
21
58
 
22
59
  1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
data/bin/locator ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'viral_seq'
4
+ require 'csv'
5
+ require 'optparse'
6
+
7
+ module ViralSeq
8
+ class SeqHash
9
+
10
+ def sequence_locator(ref_option = :HXB2)
11
+ out_array = []
12
+ dna_seq = self.dna_hash
13
+ title = self.title
14
+
15
+ uniq_dna = dna_seq.uniq_hash
16
+
17
+ uniq_dna.each do |seq,names|
18
+ s = ViralSeq::Sequence.new('',seq)
19
+ loc = s.locator(ref_option)
20
+ names.each do |name|
21
+ out_array << ([title, name, ref_option.to_s] + loc)
22
+ end
23
+ end
24
+ return out_array
25
+ end # end of locator
26
+ alias_method :loc, :sequence_locator
27
+ end
28
+ end
29
+
30
+ def myparser
31
+ options = {}
32
+ OptionParser.new do |opts|
33
+ opts.banner = "Usage: locator -i [nt_sequence_fasta_file] -o [locator_info_csv_file] -r [reference_genome_option]"
34
+
35
+ opts.on('-i', '--infile FASTA_FILE', 'nt sequence file in FASTA format') do |i|
36
+ options[:infile] = i
37
+ end
38
+
39
+ opts.on('-o', '--outfile CSV_FILE', 'output .csv file for locator info') do |o|
40
+ options[:outfile] = o
41
+ end
42
+
43
+ opts.on('-r', '--ref_option OPTION', 'reference genome option, choose from `HXB2` (default), `NL43`, `MAC239`') do |o|
44
+ options[:ref_option] = o.to_sym
45
+ end
46
+
47
+ opts.on("-h", "--help", "Prints this help") do
48
+ puts opts
49
+ exit
50
+ end
51
+ end.parse!
52
+ return options
53
+ end
54
+
55
+ puts "\nSequence Locator (RubyGem::ViralSeq) v1.0.1 by Shuntai Zhou"
56
+ puts "See details at https://github.com/ViralSeq/viral_seq\n"
57
+ puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
58
+
59
+ ARGV << '-h' if ARGV.size == 0
60
+
61
+ options = myparser
62
+
63
+ begin
64
+ if options[:infile]
65
+ seq_file = options[:infile]
66
+ else
67
+ raise StandardError.new("Input file sequence file not found")
68
+ end
69
+
70
+ if options[:outfile]
71
+ csv_file = options[:outfile]
72
+ else
73
+ raise StandardError.new("Please provide path to output csv file")
74
+ end
75
+
76
+ unless File.exist?(seq_file)
77
+ raise StandardError.new("Input file sequence file not found")
78
+ end
79
+
80
+ seqs = ViralSeq::SeqHash.fa(seq_file)
81
+ opt = options[:ref_option] ? options[:ref_option] : :HXB2
82
+
83
+ unless [:HXB2, :NL43, :MAC239].include? opt
84
+ puts "Reference option #{opt} not recognized, using `:HXB2` as the reference genome."
85
+ opt = :HXB2
86
+ end
87
+
88
+ locs = seqs.loc(opt)
89
+ head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
90
+ locs.unshift(head)
91
+ data = CSV.generate do |csv|
92
+ locs.each {|loc| csv << loc}
93
+ end
94
+
95
+ File.write(csv_file, data)
96
+ rescue StandardError => e
97
+ puts e.message
98
+ puts "\n"
99
+ ARGV.clear
100
+ ARGV << '-h'
101
+ myparser
102
+ end
@@ -713,6 +713,43 @@ module ViralSeq
713
713
  self.sub(seq_pass)
714
714
  end # end of #hiv_seq_qc
715
715
 
716
+ # sequence locator for SeqHash object, resembling HIV Sequence Locator from LANL
717
+ # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
718
+ # @return [Array] two dimensional array `[[],[],[],...]` for each sequence, including the following information:
719
+ #
720
+ # title of the SeqHash object (String)
721
+ #
722
+ # sequence taxa (String)
723
+ #
724
+ # start_location (Integer)
725
+ #
726
+ # end_location (Integer)
727
+ #
728
+ # percentage_of_similarity_to_reference_sequence (Float)
729
+ #
730
+ # containing_indel? (Boolean)
731
+ #
732
+ # aligned_input_sequence (String)
733
+ #
734
+ # aligned_reference_sequence (String)
735
+ # @see https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html LANL Sequence Locator
736
+ def sequence_locator(ref_option = :HXB2)
737
+ out_array = []
738
+ dna_seq = self.dna_hash
739
+ title = self.title
740
+
741
+ uniq_dna = dna_seq.uniq_hash
742
+
743
+ uniq_dna.each do |seq,names|
744
+ s = ViralSeq::Sequence.new('',seq)
745
+ loc = s.locator(ref_option)
746
+ names.each do |name|
747
+ out_array << ([title, name, ref_option.to_s] + loc)
748
+ end
749
+ end
750
+ return out_array
751
+ end # end of locator
752
+ alias_method :loc, :sequence_locator
716
753
 
717
754
  # Remove sequences with residual offspring Primer IDs.
718
755
  # Compare PID with sequences which have identical sequences.
@@ -139,10 +139,10 @@ module ViralSeq
139
139
  # my_seqhashpair = ViralSeq::SeqHashPair.new(paired_seq2)
140
140
  # my_seqhashpair.join2.dna_hash
141
141
  # => {">pair4"=>"AAAGGGGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
142
- # my_seqhashpair.join2(:indiv).dna_hash
142
+ # my_seqhashpair.join2(model: :indiv).dna_hash
143
143
  # => {">pair4"=>"AAAGGGGGGGTT", ">pair5"=>"AAAAAAGGGGTTTTT", ">pair6"=>"AAACAAGGGGTTTTT"}
144
144
 
145
- def join2(model = :con, diff = 0.0)
145
+ def join2(model: :con, diff: 0.0)
146
146
  seq_pair_hash = self.dna_hash
147
147
  begin
148
148
  raise ArgumentError.new(":diff has to be float or integer, input #{diff} invalid.") unless (diff.is_a? Integer or diff.is_a? Float)
@@ -142,13 +142,20 @@ module ViralSeq
142
142
  # # current version only supports nucleotide sequence, not for amino acid sequence.
143
143
  # @param ref_option [Symbol], name of reference genomes, options are `:HXB2`, `:NL43`, `:MAC239`
144
144
  # @param path_to_muscle [String], path to the muscle executable, if not provided, use MuscleBio to run Muscle
145
- # @return [Array] an array of the following info
146
- # # start_location (Integer)
147
- # # end_location (Integer)
148
- # # percentage_of_similarity_to_reference_sequence (Float)
149
- # # containing_indel? (Boolean)
150
- # # aligned_input_sequence (String)
151
- # # aligned_reference_sequence (String)
145
+ # @return [Array] an array of the following info:
146
+ #
147
+ # start_location (Integer)
148
+ #
149
+ # end_location (Integer)
150
+ #
151
+ # percentage_of_similarity_to_reference_sequence (Float)
152
+ #
153
+ # containing_indel? (Boolean)
154
+ #
155
+ # aligned_input_sequence (String)
156
+ #
157
+ # aligned_reference_sequence (String)
158
+ #
152
159
  # @example identify the location of the input sequence on the NL43 genome
153
160
  # sequence = 'AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC'
154
161
  # s = ViralSeq::Sequence.new('my_sequence', sequence)
@@ -349,7 +356,7 @@ module ViralSeq
349
356
  # s = ViralSeq::Sequence.new('my_seq', seq)
350
357
  # s.sequence_clip(2333, 2433, :HXB2).dna
351
358
  # => "AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC"
352
-
359
+
353
360
  def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false)
354
361
  loc = self.locator(ref_option, path_to_muscle)
355
362
  l1 = loc[0]
@@ -2,5 +2,5 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.0.0"
5
+ VERSION = "1.0.1"
6
6
  end
data/viral_seq.gemspec CHANGED
@@ -20,8 +20,8 @@ Gem::Specification.new do |spec|
20
20
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
- spec.bindir = "exe"
24
- # spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }\
23
+ spec.bindir = "bin"
24
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
25
25
  spec.require_paths = ["lib"]
26
26
  spec.post_install_message = "Thanks for installing!"
27
27
 
@@ -31,8 +31,5 @@ Gem::Specification.new do |spec|
31
31
 
32
32
  # muscle_bio gem required
33
33
  spec.add_runtime_dependency "muscle_bio", "~> 0.4"
34
-
35
-
36
- spec.requirements << 'MUSCLE (http://www.drive5.com/muscle) required for some functions'
37
34
  spec.requirements << 'R required for some functions'
38
35
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
8
8
  - Michael Clark
9
9
  autorequire:
10
- bindir: exe
10
+ bindir: bin
11
11
  cert_chain: []
12
- date: 2019-07-09 00:00:00.000000000 Z
12
+ date: 2019-07-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -73,7 +73,8 @@ description: |-
73
73
  email:
74
74
  - shuntai.zhou@gmail.com
75
75
  - clarkmu@gmail.com
76
- executables: []
76
+ executables:
77
+ - locator
77
78
  extensions: []
78
79
  extra_rdoc_files: []
79
80
  files:
@@ -86,8 +87,7 @@ files:
86
87
  - LICENSE.txt
87
88
  - README.md
88
89
  - Rakefile
89
- - bin/console
90
- - bin/setup
90
+ - bin/locator
91
91
  - lib/viral_seq.rb
92
92
  - lib/viral_seq/Integer.rb
93
93
  - lib/viral_seq/constant.rb
@@ -124,7 +124,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0'
126
126
  requirements:
127
- - MUSCLE (http://www.drive5.com/muscle) required for some functions
128
127
  - R required for some functions
129
128
  rubygems_version: 3.0.3
130
129
  signing_key:
data/bin/console DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "bundler/setup"
4
- require "viral_seq"
5
-
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
8
-
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require "irb"
14
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here