viral_seq 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f54bac7de0e2ffe1b4eac99b6f325b039a3be19d832926ebf1b280154cd0ad2
4
- data.tar.gz: f5c5fe62299ec5253c4b8b395816a9158e8d60f428b757161daf209da70b0c0e
3
+ metadata.gz: a13ba7912ee87511c2ecf19f07256d3a129661c6d7e180d57ecd1e34978386e6
4
+ data.tar.gz: 61e5ed6b423f0b64c53a6bb8e8ec3801bf7e093e4d0741bd71bf9fbfa24f1b55
5
5
  SHA512:
6
- metadata.gz: 5c9b7767cbd000c253c7a6f98432e6230adfb6736cf39ced62815443f82f1cee7f446ebabf7791c67675bf37e664f7f4ac9570350894a871ac0310c36844213e
7
- data.tar.gz: '049d8b2fc82f09ab618d777a2dc6d20922dd6e4c9af8f6c47ca734b8caa46c4fcee10617247404b6ac85c3e31e3d47353c7162b7c6f1df17ec6ce9efb94829a5'
6
+ metadata.gz: f18d03220190bf1479ed29bd4d4b83777ffe5216951d38a91dd2afdc6c07b516883a8694291106b6fee2693a246b8a3c6a824786527cd03730f28f6777fa3231
7
+ data.tar.gz: 7fe146b081a7b633de963ed632bdcb548c71d1f401e227109d8745d23ad770d2099a2aa50bc4553a9450b260b7206892ed2a898d9154764aebe4094f38faeb44
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.0.2)
4
+ viral_seq (1.0.5)
5
5
  muscle_bio (~> 0.4)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -17,36 +17,46 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
17
17
 
18
18
  ## Some Examples
19
19
 
20
- Load nucleotide sequences from a FASTA format sequence file
20
+ ### Load nucleotide sequences from a FASTA format sequence file
21
21
 
22
22
  my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
23
23
 
24
- Make an alignment (using MUSCLE)
24
+ ### Make an alignment (using MUSCLE)
25
25
 
26
26
  aligned_seqhash = my_seqhash.align
27
27
 
28
- Filter nucleotide sequences with the reference coordinates (HIV Protease)
28
+ ### Filter nucleotide sequences with the reference coordinates (HIV Protease)
29
29
 
30
30
  qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
31
31
 
32
- Further filter out sequences with Apobec3g/f hypermutations
32
+ ### Further filter out sequences with Apobec3g/f hypermutations
33
33
 
34
34
  qc_seqhash = qc_seqhash.a3g
35
35
 
36
- Calculate nucleotide diveristy π
36
+ ### Calculate nucleotide diversity π
37
37
 
38
38
  qc_seqhash.pi
39
39
 
40
- Calculate cut-off for minority variants based on Poisson model
40
+ ### Calculate cut-off for minority variants based on Poisson model
41
41
 
42
42
  cut_off = qc_seqhash.pm
43
43
 
44
- Examine for drug resistance mutations for HIV PR region
44
+ ### Examine for drug resistance mutations for HIV PR region
45
45
 
46
46
  qc_seqhash.sdrm_hiv_pr(cut_off)
47
47
 
48
+ ### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
49
+
50
+ $ locator -i sequence.fasta -o sequence.fasta.csv
51
+
48
52
  ## Updates
49
53
 
54
+ Version 1.0.5-07112019:
55
+
56
+ 1. Update ViralSeq::SeqHash#sequence_locator.
57
+ Program will try to determine the direction (`+` or `-`) of the query sequence.
58
+ 2. Update executable `locator` to include a `direction` column in the output .csv file.
59
+
50
60
  Version 1.0.4-07102019:
51
61
 
52
62
  1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
@@ -4,29 +4,6 @@ require 'viral_seq'
4
4
  require 'csv'
5
5
  require 'optparse'
6
6
 
7
- module ViralSeq
8
- class SeqHash
9
-
10
- def sequence_locator(ref_option = :HXB2)
11
- out_array = []
12
- dna_seq = self.dna_hash
13
- title = self.title
14
-
15
- uniq_dna = dna_seq.uniq_hash
16
-
17
- uniq_dna.each do |seq,names|
18
- s = ViralSeq::Sequence.new('',seq)
19
- loc = s.locator(ref_option)
20
- names.each do |name|
21
- out_array << ([title, name, ref_option.to_s] + loc)
22
- end
23
- end
24
- return out_array
25
- end # end of locator
26
- alias_method :loc, :sequence_locator
27
- end
28
- end
29
-
30
7
  def myparser
31
8
  options = {}
32
9
  OptionParser.new do |opts|
@@ -36,7 +13,7 @@ def myparser
36
13
  options[:infile] = i
37
14
  end
38
15
 
39
- opts.on('-o', '--outfile CSV_FILE', 'output .csv file for locator info') do |o|
16
+ opts.on('-o', '--outfile CSV_FILE', "output .csv file for locator info, default as \#\{infile\}.csv") do |o|
40
17
  options[:outfile] = o
41
18
  end
42
19
 
@@ -48,11 +25,17 @@ def myparser
48
25
  puts opts
49
26
  exit
50
27
  end
28
+
29
+ opts.on("-v", "--version", "Version number of RubyGem::ViralSeq") do
30
+ puts opts
31
+ exit
32
+ end
33
+
51
34
  end.parse!
52
35
  return options
53
36
  end
54
37
 
55
- puts "\nSequence Locator (RubyGem::ViralSeq) #{ViralSeq::VERSION} by Shuntai Zhou"
38
+ puts "\nSequence Locator (RubyGem::ViralSeq Version #{ViralSeq::VERSION}) by Shuntai Zhou"
56
39
  puts "See details at https://github.com/ViralSeq/viral_seq\n"
57
40
  puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
58
41
 
@@ -70,7 +53,7 @@ begin
70
53
  if options[:outfile]
71
54
  csv_file = options[:outfile]
72
55
  else
73
- raise StandardError.new("Please provide path to output csv file")
56
+ csv_file = seq_file + ".csv"
74
57
  end
75
58
 
76
59
  unless File.exist?(seq_file)
@@ -86,7 +69,7 @@ begin
86
69
  end
87
70
 
88
71
  locs = seqs.loc(opt)
89
- head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
72
+ head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
90
73
  locs.unshift(head)
91
74
  data = CSV.generate do |csv|
92
75
  locs.each {|loc| csv << loc}
@@ -729,6 +729,8 @@ module ViralSeq
729
729
  #
730
730
  # containing_indel? (Boolean)
731
731
  #
732
+ # direction (String, '+' or '-')
733
+ #
732
734
  # aligned_input_sequence (String)
733
735
  #
734
736
  # aligned_reference_sequence (String)
@@ -742,9 +744,13 @@ module ViralSeq
742
744
 
743
745
  uniq_dna.each do |seq,names|
744
746
  s = ViralSeq::Sequence.new('',seq)
745
- loc = s.locator(ref_option)
747
+ loc1 = s.locator(ref_option)
748
+ s.rc!
749
+ loc2 = s.locator(ref_option)
750
+ loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
751
+
746
752
  names.each do |name|
747
- out_array << ([title, name, ref_option.to_s] + loc)
753
+ out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
748
754
  end
749
755
  end
750
756
  return out_array
@@ -2,5 +2,5 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.0.4"
5
+ VERSION = "1.0.5"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-07-10 00:00:00.000000000 Z
12
+ date: 2019-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler