viral_seq 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f54bac7de0e2ffe1b4eac99b6f325b039a3be19d832926ebf1b280154cd0ad2
4
- data.tar.gz: f5c5fe62299ec5253c4b8b395816a9158e8d60f428b757161daf209da70b0c0e
3
+ metadata.gz: a13ba7912ee87511c2ecf19f07256d3a129661c6d7e180d57ecd1e34978386e6
4
+ data.tar.gz: 61e5ed6b423f0b64c53a6bb8e8ec3801bf7e093e4d0741bd71bf9fbfa24f1b55
5
5
  SHA512:
6
- metadata.gz: 5c9b7767cbd000c253c7a6f98432e6230adfb6736cf39ced62815443f82f1cee7f446ebabf7791c67675bf37e664f7f4ac9570350894a871ac0310c36844213e
7
- data.tar.gz: '049d8b2fc82f09ab618d777a2dc6d20922dd6e4c9af8f6c47ca734b8caa46c4fcee10617247404b6ac85c3e31e3d47353c7162b7c6f1df17ec6ce9efb94829a5'
6
+ metadata.gz: f18d03220190bf1479ed29bd4d4b83777ffe5216951d38a91dd2afdc6c07b516883a8694291106b6fee2693a246b8a3c6a824786527cd03730f28f6777fa3231
7
+ data.tar.gz: 7fe146b081a7b633de963ed632bdcb548c71d1f401e227109d8745d23ad770d2099a2aa50bc4553a9450b260b7206892ed2a898d9154764aebe4094f38faeb44
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.0.2)
4
+ viral_seq (1.0.5)
5
5
  muscle_bio (~> 0.4)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -17,36 +17,46 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
17
17
 
18
18
  ## Some Examples
19
19
 
20
- Load nucleotide sequences from a FASTA format sequence file
20
+ ### Load nucleotide sequences from a FASTA format sequence file
21
21
 
22
22
  my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
23
23
 
24
- Make an alignment (using MUSCLE)
24
+ ### Make an alignment (using MUSCLE)
25
25
 
26
26
  aligned_seqhash = my_seqhash.align
27
27
 
28
- Filter nucleotide sequences with the reference coordinates (HIV Protease)
28
+ ### Filter nucleotide sequences with the reference coordinates (HIV Protease)
29
29
 
30
30
  qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
31
31
 
32
- Further filter out sequences with Apobec3g/f hypermutations
32
+ ### Further filter out sequences with Apobec3g/f hypermutations
33
33
 
34
34
  qc_seqhash = qc_seqhash.a3g
35
35
 
36
- Calculate nucleotide diveristy π
36
+ ### Calculate nucleotide diversity π
37
37
 
38
38
  qc_seqhash.pi
39
39
 
40
- Calculate cut-off for minority variants based on Poisson model
40
+ ### Calculate cut-off for minority variants based on Poisson model
41
41
 
42
42
  cut_off = qc_seqhash.pm
43
43
 
44
- Examine for drug resistance mutations for HIV PR region
44
+ ### Examine for drug resistance mutations for HIV PR region
45
45
 
46
46
  qc_seqhash.sdrm_hiv_pr(cut_off)
47
47
 
48
+ ### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
49
+
50
+ $ locator -i sequence.fasta -o sequence.fasta.csv
51
+
48
52
  ## Updates
49
53
 
54
+ Version 1.0.5-07112019:
55
+
56
+ 1. Update ViralSeq::SeqHash#sequence_locator.
57
+ Program will try to determine the direction (`+` or `-`) of the query sequence.
58
+ 2. Update executable `locator` to include a `direction` column in the output .csv file.
59
+
50
60
  Version 1.0.4-07102019:
51
61
 
52
62
  1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
@@ -4,29 +4,6 @@ require 'viral_seq'
4
4
  require 'csv'
5
5
  require 'optparse'
6
6
 
7
- module ViralSeq
8
- class SeqHash
9
-
10
- def sequence_locator(ref_option = :HXB2)
11
- out_array = []
12
- dna_seq = self.dna_hash
13
- title = self.title
14
-
15
- uniq_dna = dna_seq.uniq_hash
16
-
17
- uniq_dna.each do |seq,names|
18
- s = ViralSeq::Sequence.new('',seq)
19
- loc = s.locator(ref_option)
20
- names.each do |name|
21
- out_array << ([title, name, ref_option.to_s] + loc)
22
- end
23
- end
24
- return out_array
25
- end # end of locator
26
- alias_method :loc, :sequence_locator
27
- end
28
- end
29
-
30
7
  def myparser
31
8
  options = {}
32
9
  OptionParser.new do |opts|
@@ -36,7 +13,7 @@ def myparser
36
13
  options[:infile] = i
37
14
  end
38
15
 
39
- opts.on('-o', '--outfile CSV_FILE', 'output .csv file for locator info') do |o|
16
+ opts.on('-o', '--outfile CSV_FILE', "output .csv file for locator info, default as \#\{infile\}.csv") do |o|
40
17
  options[:outfile] = o
41
18
  end
42
19
 
@@ -48,11 +25,17 @@ def myparser
48
25
  puts opts
49
26
  exit
50
27
  end
28
+
29
+ opts.on("-v", "--version", "Version number of RubyGem::ViralSeq") do
30
+ puts opts
31
+ exit
32
+ end
33
+
51
34
  end.parse!
52
35
  return options
53
36
  end
54
37
 
55
- puts "\nSequence Locator (RubyGem::ViralSeq) #{ViralSeq::VERSION} by Shuntai Zhou"
38
+ puts "\nSequence Locator (RubyGem::ViralSeq Version #{ViralSeq::VERSION}) by Shuntai Zhou"
56
39
  puts "See details at https://github.com/ViralSeq/viral_seq\n"
57
40
  puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
58
41
 
@@ -70,7 +53,7 @@ begin
70
53
  if options[:outfile]
71
54
  csv_file = options[:outfile]
72
55
  else
73
- raise StandardError.new("Please provide path to output csv file")
56
+ csv_file = seq_file + ".csv"
74
57
  end
75
58
 
76
59
  unless File.exist?(seq_file)
@@ -86,7 +69,7 @@ begin
86
69
  end
87
70
 
88
71
  locs = seqs.loc(opt)
89
- head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
72
+ head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
90
73
  locs.unshift(head)
91
74
  data = CSV.generate do |csv|
92
75
  locs.each {|loc| csv << loc}
@@ -729,6 +729,8 @@ module ViralSeq
729
729
  #
730
730
  # containing_indel? (Boolean)
731
731
  #
732
+ # direction ('+' or '-')
733
+ #
732
734
  # aligned_input_sequence (String)
733
735
  #
734
736
  # aligned_reference_sequence (String)
@@ -742,9 +744,13 @@ module ViralSeq
742
744
 
743
745
  uniq_dna.each do |seq,names|
744
746
  s = ViralSeq::Sequence.new('',seq)
745
- loc = s.locator(ref_option)
747
+ loc1 = s.locator(ref_option)
748
+ s.rc!
749
+ loc2 = s.locator(ref_option)
750
+ loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
751
+
746
752
  names.each do |name|
747
- out_array << ([title, name, ref_option.to_s] + loc)
753
+ out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
748
754
  end
749
755
  end
750
756
  return out_array
@@ -2,5 +2,5 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.0.4"
5
+ VERSION = "1.0.5"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-07-10 00:00:00.000000000 Z
12
+ date: 2019-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler