viral_seq 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +17 -7
- data/bin/locator +10 -27
- data/lib/viral_seq/seq_hash.rb +8 -2
- data/lib/viral_seq/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a13ba7912ee87511c2ecf19f07256d3a129661c6d7e180d57ecd1e34978386e6
|
4
|
+
data.tar.gz: 61e5ed6b423f0b64c53a6bb8e8ec3801bf7e093e4d0741bd71bf9fbfa24f1b55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f18d03220190bf1479ed29bd4d4b83777ffe5216951d38a91dd2afdc6c07b516883a8694291106b6fee2693a246b8a3c6a824786527cd03730f28f6777fa3231
|
7
|
+
data.tar.gz: 7fe146b081a7b633de963ed632bdcb548c71d1f401e227109d8745d23ad770d2099a2aa50bc4553a9450b260b7206892ed2a898d9154764aebe4094f38faeb44
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -17,36 +17,46 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
|
|
17
17
|
|
18
18
|
## Some Examples
|
19
19
|
|
20
|
-
Load nucleotide sequences from a FASTA format sequence file
|
20
|
+
### Load nucleotide sequences from a FASTA format sequence file
|
21
21
|
|
22
22
|
my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
|
23
23
|
|
24
|
-
Make an alignment (using MUSCLE)
|
24
|
+
### Make an alignment (using MUSCLE)
|
25
25
|
|
26
26
|
aligned_seqhash = my_seqhash.align
|
27
27
|
|
28
|
-
Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
28
|
+
### Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
29
29
|
|
30
30
|
qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
31
31
|
|
32
|
-
Further filter out sequences with Apobec3g/f hypermutations
|
32
|
+
### Further filter out sequences with Apobec3g/f hypermutations
|
33
33
|
|
34
34
|
qc_seqhash = qc_seqhash.a3g
|
35
35
|
|
36
|
-
Calculate nucleotide diversity π
|
36
|
+
### Calculate nucleotide diversity π
|
37
37
|
|
38
38
|
qc_seqhash.pi
|
39
39
|
|
40
|
-
Calculate cut-off for minority variants based on Poisson model
|
40
|
+
### Calculate cut-off for minority variants based on Poisson model
|
41
41
|
|
42
42
|
cut_off = qc_seqhash.pm
|
43
43
|
|
44
|
-
Examine for drug resistance mutations for HIV PR region
|
44
|
+
### Examine for drug resistance mutations for HIV PR region
|
45
45
|
|
46
46
|
qc_seqhash.sdrm_hiv_pr(cut_off)
|
47
47
|
|
48
|
+
### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
|
49
|
+
|
50
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
51
|
+
|
48
52
|
## Updates
|
49
53
|
|
54
|
+
Version 1.0.5-07112019:
|
55
|
+
|
56
|
+
1. Update ViralSeq::SeqHash#sequence_locator.
|
57
|
+
Program will try to determine the direction (`+` or `-`) of the query sequence.
|
58
|
+
2. Update executable `locator` to have a column of `direction` in output .csv file
|
59
|
+
|
50
60
|
Version 1.0.4-07102019:
|
51
61
|
|
52
62
|
1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
|
data/bin/locator
CHANGED
@@ -4,29 +4,6 @@ require 'viral_seq'
|
|
4
4
|
require 'csv'
|
5
5
|
require 'optparse'
|
6
6
|
|
7
|
-
module ViralSeq
|
8
|
-
class SeqHash
|
9
|
-
|
10
|
-
def sequence_locator(ref_option = :HXB2)
|
11
|
-
out_array = []
|
12
|
-
dna_seq = self.dna_hash
|
13
|
-
title = self.title
|
14
|
-
|
15
|
-
uniq_dna = dna_seq.uniq_hash
|
16
|
-
|
17
|
-
uniq_dna.each do |seq,names|
|
18
|
-
s = ViralSeq::Sequence.new('',seq)
|
19
|
-
loc = s.locator(ref_option)
|
20
|
-
names.each do |name|
|
21
|
-
out_array << ([title, name, ref_option.to_s] + loc)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
return out_array
|
25
|
-
end # end of locator
|
26
|
-
alias_method :loc, :sequence_locator
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
7
|
def myparser
|
31
8
|
options = {}
|
32
9
|
OptionParser.new do |opts|
|
@@ -36,7 +13,7 @@ def myparser
|
|
36
13
|
options[:infile] = i
|
37
14
|
end
|
38
15
|
|
39
|
-
opts.on('-o', '--outfile CSV_FILE',
|
16
|
+
opts.on('-o', '--outfile CSV_FILE', "output .csv file for locator info, default as \#\{infile\}.csv") do |o|
|
40
17
|
options[:outfile] = o
|
41
18
|
end
|
42
19
|
|
@@ -48,11 +25,17 @@ def myparser
|
|
48
25
|
puts opts
|
49
26
|
exit
|
50
27
|
end
|
28
|
+
|
29
|
+
opts.on("-v", "--version", "Version number of RubyGem::ViralSeq") do
|
30
|
+
puts opts
|
31
|
+
exit
|
32
|
+
end
|
33
|
+
|
51
34
|
end.parse!
|
52
35
|
return options
|
53
36
|
end
|
54
37
|
|
55
|
-
puts "\nSequence Locator (RubyGem::ViralSeq
|
38
|
+
puts "\nSequence Locator (RubyGem::ViralSeq Version #{ViralSeq::VERSION}) by Shuntai Zhou"
|
56
39
|
puts "See details at https://github.com/ViralSeq/viral_seq\n"
|
57
40
|
puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
|
58
41
|
|
@@ -70,7 +53,7 @@ begin
|
|
70
53
|
if options[:outfile]
|
71
54
|
csv_file = options[:outfile]
|
72
55
|
else
|
73
|
-
|
56
|
+
csv_file = seq_file + ".csv"
|
74
57
|
end
|
75
58
|
|
76
59
|
unless File.exist?(seq_file)
|
@@ -86,7 +69,7 @@ begin
|
|
86
69
|
end
|
87
70
|
|
88
71
|
locs = seqs.loc(opt)
|
89
|
-
head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
72
|
+
head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
90
73
|
locs.unshift(head)
|
91
74
|
data = CSV.generate do |csv|
|
92
75
|
locs.each {|loc| csv << loc}
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -729,6 +729,8 @@ module ViralSeq
|
|
729
729
|
#
|
730
730
|
# containing_indel? (Boolean)
|
731
731
|
#
|
732
|
+
# direction ('+' or '-')
|
733
|
+
#
|
732
734
|
# aligned_input_sequence (String)
|
733
735
|
#
|
734
736
|
# aligned_reference_sequence (String)
|
@@ -742,9 +744,13 @@ module ViralSeq
|
|
742
744
|
|
743
745
|
uniq_dna.each do |seq,names|
|
744
746
|
s = ViralSeq::Sequence.new('',seq)
|
745
|
-
|
747
|
+
loc1 = s.locator(ref_option)
|
748
|
+
s.rc!
|
749
|
+
loc2 = s.locator(ref_option)
|
750
|
+
loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
|
751
|
+
|
746
752
|
names.each do |name|
|
747
|
-
out_array << ([title, name, ref_option.to_s] + loc)
|
753
|
+
out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
|
748
754
|
end
|
749
755
|
end
|
750
756
|
return out_array
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-07-10 00:00:00.000000000 Z
|
12
|
+
date: 2019-07-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|