viral_seq 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +17 -7
- data/bin/locator +10 -27
- data/lib/viral_seq/seq_hash.rb +8 -2
- data/lib/viral_seq/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a13ba7912ee87511c2ecf19f07256d3a129661c6d7e180d57ecd1e34978386e6
|
4
|
+
data.tar.gz: 61e5ed6b423f0b64c53a6bb8e8ec3801bf7e093e4d0741bd71bf9fbfa24f1b55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f18d03220190bf1479ed29bd4d4b83777ffe5216951d38a91dd2afdc6c07b516883a8694291106b6fee2693a246b8a3c6a824786527cd03730f28f6777fa3231
|
7
|
+
data.tar.gz: 7fe146b081a7b633de963ed632bdcb548c71d1f401e227109d8745d23ad770d2099a2aa50bc4553a9450b260b7206892ed2a898d9154764aebe4094f38faeb44
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -17,36 +17,46 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
|
|
17
17
|
|
18
18
|
## Some Examples
|
19
19
|
|
20
|
-
Load nucleotide sequences from a FASTA format sequence file
|
20
|
+
### Load nucleotide sequences from a FASTA format sequence file
|
21
21
|
|
22
22
|
my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
|
23
23
|
|
24
|
-
Make an alignment (using MUSCLE)
|
24
|
+
### Make an alignment (using MUSCLE)
|
25
25
|
|
26
26
|
aligned_seqhash = my_seqhash.align
|
27
27
|
|
28
|
-
Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
28
|
+
### Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
29
29
|
|
30
30
|
qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
31
31
|
|
32
|
-
Further filter out sequences with Apobec3g/f hypermutations
|
32
|
+
### Further filter out sequences with Apobec3g/f hypermutations
|
33
33
|
|
34
34
|
qc_seqhash = qc_seqhash.a3g
|
35
35
|
|
36
|
-
Calculate nucleotide diversity π
|
36
|
+
### Calculate nucleotide diversity π
|
37
37
|
|
38
38
|
qc_seqhash.pi
|
39
39
|
|
40
|
-
Calculate cut-off for minority variants based on Poisson model
|
40
|
+
### Calculate cut-off for minority variants based on Poisson model
|
41
41
|
|
42
42
|
cut_off = qc_seqhash.pm
|
43
43
|
|
44
|
-
Examine for drug resistance mutations for HIV PR region
|
44
|
+
### Examine for drug resistance mutations for HIV PR region
|
45
45
|
|
46
46
|
qc_seqhash.sdrm_hiv_pr(cut_off)
|
47
47
|
|
48
|
+
### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
|
49
|
+
|
50
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
51
|
+
|
48
52
|
## Updates
|
49
53
|
|
54
|
+
Version 1.0.5-07112019:
|
55
|
+
|
56
|
+
1. Update ViralSeq::SeqHash#sequence_locator.
|
57
|
+
Program will try to determine the direction (`+` or `-`) of the query sequence.
|
58
|
+
2. Update executable `locator` to include a `direction` column in the output .csv file.
|
59
|
+
|
50
60
|
Version 1.0.4-07102019:
|
51
61
|
|
52
62
|
1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
|
data/bin/locator
CHANGED
@@ -4,29 +4,6 @@ require 'viral_seq'
|
|
4
4
|
require 'csv'
|
5
5
|
require 'optparse'
|
6
6
|
|
7
|
-
module ViralSeq
|
8
|
-
class SeqHash
|
9
|
-
|
10
|
-
def sequence_locator(ref_option = :HXB2)
|
11
|
-
out_array = []
|
12
|
-
dna_seq = self.dna_hash
|
13
|
-
title = self.title
|
14
|
-
|
15
|
-
uniq_dna = dna_seq.uniq_hash
|
16
|
-
|
17
|
-
uniq_dna.each do |seq,names|
|
18
|
-
s = ViralSeq::Sequence.new('',seq)
|
19
|
-
loc = s.locator(ref_option)
|
20
|
-
names.each do |name|
|
21
|
-
out_array << ([title, name, ref_option.to_s] + loc)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
return out_array
|
25
|
-
end # end of locator
|
26
|
-
alias_method :loc, :sequence_locator
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
7
|
def myparser
|
31
8
|
options = {}
|
32
9
|
OptionParser.new do |opts|
|
@@ -36,7 +13,7 @@ def myparser
|
|
36
13
|
options[:infile] = i
|
37
14
|
end
|
38
15
|
|
39
|
-
opts.on('-o', '--outfile CSV_FILE',
|
16
|
+
opts.on('-o', '--outfile CSV_FILE', "output .csv file for locator info, default as \#\{infile\}.csv") do |o|
|
40
17
|
options[:outfile] = o
|
41
18
|
end
|
42
19
|
|
@@ -48,11 +25,17 @@ def myparser
|
|
48
25
|
puts opts
|
49
26
|
exit
|
50
27
|
end
|
28
|
+
|
29
|
+
opts.on("-v", "--version", "Version number of RubyGem::ViralSeq") do
|
30
|
+
puts opts
|
31
|
+
exit
|
32
|
+
end
|
33
|
+
|
51
34
|
end.parse!
|
52
35
|
return options
|
53
36
|
end
|
54
37
|
|
55
|
-
puts "\nSequence Locator (RubyGem::ViralSeq
|
38
|
+
puts "\nSequence Locator (RubyGem::ViralSeq Version #{ViralSeq::VERSION}) by Shuntai Zhou"
|
56
39
|
puts "See details at https://github.com/ViralSeq/viral_seq\n"
|
57
40
|
puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n\n"
|
58
41
|
|
@@ -70,7 +53,7 @@ begin
|
|
70
53
|
if options[:outfile]
|
71
54
|
csv_file = options[:outfile]
|
72
55
|
else
|
73
|
-
|
56
|
+
csv_file = seq_file + ".csv"
|
74
57
|
end
|
75
58
|
|
76
59
|
unless File.exist?(seq_file)
|
@@ -86,7 +69,7 @@ begin
|
|
86
69
|
end
|
87
70
|
|
88
71
|
locs = seqs.loc(opt)
|
89
|
-
head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
72
|
+
head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
90
73
|
locs.unshift(head)
|
91
74
|
data = CSV.generate do |csv|
|
92
75
|
locs.each {|loc| csv << loc}
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -729,6 +729,8 @@ module ViralSeq
|
|
729
729
|
#
|
730
730
|
# containing_indel? (Boolean)
|
731
731
|
#
|
732
|
+
# direction ('+' for forward or '-' for reverse complement)
|
733
|
+
#
|
732
734
|
# aligned_input_sequence (String)
|
733
735
|
#
|
734
736
|
# aligned_reference_sequence (String)
|
@@ -742,9 +744,13 @@ module ViralSeq
|
|
742
744
|
|
743
745
|
uniq_dna.each do |seq,names|
|
744
746
|
s = ViralSeq::Sequence.new('',seq)
|
745
|
-
|
747
|
+
loc1 = s.locator(ref_option)
|
748
|
+
s.rc!
|
749
|
+
loc2 = s.locator(ref_option)
|
750
|
+
loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
|
751
|
+
|
746
752
|
names.each do |name|
|
747
|
-
out_array << ([title, name, ref_option.to_s] + loc)
|
753
|
+
out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
|
748
754
|
end
|
749
755
|
end
|
750
756
|
return out_array
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-07-10 00:00:00.000000000 Z
|
12
|
+
date: 2019-07-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|