viral_seq 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -11
- data/lib/viral_seq/hivdr.rb +2 -0
- data/lib/viral_seq/seq_hash.rb +49 -1
- data/lib/viral_seq/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df8d50f2dfbf0f2e7e1efcf66c8a91c286c7b5029533b725a4a33219882748bb
|
4
|
+
data.tar.gz: 4061c3875d4629025d1ccc216a54fdb7a011d397408a3ecb15125475e9f262e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a52087ced9fe258ef5bab4449b90e964ff9a557292dc1ce679aae03a56bd2570fdf1221e7026fec2b1ccb49ad2a9ff076338a397982e47c46877e2cdfb4e6d2e
|
7
|
+
data.tar.gz: 792cb9424fd46d536d0b95cfc90914a8548ee5ea6d1c3efe45cccd1d01c6dbd6b7a7ee0ba1be010bd6cf7a3ea201f4850803c28f16889b5286e1e458a774c8f1
|
data/README.md
CHANGED
@@ -10,46 +10,55 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
-
Load all ViralSeq classes by requiring 'viral_seq.rb'
|
13
|
+
#### Load all ViralSeq classes by requiring 'viral_seq.rb'
|
14
14
|
|
15
15
|
#!/usr/bin/env ruby
|
16
16
|
require 'viral_seq'
|
17
|
+
|
18
|
+
#### Use the `locator` executable in a terminal to get the coordinates of sequences from a FASTA file on the HIV/SIV reference genome
|
19
|
+
|
20
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
17
21
|
|
18
22
|
## Some Examples
|
19
23
|
|
20
|
-
|
24
|
+
#### Load nucleotide sequences from a FASTA format sequence file
|
21
25
|
|
22
26
|
my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
|
23
27
|
|
24
|
-
|
28
|
+
#### Make an alignment (using MUSCLE)
|
25
29
|
|
26
30
|
aligned_seqhash = my_seqhash.align
|
27
31
|
|
28
|
-
|
32
|
+
#### Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
29
33
|
|
30
34
|
qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
31
35
|
|
32
|
-
|
36
|
+
#### Further filter out sequences with Apobec3g/f hypermutations
|
33
37
|
|
34
38
|
qc_seqhash = qc_seqhash.a3g
|
35
39
|
|
36
|
-
|
40
|
+
#### Calculate nucleotide diversity π
|
37
41
|
|
38
42
|
qc_seqhash.pi
|
39
43
|
|
40
|
-
|
44
|
+
#### Calculate cut-off for minority variants based on Poisson model
|
41
45
|
|
42
46
|
cut_off = qc_seqhash.pm
|
43
47
|
|
44
|
-
|
48
|
+
#### Examine drug resistance mutations in the HIV PR region
|
45
49
|
|
46
50
|
qc_seqhash.sdrm_hiv_pr(cut_off)
|
47
51
|
|
48
|
-
|
52
|
+
## Updates
|
49
53
|
|
50
|
-
|
54
|
+
Version 1.0.6-07232019:
|
51
55
|
|
52
|
-
|
56
|
+
1. Several methods added to ViralSeq::SeqHash, including
|
57
|
+
ViralSeq::SeqHash#size
|
58
|
+
ViralSeq::SeqHash#+
|
59
|
+
ViralSeq::SeqHash#write_nt_fa
|
60
|
+
ViralSeq::SeqHash#mutation
|
61
|
+
2. Updated documentation and RSpec samples.
|
53
62
|
|
54
63
|
Version 1.0.5-07112019:
|
55
64
|
|
data/lib/viral_seq/hivdr.rb
CHANGED
@@ -5,6 +5,8 @@ module ViralSeq
|
|
5
5
|
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
|
6
6
|
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
7
7
|
# PR codon 1-99
|
8
|
+
# RT codon 34-122 (HXB2 2650-2914) and 152-236 (HXB2 3001-3257)
|
9
|
+
# IN codon 53-174 (HXB2 4384-4751)
|
8
10
|
# @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
|
9
11
|
# can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
|
10
12
|
# @return [Array] three elements `[point_mutation_list, linkage_list, report_list]`
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -166,6 +166,39 @@ module ViralSeq
|
|
166
166
|
alias_method :array, :new_from_array
|
167
167
|
end
|
168
168
|
|
169
|
+
# the size of nt sequence hash of the SeqHash object
|
170
|
+
# @return [Integer] size of nt sequence hash of the SeqHash object
|
171
|
+
|
172
|
+
def size
|
173
|
+
self.dna_hash.size
|
174
|
+
end
|
175
|
+
|
176
|
+
# combine SeqHash objects
|
177
|
+
# @param sh2 [ViralSeq::SeqHash] another SeqHash
|
178
|
+
# @return [ViralSeq::SeqHash] combined SeqHash
|
179
|
+
|
180
|
+
def +(sh2)
|
181
|
+
new_seqhash = ViralSeq::SeqHash.new
|
182
|
+
new_seqhash.dna_hash = self.dna_hash.merge(sh2.dna_hash)
|
183
|
+
new_seqhash.aa_hash = self.aa_hash.merge(sh2.aa_hash)
|
184
|
+
new_seqhash.title = self.title + "_with_" + sh2.title
|
185
|
+
new_seqhash.file = self.file + "," + sh2.file
|
186
|
+
return new_seqhash
|
187
|
+
end
|
188
|
+
|
189
|
+
# write the nt sequences to a FASTA format file
|
190
|
+
# @param file [String] path to the FASTA output file
|
191
|
+
# @return [NilClass]
|
192
|
+
|
193
|
+
def write_nt_fa(file)
|
194
|
+
File.open(file, 'w') do |f|
|
195
|
+
self.dna_hash.each do |k,v|
|
196
|
+
f.puts k
|
197
|
+
f.puts v
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
169
202
|
# generate sequences in relaxed sequential phylip format from a ViralSeq::SeqHash object
|
170
203
|
# @return [String] relaxed sequential phylip format in a String object
|
171
204
|
# @example convert fasta format to relaxed sequential phylip format
|
@@ -748,7 +781,7 @@ module ViralSeq
|
|
748
781
|
s.rc!
|
749
782
|
loc2 = s.locator(ref_option)
|
750
783
|
loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
|
751
|
-
|
784
|
+
|
752
785
|
names.each do |name|
|
753
786
|
out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
|
754
787
|
end
|
@@ -999,6 +1032,21 @@ module ViralSeq
|
|
999
1032
|
end
|
1000
1033
|
|
1001
1034
|
|
1035
|
+
# mutate @dna_hash based on the error_rate
|
1036
|
+
# @param error_rate [Float] error rate used to mutate sequences.
|
1037
|
+
# @return [ViralSeq::SeqHash] new SeqHash object of mutated sequences.
|
1038
|
+
|
1039
|
+
def mutation(error_rate = 0.01)
|
1040
|
+
new_seqhash = ViralSeq::SeqHash.new
|
1041
|
+
dna = {}
|
1042
|
+
self.dna_hash.each do |name, seq|
|
1043
|
+
dna[name + '_mut-' + error_rate.to_s] = seq.mutation(error_rate)
|
1044
|
+
end
|
1045
|
+
new_seqhash.dna_hash = dna
|
1046
|
+
new_seqhash.title = self.title + "_mut-" + error_rate.to_s
|
1047
|
+
new_seqhash.file = self.file
|
1048
|
+
return new_seqhash
|
1049
|
+
end
|
1002
1050
|
|
1003
1051
|
|
1004
1052
|
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-07-
|
12
|
+
date: 2019-07-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|