viral_seq 0.3.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +7 -1
- data/lib/viral_seq/Integer.rb +16 -0
- data/lib/viral_seq/constant.rb +7 -0
- data/lib/viral_seq/enumerable.rb +132 -0
- data/lib/viral_seq/hash.rb +45 -0
- data/lib/viral_seq/hivdr.rb +454 -0
- data/lib/viral_seq/math.rb +128 -380
- data/lib/viral_seq/muscle.rb +60 -82
- data/lib/viral_seq/pid.rb +26 -0
- data/lib/viral_seq/ref_seq.rb +35 -0
- data/lib/viral_seq/rubystats.rb +172 -0
- data/lib/viral_seq/seq_hash.rb +1043 -0
- data/lib/viral_seq/seq_hash_pair.rb +219 -0
- data/lib/viral_seq/sequence.rb +571 -348
- data/lib/viral_seq/string.rb +119 -0
- data/lib/viral_seq/version.rb +1 -1
- data/lib/viral_seq.rb +14 -15
- metadata +13 -12
- data/lib/viral_seq/a3g.rb +0 -172
- data/lib/viral_seq/fasta.rb +0 -154
- data/lib/viral_seq/hcv_dr.rb +0 -54
- data/lib/viral_seq/locator.rb +0 -299
- data/lib/viral_seq/misc.rb +0 -103
- data/lib/viral_seq/nt_variation.rb +0 -148
- data/lib/viral_seq/poisson_cutoff.rb +0 -68
- data/lib/viral_seq/refseq.rb +0 -45
- data/lib/viral_seq/sdrm_core.rb +0 -652
- data/lib/viral_seq/tcs_core.rb +0 -556
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9055ee4b893bdff77117a2a9c005166637c177b0ed243a5362488ccf7d893e76
|
4
|
+
data.tar.gz: 87faa7b60c47eecc6f1e3267d4f2a0df549dc70d935d8adabaf54994e60b8ab4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5a3d9aab73cd1e8b696527392c6caaa0a4eec485fe0dbf38a7db456ddce115288f2ae735717ec9595cc4f732cb6afee8dca750b0ebfc703112a5df7196230ca
|
7
|
+
data.tar.gz: f0f040bb1c70f3569ae132023f367f945c408ba73d8d495976ceb0cc2538d7104a56f2009f89789eacbfe45921c017e40578fcb4ccd1df489f75d83d7b733a85
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# ViralSeq
|
2
2
|
|
3
3
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
4
|
|
@@ -15,6 +15,12 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
|
|
15
15
|
#!/usr/bin/env ruby
|
16
16
|
require 'viral_seq'
|
17
17
|
|
18
|
+
## Updates
|
19
|
+
|
20
|
+
Version 1.0.0-07092019:
|
21
|
+
|
22
|
+
1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
|
23
|
+
|
18
24
|
## Development
|
19
25
|
|
20
26
|
Bug reports and pull requests are welcome on GitHub at https://github.com/ViralSeq/viral_seq. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# additional functions for Class::Integer
|
2
|
+
|
3
|
+
class Integer
  # Factorial of the receiver, exposed through the unary `!` operator.
  #
  # NOTE: defining `!` here replaces the usual logical-negation result of
  # `!some_integer` with the factorial value for every Integer.
  #
  # @return [Integer, nil] factorial of the receiver; 1 for 0, and nil when
  #   the receiver is negative (the range 1..self is then empty)
  # @example factorial for 5
  #   !5
  #   => 120
  def !
    return 1 if zero?

    1.upto(self).reduce(:*)
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# additional statistic/math functions to Module::Enumerable
|
2
|
+
# @example median number
|
3
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
4
|
+
# array.median
|
5
|
+
# => 5.5
|
6
|
+
# @example sum
|
7
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
8
|
+
# array.sum
|
9
|
+
# => 55
|
10
|
+
# @example average number (mean)
|
11
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
12
|
+
# array.mean
|
13
|
+
# => 5.5
|
14
|
+
# @example sample variance
|
15
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
16
|
+
# array.sample_variance
|
17
|
+
# => 9.166666666666666
|
18
|
+
# @example standard deviation
|
19
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
20
|
+
# array.stdev
|
21
|
+
# => 3.0276503540974917
|
22
|
+
# @example upper quartile
|
23
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
24
|
+
# array.upper_quartile
|
25
|
+
# => 7.5
|
26
|
+
# @example lower_quartile
|
27
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
28
|
+
# array.lower_quartile
|
29
|
+
# => 3.5
|
30
|
+
# @example count frequency of elements in an array
|
31
|
+
# array = %w{cat dog monkey cat cat cat monkey}
|
32
|
+
# array.count_freq
|
33
|
+
# => {"cat"=>4, "dog"=>1, "monkey"=>2}
|
34
|
+
# @example count frequency as percentage of elements in an array
|
35
|
+
# array = %w{cat dog monkey cat cat cat monkey}
|
36
|
+
# array.count_freq2
|
37
|
+
# => {"cat"=>0.57, "dog"=>0.14, "monkey"=>0.29}
|
38
|
+
module Enumerable

  # generate median number
  # @return [Numeric, nil] the middle element for an odd number of elements,
  #   the Float mean of the two middle elements for an even number,
  #   or nil when the collection is empty
  def median
    sorted = sort
    len = sorted.length
    return nil if len.zero?

    mid = len / 2
    len.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]).to_f / 2
  end

  # generate summed value
  #
  # Only defined when the running Ruby does not already provide
  # Enumerable#sum, so the built-in version (which accepts an initial value
  # and a block) is never replaced by a less capable one.
  # @param init [Numeric] value the summation starts from
  # @yield [element] optional block mapping each element before it is added
  # @return [Numeric] summed value
  unless method_defined?(:sum)
    def sum(init = 0)
      inject(init) { |accum, i| accum + (block_given? ? yield(i) : i) }
    end
  end

  # generate mean number
  # @return [Float] mean value (NaN for an empty collection)
  def mean
    values = to_a
    values.sum / values.length.to_f
  end

  # generate sample variance
  # @return [Float] sample variance (sum of squared deviations divided by n - 1)
  def sample_variance
    values = to_a
    m = values.mean
    squared_deviations = values.inject(0) { |accum, i| accum + (i - m)**2 }
    squared_deviations / (values.length - 1).to_f
  end

  # generate standard deviation
  # @return [Float] standard deviation (square root of the sample variance)
  def stdev
    Math.sqrt(sample_variance)
  end

  # generate upper quartile value
  # @return [Numeric, nil] upper quartile value, nil when the collection is empty
  def upper_quartile
    sorted_array = sort
    return nil if sorted_array.empty?

    # 1-based fractional rank of the quartile; always a Float
    u = 0.25 * (3 * sorted_array.length)
    rank = u.truncate
    fraction = u - rank
    # rank is 0 for a single element; clamp so both neighbours are that element
    below = sorted_array[[rank - 1, 0].max]
    above = sorted_array[rank]
    # linear interpolation between the two neighbouring elements
    below + (above - below) * fraction
  end

  # generate lower quartile value
  # @return [Numeric, nil] lower quartile value, nil when the collection is empty
  def lower_quartile
    sorted_array = sort
    return nil if sorted_array.empty?

    # 1-based fractional rank of the quartile; always a Float
    u = 0.25 * sorted_array.length + 1
    rank = u.truncate
    fraction = u - rank
    below = sorted_array[rank - 1]
    # for a single element the rank points past the end; clamp to the last element
    above = sorted_array[[rank, sorted_array.length - 1].min]
    # linear interpolation between the two neighbouring elements
    below + (above - below) * fraction
  end

  # tabulate elements and frequencies of an Enumerable
  # @return [Hash] a hash of :element => :freq_count (missing keys read as 0)
  def count_freq
    each_with_object(Hash.new(0)) { |element, hash| hash[element] += 1 }
  end

  # tabulate elements and frequencies (as a fraction of all elements) of an Enumerable
  # @param decimal [Integer] number of decimals the frequencies are rounded to
  # @return [Hash] a hash of :element => :fraction (missing keys read as 0)
  def count_freq2(decimal = 2)
    counts = count_freq
    total_elements = counts.values.inject(0, :+).to_f
    counts.each_with_object(Hash.new(0)) do |(key, value), freq|
      freq[key] = (value / total_elements).round(decimal)
    end
  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# additional methods for Class::Hash required for ViralSeq
|
2
|
+
|
3
|
+
class Hash

  # subtract one hash (h2) from the other (h1) if the keys are identical
  # @param other_hash [Hash] the hash that needs to be subtracted from the receiver
  # @return [Hash] a new hash holding only the receiver's entries whose keys
  #   are absent from other_hash (values are not compared)
  # @example subtract h2 from h1 if the keys match
  #   h1 = {"Cat" => 100, "Dog" => 5, "Bird" => 2, "Snake" => 10}
  #   h2 = {"Cat" => 100, "Dog" => 5, "Bison" => 30}
  #   h1.difference(h2)
  #   => {"Bird" => 2, "Snake" => 10}
  def difference(other_hash)
    reject { |k, _v| other_hash.key?(k) }
  end

  # return a new hash with the unique values of input hash as keys,
  # and the keys of the unique values of input hash in an array as values of the new hash
  # @return [Hash] a new hash of :uniq_value_of_original_hash => :array_of_keys
  # @example
  #   hash = {1=>"A", 2=>"A", 3=>"C", 4=>"C", 5=>"T"}
  #   hash.uniq_hash
  #   => {"A"=>[1, 2], "C"=>[3, 4], "T"=>[5]}
  def uniq_hash
    # single pass: group every key under its value, keeping first-seen order
    each_with_object({}) do |(k, v), out_hash|
      (out_hash[v] ||= []) << k
    end
  end
end
|
@@ -0,0 +1,454 @@
|
|
1
|
+
|
2
|
+
module ViralSeq
|
3
|
+
class SeqHash
|
4
|
+
|
5
|
+
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
|
6
|
+
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
7
|
+
# PR codon 1-99
|
8
|
+
# @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
|
9
|
+
# can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
|
10
|
+
# @return [Array] three elements `[point_mutation_list, linkage_list, report_list]`
|
11
|
+
#
|
12
|
+
# # point_mutation_list: two-dimensional array for the following information,
|
13
|
+
# # [region,tcs_number,position,wildtype,mutation,count,%,CI_low,CI_high,label]
|
14
|
+
# # linkage_list: two-dimensional array for the following information,
|
15
|
+
# # [region,tcs_number,linkage,count,%,CI_low,CI_high,label]
|
16
|
+
# # report_list: two-dimensional array for the following information,
|
17
|
+
# # [position,codon,tcs_number,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*]
|
18
|
+
# @example identify SDRMs from a FASTA sequence file of HIV PR sequences obtained after MPID-DR sequencing
|
19
|
+
# my_seqhash = ViralSeq::SeqHash.fa('spec/sample_files/sample_dr_sequences/pr.fasta')
|
20
|
+
# p_cut_off = my_seqhash.pm
|
21
|
+
# pr_sdrm = my_seqhash.sdrm_hiv_pr(p_cut_off)
|
22
|
+
# puts "region,tcs_number,position,wildtype,mutation,count,%,CI_low,CI_high,label"; pr_sdrm[0].each {|n| puts n.join(',')}
|
23
|
+
# => region,tcs_number,position,wildtype,mutation,count,%,CI_low,CI_high,label
|
24
|
+
# => PR,396,30,D,N,247,0.62374,0.57398,0.67163,
|
25
|
+
# => PR,396,50,I,V,1,0.00253,6.0e-05,0.01399,*
|
26
|
+
# => PR,396,88,N,D,246,0.62121,0.57141,0.66919,
|
27
|
+
#
|
28
|
+
# puts "region,tcs_number,linkage,count,%,CI_low,CI_high,label"; pr_sdrm[1].each {|n| puts n.join(',')}
|
29
|
+
# => region,tcs_number,linkage,count,%,CI_low,CI_high,label
|
30
|
+
# => PR,396,D30N+N88D,245,0.61869,0.56884,0.66674,
|
31
|
+
# => PR,396,WT,149,0.37626,0.32837,0.42602,
|
32
|
+
# => PR,396,D30N,1,0.00253,6.0e-05,0.01399,*
|
33
|
+
# => PR,396,D30N+I50V+N88D,1,0.00253,6.0e-05,0.01399,*
|
34
|
+
#
|
35
|
+
# puts "position,codon,tcs_number," + ViralSeq::AMINO_ACID_LIST.join(","); pr_sdrm[2].each {|n|puts n.join(",")}
|
36
|
+
# => position,codon,tcs_number,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*
|
37
|
+
# => PR,1,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
38
|
+
# => PR,2,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
39
|
+
# => PR,3,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
40
|
+
# => PR,4,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
41
|
+
# => PR,5,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
42
|
+
# => PR,6,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0
|
43
|
+
# => PR,7,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
44
|
+
# => PR,8,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
|
45
|
+
# => PR,9,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
46
|
+
# => PR,10,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
47
|
+
# => PR,11,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
48
|
+
# => PR,12,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.8788,62.1212,0.0,0.0,0.0,0.0
|
49
|
+
# => PR,13,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.1313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.8687,0.0,0.0,0.0
|
50
|
+
# => PR,14,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
51
|
+
# => PR,15,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.3737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.6263,0.0,0.0,0.0
|
52
|
+
# => PR,16,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
53
|
+
# => PR,17,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
54
|
+
# => PR,18,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.4949,0.5051,0.0,0.0,0.0,0.0,0.0,0.0
|
55
|
+
# => PR,19,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
56
|
+
# => PR,20,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
57
|
+
# => PR,21,396,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
58
|
+
# => PR,22,396,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
59
|
+
# => PR,23,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
60
|
+
# => PR,24,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
61
|
+
# => PR,25,396,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
62
|
+
# => PR,26,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
63
|
+
# => PR,27,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
64
|
+
# => PR,28,396,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
65
|
+
# => PR,29,396,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
66
|
+
# => PR,30,396,0.0,0.0,37.6263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.3737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
67
|
+
# => PR,31,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
68
|
+
# => PR,32,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
69
|
+
# => PR,33,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0
|
70
|
+
# => PR,34,396,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
71
|
+
# => PR,35,396,0.0,0.0,62.1212,37.6263,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
72
|
+
# => PR,36,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0
|
73
|
+
# => PR,37,396,0.0,0.0,37.8788,61.8687,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
74
|
+
# => PR,38,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
75
|
+
# => PR,39,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.4949,0.0,0.0,0.5051,0.0,0.0,0.0,0.0,0.0
|
76
|
+
# => PR,40,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
77
|
+
# => PR,41,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,62.1212,0.0,0.0,0.0,0.0,0.0,0.0
|
78
|
+
# => PR,42,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0
|
79
|
+
# => PR,43,396,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
80
|
+
# => PR,44,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
81
|
+
# => PR,45,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
82
|
+
# => PR,46,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
83
|
+
# => PR,47,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
84
|
+
# => PR,48,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
85
|
+
# => PR,49,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
86
|
+
# => PR,50,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0
|
87
|
+
# => PR,51,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
88
|
+
# => PR,52,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
89
|
+
# => PR,53,396,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
90
|
+
# => PR,54,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
91
|
+
# => PR,55,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
92
|
+
# => PR,56,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
93
|
+
# => PR,57,396,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,99.4949,0.0,0.0,0.0,0.0,0.0,0.0
|
94
|
+
# => PR,58,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
95
|
+
# => PR,59,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0
|
96
|
+
# => PR,60,396,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
97
|
+
# => PR,61,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
98
|
+
# => PR,62,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
99
|
+
# => PR,63,396,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,37.8788,0.0,0.0,61.8687,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
100
|
+
# => PR,64,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.1212,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
101
|
+
# => PR,65,396,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
102
|
+
# => PR,66,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
103
|
+
# => PR,67,396,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
104
|
+
# => PR,68,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
105
|
+
# => PR,69,396,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
106
|
+
# => PR,70,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
107
|
+
# => PR,71,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.1212,37.8788,0.0,0.0,0.0
|
108
|
+
# => PR,72,396,0.0,0.0,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.1212,0.0,0.0,0.0,0.0
|
109
|
+
# => PR,73,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
110
|
+
# => PR,74,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
111
|
+
# => PR,75,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
112
|
+
# => PR,76,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
113
|
+
# => PR,77,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
114
|
+
# => PR,78,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
115
|
+
# => PR,79,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
116
|
+
# => PR,80,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
117
|
+
# => PR,81,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
118
|
+
# => PR,82,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
119
|
+
# => PR,83,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.4949,0.0,0.0,0.0,0.5051,0.0,0.0,0.0,0.0,0.0
|
120
|
+
# => PR,84,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
121
|
+
# => PR,85,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
122
|
+
# => PR,86,396,0.0,0.0,0.0,0.5051,0.0,99.4949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
123
|
+
# => PR,87,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
|
124
|
+
# => PR,88,396,0.0,0.0,62.1212,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
125
|
+
# => PR,89,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
126
|
+
# => PR,90,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
127
|
+
# => PR,91,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
128
|
+
# => PR,92,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
129
|
+
# => PR,93,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
130
|
+
# => PR,94,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
131
|
+
# => PR,95,396,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
132
|
+
# => PR,96,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
133
|
+
# => PR,97,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
134
|
+
# => PR,98,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0
|
135
|
+
# => PR,99,396,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
136
|
+
|
137
|
+
def sdrm_hiv_pr(cutoff = 0)
  sequences = self.dna_hash
  # Nothing to analyse: return three empty lists instead of failing when the
  # report step looks up the first translated sequence.
  return [[], [], []] if sequences.empty?

  region = "PR"
  rf_label = 0
  start_codon_number = 1
  n_seq = sequences.size
  mut = {}      # position => [wild-type residue, [observed mutant residue, ...]]
  mut_com = []  # one SDRM record per sequence, used for the linkage table
  aa = {}       # sequence name => translated amino-acid string

  sequences.each do |name, seq|
    s = ViralSeq::Sequence.new(name, seq)
    s.translate(rf_label)
    aa[name] = s.aa_string
    # presumably a Hash of position => [wild type, mutation]; it is iterated that way below
    record = s.sdrm(:hiv_pr)
    mut_com << record
    record.each do |position, mutation|
      (mut[position] ||= [mutation[0], []])[1] << mutation[1]
    end
  end

  # point mutations: one row per (position, mutant residue) with its binomial CI
  point_mutation_list = []
  mut.each do |position, (wt, mut_list)|
    mut_list.count_freq.each do |m, number|
      ci = ViralSeq::Math::BinomCI.new(number, n_seq)
      # "*" marks counts that fall below the caller-supplied cut-off
      label = number < cutoff ? "*" : ""
      point_mutation_list << [region, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
    end
  end
  point_mutation_list.sort_by! { |record| record[2] }

  # linkage: count identical per-sequence mutation combinations, "WT" when none
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.empty? ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end
  # most frequent combination first
  linkage_list = link2.sort_by { |_key, value| value }.reverse.map do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    label = v < cutoff ? "*" : ""
    [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
  end

  # report: percentage of every amino acid at every position;
  # the number of positions is taken from the first translated sequence
  aa_strings = aa.values
  report_list = []
  aa_strings.first.size.times do |p|
    freq = aa_strings.map { |r1| r1[p] }.count_freq
    record = [region, start_codon_number + p, n_seq]
    ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
      record << (freq[amino_acid].to_f / n_seq * 100).round(4)
    end
    report_list << record
  end

  [point_mutation_list, linkage_list, report_list]
end
|
222
|
+
|
223
|
+
|
224
|
+
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV RT region.
|
225
|
+
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
226
|
+
# RT codon 34-122, 152-236, two regions are linked
|
227
|
+
# @param (see #sdrm_hiv_pr)
|
228
|
+
# @return (see #sdrm_hiv_pr)
|
229
|
+
|
230
|
+
def sdrm_hiv_rt(cutoff = 0)
  sequences = self.dna_hash
  # Nothing to analyse: return three empty lists instead of failing when the
  # report step looks up the first translated fragment.
  return [[], [], []] if sequences.empty?

  region = "RT"
  rf_label = 1
  start_codon_number = 34
  # filler spliced between the two fragments of each input sequence so they
  # are translated as one stretch — presumably the region not covered by
  # sequencing; TODO confirm against the protocol
  gap = "AGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCAC"

  n_seq = sequences.size
  mut_nrti = {}   # position => [wild-type residue, [observed mutant residue, ...]]
  mut_nnrti = {}  # same layout, for the :nnrti mutation set
  mut_com = []    # merged NRTI + NNRTI record per sequence, used for the linkage table
  r1_aa = {}      # sequence name => first 89 translated residues
  r2_aa = {}      # sequence name => last 85 translated residues

  sequences.each do |name, seq|
    # first 267 nt + gap + remainder
    linked_seq = seq[0, 267] + gap + seq[267..-1]
    s = ViralSeq::Sequence.new(name, linked_seq)
    s.translate(rf_label)

    r1_aa[name] = s.aa_string[0, 89]
    r2_aa[name] = s.aa_string[-85..-1]
    # presumably Hashes of position => [wild type, mutation]; they are merged and iterated that way below
    nrti = s.sdrm(:nrti, start_codon_number)
    nnrti = s.sdrm(:nnrti, start_codon_number)
    mut_com << nrti.merge(nnrti)

    nrti.each do |position, mutation|
      (mut_nrti[position] ||= [mutation[0], []])[1] << mutation[1]
    end
    nnrti.each do |position, mutation|
      (mut_nnrti[position] ||= [mutation[0], []])[1] << mutation[1]
    end
  end

  # point mutations: NRTI rows first, then NNRTI rows, finally ordered by position
  point_mutation_list = []
  [["NRTI", mut_nrti], ["NNRTI", mut_nnrti]].each do |drug_class, mutations|
    mutations.each do |position, (wt, mut_list)|
      mut_list.count_freq.each do |m, number|
        ci = ViralSeq::Math::BinomCI.new(number, n_seq)
        # "*" marks counts that fall below the caller-supplied cut-off
        label = number < cutoff ? "*" : ""
        point_mutation_list << [drug_class, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
      end
    end
  end
  point_mutation_list.sort_by! { |record| record[2] }

  # linkage: count identical per-sequence mutation combinations, "WT" when none
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.empty? ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end
  # most frequent combination first
  linkage_list = link2.sort_by { |_key, value| value }.reverse.map do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    label = v < cutoff ? "*" : ""
    [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
  end

  # report: percentage of every amino acid at every position of both fragments;
  # the first fragment is numbered from codon 34, the second from codon 152
  report_list = []
  [[34, r1_aa], [152, r2_aa]].each do |first_codon, fragment_aa|
    aa_strings = fragment_aa.values
    aa_strings.first.size.times do |p|
      freq = aa_strings.map { |r1| r1[p] }.count_freq
      record = [region, first_codon + p, n_seq]
      ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
        record << (freq[amino_acid].to_f / n_seq * 100).round(4)
      end
      report_list << record
    end
  end

  [point_mutation_list, linkage_list, report_list]
end
|
359
|
+
|
360
|
+
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV IN region.
|
361
|
+
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
362
|
+
# IN codon 53-174
|
363
|
+
# @param (see #sdrm_hiv_pr)
|
364
|
+
# @return (see #sdrm_hiv_pr)
|
365
|
+
|
366
|
+
def sdrm_hiv_in(cutoff = 0)
  sequences = self.dna_hash
  # Nothing to analyse: return three empty lists instead of failing when the
  # report step looks up the first translated sequence.
  return [[], [], []] if sequences.empty?

  region = "IN"
  rf_label = 2
  start_codon_number = 53
  n_seq = sequences.size
  mut = {}      # position => [wild-type residue, [observed mutant residue, ...]]
  mut_com = []  # one SDRM record per sequence, used for the linkage table
  aa = {}       # sequence name => translated amino-acid string

  sequences.each do |name, seq|
    s = ViralSeq::Sequence.new(name, seq)
    s.translate(rf_label)
    aa[name] = s.aa_string
    # presumably a Hash of position => [wild type, mutation]; it is iterated that way below
    record = s.sdrm(:hiv_in, start_codon_number)
    mut_com << record
    record.each do |position, mutation|
      (mut[position] ||= [mutation[0], []])[1] << mutation[1]
    end
  end

  # point mutations: one row per (position, mutant residue) with its binomial CI
  point_mutation_list = []
  mut.each do |position, (wt, mut_list)|
    mut_list.count_freq.each do |m, number|
      ci = ViralSeq::Math::BinomCI.new(number, n_seq)
      # "*" marks counts that fall below the caller-supplied cut-off
      label = number < cutoff ? "*" : ""
      point_mutation_list << [region, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
    end
  end
  point_mutation_list.sort_by! { |record| record[2] }

  # linkage: count identical per-sequence mutation combinations, "WT" when none
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.empty? ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end
  # most frequent combination first
  linkage_list = link2.sort_by { |_key, value| value }.reverse.map do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    label = v < cutoff ? "*" : ""
    [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
  end

  # report: percentage of every amino acid at every position;
  # the number of positions is taken from the first translated sequence
  aa_strings = aa.values
  report_list = []
  aa_strings.first.size.times do |p|
    freq = aa_strings.map { |r1| r1[p] }.count_freq
    record = [region, start_codon_number + p, n_seq]
    ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
      record << (freq[amino_acid].to_f / n_seq * 100).round(4)
    end
    report_list << record
  end

  [point_mutation_list, linkage_list, report_list]
end
|
452
|
+
|
453
|
+
end # end of ViralSeq::SeqHash
|
454
|
+
end # end of ViralSeq
|