viral_seq 0.3.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +7 -1
- data/lib/viral_seq/Integer.rb +16 -0
- data/lib/viral_seq/constant.rb +7 -0
- data/lib/viral_seq/enumerable.rb +132 -0
- data/lib/viral_seq/hash.rb +45 -0
- data/lib/viral_seq/hivdr.rb +454 -0
- data/lib/viral_seq/math.rb +128 -380
- data/lib/viral_seq/muscle.rb +60 -82
- data/lib/viral_seq/pid.rb +26 -0
- data/lib/viral_seq/ref_seq.rb +35 -0
- data/lib/viral_seq/rubystats.rb +172 -0
- data/lib/viral_seq/seq_hash.rb +1043 -0
- data/lib/viral_seq/seq_hash_pair.rb +219 -0
- data/lib/viral_seq/sequence.rb +571 -348
- data/lib/viral_seq/string.rb +119 -0
- data/lib/viral_seq/version.rb +1 -1
- data/lib/viral_seq.rb +14 -15
- metadata +13 -12
- data/lib/viral_seq/a3g.rb +0 -172
- data/lib/viral_seq/fasta.rb +0 -154
- data/lib/viral_seq/hcv_dr.rb +0 -54
- data/lib/viral_seq/locator.rb +0 -299
- data/lib/viral_seq/misc.rb +0 -103
- data/lib/viral_seq/nt_variation.rb +0 -148
- data/lib/viral_seq/poisson_cutoff.rb +0 -68
- data/lib/viral_seq/refseq.rb +0 -45
- data/lib/viral_seq/sdrm_core.rb +0 -652
- data/lib/viral_seq/tcs_core.rb +0 -556
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9055ee4b893bdff77117a2a9c005166637c177b0ed243a5362488ccf7d893e76
|
4
|
+
data.tar.gz: 87faa7b60c47eecc6f1e3267d4f2a0df549dc70d935d8adabaf54994e60b8ab4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5a3d9aab73cd1e8b696527392c6caaa0a4eec485fe0dbf38a7db456ddce115288f2ae735717ec9595cc4f732cb6afee8dca750b0ebfc703112a5df7196230ca
|
7
|
+
data.tar.gz: f0f040bb1c70f3569ae132023f367f945c408ba73d8d495976ceb0cc2538d7104a56f2009f89789eacbfe45921c017e40578fcb4ccd1df489f75d83d7b733a85
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# ViralSeq
|
2
2
|
|
3
3
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
4
|
|
@@ -15,6 +15,12 @@ Load all ViralSeq classes by requiring 'viral_seq.rb'
|
|
15
15
|
#!/usr/bin/env ruby
|
16
16
|
require 'viral_seq'
|
17
17
|
|
18
|
+
## Updates
|
19
|
+
|
20
|
+
Version 1.0.0-07092019:
|
21
|
+
|
22
|
+
1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
|
23
|
+
|
18
24
|
## Development
|
19
25
|
|
20
26
|
Bug reports and pull requests are welcome on GitHub at https://github.com/ViralSeq/viral_seq. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# additional functions for Class::Integer
|
2
|
+
|
3
|
+
class Integer
  # Factorial of the receiver.
  #
  # WARNING: this redefines the unary logical-NOT operator for every Integer
  # in the process. After this file is loaded, `!n` returns a factorial (a
  # truthy Integer, or nil for negative n) instead of `false`.
  #
  # @return [Integer, nil] factorial of the receiver; 1 for 0;
  #   nil when the receiver is negative (factorial is undefined there)
  # @example factorial for 5
  #   !5
  #   => 120
  def !
    # An empty product has no meaningful value for negative numbers; keep the
    # historical nil result explicit instead of relying on inject's quirk.
    return nil if self < 0

    # Seeding with 1 makes 0! == 1 fall out without a special case.
    (1..self).inject(1, :*)
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# additional statistic/math functions to Module::Enumerable
|
2
|
+
# @example median number
|
3
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
4
|
+
# array.median
|
5
|
+
# => 5.5
|
6
|
+
# @example sum
|
7
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
8
|
+
# array.sum
|
9
|
+
# => 55
|
10
|
+
# @example average number (mean)
|
11
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
12
|
+
# array.mean
|
13
|
+
# => 5.5
|
14
|
+
# @example sample variance
|
15
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
16
|
+
# array.sample_variance
|
17
|
+
# => 9.166666666666666
|
18
|
+
# @example standard deviation
|
19
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
20
|
+
# array.stdev
|
21
|
+
# => 3.0276503540974917
|
22
|
+
# @example upper quartile
|
23
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
24
|
+
# array.upper_quartile
|
25
|
+
# => 7.5
|
26
|
+
# @example lower_quartile
|
27
|
+
# array = [1,2,3,4,5,6,7,8,9,10]
|
28
|
+
# array.lower_quartile
|
29
|
+
# => 3.5
|
30
|
+
# @example count frequency of elements in an array
|
31
|
+
# array = %w{cat dog monkey cat cat cat monkey}
|
32
|
+
# array.count_freq
|
33
|
+
# => {"cat"=>4, "dog"=>1, "monkey"=>2}
|
34
|
+
# @example count frequency as percentage of elements in an array
|
35
|
+
# array = %w{cat dog monkey cat cat cat monkey}
|
36
|
+
# array.count_freq2
|
37
|
+
# => {"cat"=>0.57, "dog"=>0.14, "monkey"=>0.29}
|
38
|
+
module Enumerable

  # generate median number
  # Works on any Enumerable (Array, Range, Enumerator, ...), not only on
  # collections that respond to #length.
  # @return [Numeric, nil] median number; the middle element for an odd
  #   number of elements, the Float mean of the two middle elements for an
  #   even number, nil for an empty collection
  def median
    sorted = sort
    len = sorted.length
    return nil if len.zero?

    mid = len / 2
    len.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]).to_f / 2
  end

  # generate summed value
  # This definition replaces Ruby's built-in Enumerable#sum, so it accepts the
  # same optional start value and block to keep other callers working.
  # @param init [Numeric] value the summation starts from
  # @yield [element] optional block; when given, its results are summed
  # @return [Numeric] summed value
  def sum(init = 0, &block)
    return inject(init) { |accum, i| accum + i } unless block

    inject(init) { |accum, i| accum + block.call(i) }
  end

  # generate mean number
  # @return [Float] mean value (NaN for an empty collection)
  def mean
    sum / count.to_f
  end

  # generate sample variance (divides by n - 1)
  # @return [Float] sample variance
  def sample_variance
    m = mean
    squares = inject(0) { |accum, i| accum + (i - m)**2 }
    squares / (count - 1).to_f
  end

  # generate standard deviation (square root of the sample variance)
  # @return [Float] standard deviation
  def stdev
    Math.sqrt(sample_variance)
  end

  # generate upper quartile value
  # Uses the 1-based position 0.75 * n and interpolates linearly between the
  # two neighbouring sorted elements.
  # @return [Numeric, nil] upper quartile value, nil for an empty collection
  def upper_quartile
    sorted = sort
    return nil if sorted.empty?

    u = 0.25 * (3 * sorted.length)
    idx = u.truncate
    below = sorted[idx - 1]
    above = sorted[idx]
    below + ((above - below) * (u - idx))
  end

  # generate lower quartile value
  # Uses the 1-based position 0.25 * n + 1 and interpolates linearly between
  # the two neighbouring sorted elements.
  # @return [Numeric, nil] lower quartile value, nil for an empty collection
  def lower_quartile
    sorted = sort
    return nil if sorted.empty?

    u = 0.25 * sorted.length + 1
    idx = u.truncate
    below = sorted[idx - 1]
    # With a single element there is no upper neighbour; fall back to the
    # element itself so the interpolation term becomes zero.
    above = sorted.fetch(idx, below)
    below + ((above - below) * (u - idx))
  end

  # tabulate elements and frequencies of an Enumerable
  # @return [Hash] return a hash of :element => :freq_count
  #   (missing keys read as 0)
  def count_freq
    each_with_object(Hash.new(0)) { |element, counts| counts[element] += 1 }
  end

  # tabulate elements and their relative frequencies (fractions of 1, e.g.
  # 0.57) of an Enumerable
  # @param decimal [Integer] decimals of frequency
  # @return [Hash] return a hash of :element => :fraction
  #   (missing keys read as 0)
  def count_freq2(decimal = 2)
    counts = count_freq
    total_elements = counts.values.inject(0) { |accum, n| accum + n }.to_f
    counts.each_with_object(Hash.new(0)) do |(key, value), freq|
      freq[key] = (value / total_elements).round(decimal)
    end
  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# additional methods for Class::Hash required for ViralSeq
|
2
|
+
|
3
|
+
class Hash

  # subtract one hash (h2) from the other (h1) if the keys are identical
  # Only keys are compared; the values stored under them are ignored.
  # @param other_hash [Hash] the hash that needs to be subtracted from the receiver
  # @return [Hash] new hash holding the receiver's entries whose keys are absent from other_hash
  # @example subtract h2 from h1 if the keys match
  #   h1 = {"Cat" => 100, "Dog" => 5, "Bird" => 2, "Snake" => 10}
  #   h2 = {"Cat" => 100, "Dog" => 5, "Bison" => 30}
  #   h1.difference(h2)
  #   => {"Bird" => 2, "Snake" => 10}

  def difference(other_hash)
    reject { |key, _value| other_hash.key?(key) }
  end

  # return a new hash with the unique values of input hash as keys,
  # and the keys of the unique values of input hash in an array as values of the new hash
  # Built in a single pass; result keys appear in order of first occurrence
  # and each key list keeps the receiver's iteration order.
  # @return [Hash] a new hash of :uniq_value_of_original_hash => :array_of_keys
  # @example
  #   hash = {1=>"A", 2=>"A", 3=>"C", 4=>"C", 5=>"T"}
  #   hash.uniq_hash
  #   => {"A"=>[1, 2], "C"=>[3, 4], "T"=>[5]}

  def uniq_hash
    each_with_object({}) do |(key, value), grouped|
      (grouped[value] ||= []) << key
    end
  end
end
|
@@ -0,0 +1,454 @@
|
|
1
|
+
|
2
|
+
module ViralSeq
|
3
|
+
class SeqHash
|
4
|
+
|
5
|
+
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
|
6
|
+
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
7
|
+
# PR codon 1-99
|
8
|
+
# @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
|
9
|
+
# can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
|
10
|
+
# @return [Array] three elements `[point_mutation_list, linkage_list, report_list]`
|
11
|
+
#
|
12
|
+
# # point_mutation_list: two-dimensional array for the following information,
|
13
|
+
# # [region,tcs_number,position,wildtype,mutation,count,%,CI_low,CI_high,label]
|
14
|
+
# # linkage_list: two-dimensional array for the following information,
|
15
|
+
# # [region,tcs_number,linkage,count,%,CI_low,CI_high,label]
|
16
|
+
# # report_list: two-dimensional array for the following information,
|
17
|
+
# # [position,codon,tcs_number,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*]
|
18
|
+
# @example identify SDRMs from a FASTA sequence file of HIV PR sequences obtained after MPID-DR sequencing
|
19
|
+
# my_seqhash = ViralSeq::SeqHash.fa('spec/sample_files/sample_dr_sequences/pr.fasta')
|
20
|
+
# p_cut_off = my_seqhash.pm
|
21
|
+
# pr_sdrm = my_seqhash.sdrm_hiv_pr(p_cut_off)
|
22
|
+
# puts "region,tcs_number,position,wildtype,mutation,count,%,CI_low,CI_high,label"; pr_sdrm[0].each {|n| puts n.join(',')}
|
23
|
+
# => region,tcs_number,position,wildtype,mutation,count,%,CI_low,CI_high,label
|
24
|
+
# => PR,396,30,D,N,247,0.62374,0.57398,0.67163,
|
25
|
+
# => PR,396,50,I,V,1,0.00253,6.0e-05,0.01399,*
|
26
|
+
# => PR,396,88,N,D,246,0.62121,0.57141,0.66919,
|
27
|
+
#
|
28
|
+
# puts "region,tcs_number,linkage,count,%,CI_low,CI_high,label"; pr_sdrm[1].each {|n| puts n.join(',')}
|
29
|
+
# => region,tcs_number,linkage,count,%,CI_low,CI_high,label
|
30
|
+
# => PR,396,D30N+N88D,245,0.61869,0.56884,0.66674,
|
31
|
+
# => PR,396,WT,149,0.37626,0.32837,0.42602,
|
32
|
+
# => PR,396,D30N,1,0.00253,6.0e-05,0.01399,*
|
33
|
+
# => PR,396,D30N+I50V+N88D,1,0.00253,6.0e-05,0.01399,*
|
34
|
+
#
|
35
|
+
# puts "position,codon,tcs_number," + ViralSeq::AMINO_ACID_LIST.join(","); pr_sdrm[2].each {|n|puts n.join(",")}
|
36
|
+
# => position,codon,tcs_number,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*
|
37
|
+
# => PR,1,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
38
|
+
# => PR,2,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
39
|
+
# => PR,3,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
40
|
+
# => PR,4,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
41
|
+
# => PR,5,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
42
|
+
# => PR,6,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0
|
43
|
+
# => PR,7,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
44
|
+
# => PR,8,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
|
45
|
+
# => PR,9,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
46
|
+
# => PR,10,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
47
|
+
# => PR,11,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
48
|
+
# => PR,12,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.8788,62.1212,0.0,0.0,0.0,0.0
|
49
|
+
# => PR,13,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.1313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.8687,0.0,0.0,0.0
|
50
|
+
# => PR,14,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
51
|
+
# => PR,15,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.3737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.6263,0.0,0.0,0.0
|
52
|
+
# => PR,16,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
53
|
+
# => PR,17,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
54
|
+
# => PR,18,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.4949,0.5051,0.0,0.0,0.0,0.0,0.0,0.0
|
55
|
+
# => PR,19,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
56
|
+
# => PR,20,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
57
|
+
# => PR,21,396,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
58
|
+
# => PR,22,396,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
59
|
+
# => PR,23,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
60
|
+
# => PR,24,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
61
|
+
# => PR,25,396,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
62
|
+
# => PR,26,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
63
|
+
# => PR,27,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
64
|
+
# => PR,28,396,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
65
|
+
# => PR,29,396,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
66
|
+
# => PR,30,396,0.0,0.0,37.6263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.3737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
67
|
+
# => PR,31,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
68
|
+
# => PR,32,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
69
|
+
# => PR,33,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0
|
70
|
+
# => PR,34,396,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
71
|
+
# => PR,35,396,0.0,0.0,62.1212,37.6263,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
72
|
+
# => PR,36,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0
|
73
|
+
# => PR,37,396,0.0,0.0,37.8788,61.8687,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
74
|
+
# => PR,38,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
75
|
+
# => PR,39,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.4949,0.0,0.0,0.5051,0.0,0.0,0.0,0.0,0.0
|
76
|
+
# => PR,40,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
77
|
+
# => PR,41,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,62.1212,0.0,0.0,0.0,0.0,0.0,0.0
|
78
|
+
# => PR,42,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0
|
79
|
+
# => PR,43,396,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
80
|
+
# => PR,44,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
81
|
+
# => PR,45,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
82
|
+
# => PR,46,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
83
|
+
# => PR,47,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
84
|
+
# => PR,48,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
85
|
+
# => PR,49,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
86
|
+
# => PR,50,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.0
|
87
|
+
# => PR,51,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
88
|
+
# => PR,52,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
89
|
+
# => PR,53,396,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
90
|
+
# => PR,54,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
91
|
+
# => PR,55,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
92
|
+
# => PR,56,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
93
|
+
# => PR,57,396,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,99.4949,0.0,0.0,0.0,0.0,0.0,0.0
|
94
|
+
# => PR,58,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
95
|
+
# => PR,59,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0
|
96
|
+
# => PR,60,396,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
97
|
+
# => PR,61,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
98
|
+
# => PR,62,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
99
|
+
# => PR,63,396,0.0,0.0,0.0,0.0,0.0,0.0,0.2525,0.0,0.0,37.8788,0.0,0.0,61.8687,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
100
|
+
# => PR,64,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.1212,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
101
|
+
# => PR,65,396,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
102
|
+
# => PR,66,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
103
|
+
# => PR,67,396,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
104
|
+
# => PR,68,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
105
|
+
# => PR,69,396,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
106
|
+
# => PR,70,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
107
|
+
# => PR,71,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.1212,37.8788,0.0,0.0,0.0
|
108
|
+
# => PR,72,396,0.0,0.0,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.1212,0.0,0.0,0.0,0.0
|
109
|
+
# => PR,73,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
110
|
+
# => PR,74,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
111
|
+
# => PR,75,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
112
|
+
# => PR,76,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
113
|
+
# => PR,77,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
114
|
+
# => PR,78,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
115
|
+
# => PR,79,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
116
|
+
# => PR,80,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
117
|
+
# => PR,81,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
118
|
+
# => PR,82,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
|
119
|
+
# => PR,83,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.4949,0.0,0.0,0.0,0.5051,0.0,0.0,0.0,0.0,0.0
|
120
|
+
# => PR,84,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
121
|
+
# => PR,85,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
122
|
+
# => PR,86,396,0.0,0.0,0.0,0.5051,0.0,99.4949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
123
|
+
# => PR,87,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
|
124
|
+
# => PR,88,396,0.0,0.0,62.1212,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.8788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
125
|
+
# => PR,89,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
126
|
+
# => PR,90,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
127
|
+
# => PR,91,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
128
|
+
# => PR,92,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
129
|
+
# => PR,93,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
130
|
+
# => PR,94,396,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
131
|
+
# => PR,95,396,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
132
|
+
# => PR,96,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0
|
133
|
+
# => PR,97,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
134
|
+
# => PR,98,396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,99.7475,0.0,0.0,0.0,0.2525,0.0,0.0,0.0,0.0,0.0
|
135
|
+
# => PR,99,396,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
136
|
+
|
137
|
+
def sdrm_hiv_pr(cutoff = 0)
  sequences = self.dna_hash
  # An empty sequence set has no amino-acid string to size the report from;
  # return three empty lists instead of failing on nil.
  return [[], [], []] if sequences.empty?

  region = "PR"
  rf_label = 0
  start_codon_number = 1
  n_seq = sequences.size
  mut = {}      # position => [wild type, [mutation observed in each carrying sequence]]
  mut_com = []  # one mutation record per sequence, used for linkage patterns
  aa = {}       # sequence name => translated amino-acid string
  point_mutation_list = []

  sequences.each do |name, seq|
    s = ViralSeq::Sequence.new(name, seq)
    s.translate(rf_label)
    aa[name] = s.aa_string
    record = s.sdrm(:hiv_pr)
    mut_com << record
    record.each do |position, mutation|
      (mut[position] ||= [mutation[0], []])[1] << mutation[1]
    end
  end

  # One row per (position, mutation) with its count and binomial CI;
  # counts below the cutoff are flagged with "*".
  mut.each do |position, (wt, mut_list)|
    mut_list.count_freq.each do |m, number|
      ci = ViralSeq::Math::BinomCI.new(number, n_seq)
      label = number < cutoff ? "*" : ""
      point_mutation_list << [region, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
    end
  end
  point_mutation_list.sort_by! { |row| row[2] }

  # Collapse each distinct per-sequence mutation record into a label such as
  # "D30N+N88D" ("WT" when the record is empty) and keep its count.
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.size == 0 ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end

  # Most abundant linkage pattern first.
  linkage_list = []
  link2.sort_by { |_key, value| value }.reverse_each do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    label = v < cutoff ? "*" : ""
    linkage_list << [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
  end

  # Per-codon amino-acid composition (percent of sequences); the number of
  # codons is taken from the first translated sequence.
  report_list = []
  aa_strings = aa.values
  aa_strings[0].size.times do |offset|
    counts = aa_strings.map { |aa_string| aa_string[offset] }.count_freq
    record = [region, start_codon_number + offset, n_seq]
    ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
      record << (counts[amino_acid].to_f / n_seq * 100).round(4)
    end
    report_list << record
  end

  return [point_mutation_list, linkage_list, report_list]
end
|
222
|
+
|
223
|
+
|
224
|
+
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV RT region.
|
225
|
+
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
226
|
+
# RT codon 34-122, 152-236, two regions are linked
|
227
|
+
# @param (see #sdrm_hiv_pr)
|
228
|
+
# @return (see #sdrm_hiv_pr)
|
229
|
+
|
230
|
+
def sdrm_hiv_rt(cutoff = 0)
  sequences = self.dna_hash
  # An empty sequence set has no amino-acid string to size the report from;
  # return three empty lists instead of failing on nil.
  return [[], [], []] if sequences.empty?

  region = "RT"
  rf_label = 1
  start_codon_number = 34
  # Filler spliced between the first 267 nt and the remainder of every read.
  # NOTE(review): presumably reference sequence for the codons that lie
  # between the two sequenced RT segments - confirm against the protocol.
  gap = "AGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCAC"

  n_seq = sequences.size
  mut_nrti = {}   # position => [wild type, [NRTI mutation per carrying sequence]]
  mut_nnrti = {}  # position => [wild type, [NNRTI mutation per carrying sequence]]
  mut_com = []    # merged NRTI + NNRTI record per sequence, for linkage patterns
  r1_aa = {}      # sequence name => first 89 amino acids
  r2_aa = {}      # sequence name => last 85 amino acids
  point_mutation_list = []

  sequences.each do |name, seq|
    r1 = seq[0, 267]
    r2 = seq[267..-1]
    linked_seq = r1 + gap + r2
    s = ViralSeq::Sequence.new(name, linked_seq)
    s.translate(rf_label)

    r1_aa[name] = s.aa_string[0, 89]
    r2_aa[name] = s.aa_string[-85..-1]
    nrti = s.sdrm(:nrti, start_codon_number)
    nnrti = s.sdrm(:nnrti, start_codon_number)
    mut_com << (nrti.merge(nnrti))

    [[nrti, mut_nrti], [nnrti, mut_nnrti]].each do |found, table|
      found.each do |position, mutation|
        (table[position] ||= [mutation[0], []])[1] << mutation[1]
      end
    end
  end

  # One row per (position, mutation) with its count and binomial CI, NRTI
  # rows first; counts below the cutoff are flagged with "*".
  [["NRTI", mut_nrti], ["NNRTI", mut_nnrti]].each do |drug_class, table|
    table.each do |position, (wt, mut_list)|
      mut_list.count_freq.each do |m, number|
        ci = ViralSeq::Math::BinomCI.new(number, n_seq)
        label = number < cutoff ? "*" : ""
        point_mutation_list << [drug_class, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
      end
    end
  end
  point_mutation_list.sort_by! { |row| row[2] }

  # Collapse each distinct per-sequence mutation record into a "+"-joined
  # label ("WT" when the record is empty) and keep its count.
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.size == 0 ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end

  # Most abundant linkage pattern first.
  linkage_list = []
  link2.sort_by { |_key, value| value }.reverse_each do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    label = v < cutoff ? "*" : ""
    linkage_list << [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
  end

  # Per-codon amino-acid composition (percent of sequences) for both
  # segments: the first is numbered from codon 34, the second from codon 152.
  # Segment lengths are taken from the first sequence; both are read before
  # any row is built so a missing segment fails before partial output.
  r1_strings = r1_aa.values
  r2_strings = r2_aa.values
  r1_length = r1_strings[0].size
  r2_length = r2_strings[0].size

  report_list = []
  [[r1_strings, r1_length, 34], [r2_strings, r2_length, 152]].each do |aa_strings, length, first_codon|
    length.times do |offset|
      counts = aa_strings.map { |aa_string| aa_string[offset] }.count_freq
      record = [region, first_codon + offset, n_seq]
      ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
        record << (counts[amino_acid].to_f / n_seq * 100).round(4)
      end
      report_list << record
    end
  end

  return [point_mutation_list, linkage_list, report_list]
end
|
359
|
+
|
360
|
+
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV IN region.
|
361
|
+
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
362
|
+
# IN codon 53-174
|
363
|
+
# @param (see #sdrm_hiv_pr)
|
364
|
+
# @return (see #sdrm_hiv_pr)
|
365
|
+
|
366
|
+
def sdrm_hiv_in(cutoff = 0)
  sequences = self.dna_hash
  # An empty sequence set has no amino-acid string to size the report from;
  # return three empty lists instead of failing on nil.
  return [[], [], []] if sequences.empty?

  region = "IN"
  rf_label = 2
  start_codon_number = 53
  n_seq = sequences.size
  mut = {}      # position => [wild type, [mutation observed in each carrying sequence]]
  mut_com = []  # one mutation record per sequence, used for linkage patterns
  aa = {}       # sequence name => translated amino-acid string
  point_mutation_list = []

  sequences.each do |name, seq|
    s = ViralSeq::Sequence.new(name, seq)
    s.translate(rf_label)
    aa[name] = s.aa_string
    record = s.sdrm(:hiv_in, start_codon_number)
    mut_com << record
    record.each do |position, mutation|
      (mut[position] ||= [mutation[0], []])[1] << mutation[1]
    end
  end

  # One row per (position, mutation) with its count and binomial CI;
  # counts below the cutoff are flagged with "*".
  mut.each do |position, (wt, mut_list)|
    mut_list.count_freq.each do |m, number|
      ci = ViralSeq::Math::BinomCI.new(number, n_seq)
      label = number < cutoff ? "*" : ""
      point_mutation_list << [region, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
    end
  end
  point_mutation_list.sort_by! { |row| row[2] }

  # Collapse each distinct per-sequence mutation record into a "+"-joined
  # label ("WT" when the record is empty) and keep its count.
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.size == 0 ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end

  # Most abundant linkage pattern first.
  linkage_list = []
  link2.sort_by { |_key, value| value }.reverse_each do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    label = v < cutoff ? "*" : ""
    linkage_list << [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), label]
  end

  # Per-codon amino-acid composition (percent of sequences), numbered from
  # codon 53; the number of codons is taken from the first translated sequence.
  report_list = []
  aa_strings = aa.values
  aa_strings[0].size.times do |offset|
    counts = aa_strings.map { |aa_string| aa_string[offset] }.count_freq
    record = [region, start_codon_number + offset, n_seq]
    ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
      record << (counts[amino_acid].to_f / n_seq * 100).round(4)
    end
    report_list << record
  end

  return [point_mutation_list, linkage_list, report_list]
end
|
452
|
+
|
453
|
+
end # end of ViralSeq::SeqHash
|
454
|
+
end # end of ViralSeq
|