semtools 0.1.2 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,140 +1,148 @@
1
- # TODO: Make a pull request to https://rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
2
- #to compute fisher exact test
3
- #Fisher => http://www.biostathandbook.com/fishers.html
4
- def get_fisher_exact_test(listA, listB, all_elements_count, tail ='two_sided', weigths=nil)
5
- listA_listB = listA & listB
6
- listA_nolistB = listA - listB
7
- nolistA_listB = listB - listA
8
- if weigths.nil?
9
- listA_listB_count = listA_listB.length
10
- listA_nolistB_count = listA_nolistB.length
11
- nolistA_listB_count = nolistA_listB.length
12
- nolistA_nolistB_count = all_elements_count - (listA | listB).length
13
- else
14
- # Fisher exact test weighted as proposed in Improved scoring of functional groups from gene expression data by decorrelating GO graph structure
15
- # https://academic.oup.com/bioinformatics/article/22/13/1600/193669
16
- listA_listB_count = listA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
17
- listA_nolistB_count = listA_nolistB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
18
- nolistA_listB_count = nolistA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
19
- nolistA_nolistB_count = (weigths.keys - (listA | listB)).map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
20
- all_elements_count = weigths.values.inject(0){|sum, n| sum + n}.ceil
21
- end
22
- if tail == 'two_sided'
23
- accumulated_prob = get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
24
- elsif tail == 'less'
25
- accumulated_prob = get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
26
- end
27
- return accumulated_prob
28
- end
1
+ # # TODO: Make a pull request to https://rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
2
+ # #to compute fisher exact test
3
+ # #Fisher => http://www.biostathandbook.com/fishers.html
4
+ # def get_fisher_exact_test(listA, listB, all_elements_count, tail ='two_sided', weigths=nil, partial_weigths=true)
5
+ # #puts '-', listA.inspect, listB.inspect, '-'
6
+ # listA_listB = listA & listB
7
+ # listA_nolistB = listA - listB
8
+ # nolistA_listB = listB - listA
9
+ # if weigths.nil?
10
+ # listA_listB_count = listA_listB.length
11
+ # listA_nolistB_count = listA_nolistB.length
12
+ # nolistA_listB_count = nolistA_listB.length
13
+ # nolistA_nolistB_count = all_elements_count - (listA | listB).length
14
+ # else
15
+ # # Fisher exact test weighted as proposed in Improved scoring of functional groups from gene expression data by decorrelating GO graph structure
16
+ # # https://academic.oup.com/bioinformatics/article/22/13/1600/193669
17
+ # listA_listB_count = listA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
18
+ # listA_nolistB_count = listA_nolistB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
19
+ # nolistA_listB_count = nolistA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
29
20
 
30
- def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
31
- #https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
32
- accumulated_prob = 0
33
- ref_prob = compute_hyper_prob(
34
- listA_listB_count,
35
- listA_nolistB_count,
36
- nolistA_listB_count,
37
- nolistA_nolistB_count,
38
- all_elements_count
39
- )
40
- accumulated_prob += ref_prob
41
- [listA_listB_count, nolistA_nolistB_count].min.times do |n| #less
42
- n += 1
43
- prob = compute_hyper_prob(
44
- listA_listB_count - n,
45
- listA_nolistB_count + n,
46
- nolistA_listB_count + n,
47
- nolistA_nolistB_count - n,
48
- all_elements_count
49
- )
50
- prob <= ref_prob ? accumulated_prob += prob : break
51
- end
21
+ # if partial_weigths
22
+ # nolistA_nolistB_count = all_elements_count - (listA | listB).length
23
+ # all_elements_count = nolistA_nolistB_count + listA_listB_count + listA_nolistB_count + nolistA_listB_count
24
+ # else
25
+ # nolistA_nolistB_count = (weigths.keys - (listA | listB)).map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
26
+ # all_elements_count = weigths.values.inject(0){|sum, n| sum + n}.ceil
27
+ # end
28
+ # end
29
+ # #puts [listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count].inspect
30
+ # if tail == 'two_sided'
31
+ # accumulated_prob = get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
32
+ # elsif tail == 'less'
33
+ # accumulated_prob = get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
34
+ # end
35
+ # return accumulated_prob
36
+ # end
52
37
 
53
- [listA_nolistB_count, nolistA_listB_count].min.times do |n| #greater
54
- n += 1
55
- prob = compute_hyper_prob(
56
- listA_listB_count + n,
57
- listA_nolistB_count - n,
58
- nolistA_listB_count - n,
59
- nolistA_nolistB_count + n,
60
- all_elements_count
61
- )
62
- accumulated_prob += prob if prob <= ref_prob
63
- end
38
+ # def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
39
+ # #https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
40
+ # accumulated_prob = 0
41
+ # ref_prob = compute_hyper_prob(
42
+ # listA_listB_count,
43
+ # listA_nolistB_count,
44
+ # nolistA_listB_count,
45
+ # nolistA_nolistB_count,
46
+ # all_elements_count
47
+ # )
48
+ # accumulated_prob += ref_prob
49
+ # [listA_listB_count, nolistA_nolistB_count].min.times do |n| #less
50
+ # n += 1
51
+ # prob = compute_hyper_prob(
52
+ # listA_listB_count - n,
53
+ # listA_nolistB_count + n,
54
+ # nolistA_listB_count + n,
55
+ # nolistA_nolistB_count - n,
56
+ # all_elements_count
57
+ # )
58
+ # prob <= ref_prob ? accumulated_prob += prob : break
59
+ # end
64
60
 
65
- return accumulated_prob
66
- end
61
+ # [listA_nolistB_count, nolistA_listB_count].min.times do |n| #greater
62
+ # n += 1
63
+ # prob = compute_hyper_prob(
64
+ # listA_listB_count + n,
65
+ # listA_nolistB_count - n,
66
+ # nolistA_listB_count - n,
67
+ # nolistA_nolistB_count + n,
68
+ # all_elements_count
69
+ # )
70
+ # accumulated_prob += prob if prob <= ref_prob
71
+ # end
67
72
 
68
- def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
69
- accumulated_prob = 0
70
- [listA_listB_count, nolistA_nolistB_count].min.times do |n|
71
- accumulated_prob += compute_hyper_prob(
72
- listA_listB_count - n,
73
- listA_nolistB_count + n,
74
- nolistA_listB_count + n,
75
- nolistA_nolistB_count - n,
76
- all_elements_count
77
- )
78
- end
79
- return accumulated_prob
80
- end
73
+ # return accumulated_prob
74
+ # end
81
75
 
82
- def compute_hyper_prob(a, b, c, d, n)
83
- # https://en.wikipedia.org/wiki/Fisher%27s_exact_test
84
- binomA = binom(a + b, a)
85
- binomC = binom(c + d, c)
86
- divisor = binom(n, a + c)
87
- return (binomA * binomC).fdiv(divisor)
88
- end
76
+ # def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
77
+ # accumulated_prob = 0
78
+ # [listA_listB_count, nolistA_nolistB_count].min.times do |n|
79
+ # accumulated_prob += compute_hyper_prob(
80
+ # listA_listB_count - n,
81
+ # listA_nolistB_count + n,
82
+ # nolistA_listB_count + n,
83
+ # nolistA_nolistB_count - n,
84
+ # all_elements_count
85
+ # )
86
+ # end
87
+ # return accumulated_prob
88
+ # end
89
89
 
90
- def binom(n,k)
91
- if k > 0 && k < n
92
- res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
93
- else
94
- res = 1
95
- end
96
- end
90
+ # def compute_hyper_prob(a, b, c, d, n)
91
+ # # https://en.wikipedia.org/wiki/Fisher%27s_exact_test
92
+ # binomA = binom(a + b, a)
93
+ # binomC = binom(c + d, c)
94
+ # divisor = binom(n, a + c)
95
+ # return (binomA * binomC).fdiv(divisor)
96
+ # end
97
97
 
98
- #to compute adjusted pvalues
99
- #https://rosettacode.org/wiki/P-value_correction#Ruby
100
- def get_benjaminiHochberg_pvalues(arr_pvalues)
101
- n = arr_pvalues.length
102
- arr_o = order(arr_pvalues, true)
103
- arr_cummin_input = []
104
- (0..(n - 1)).each do |i|
105
- arr_cummin_input[i] = (n / (n - i).to_f) * arr_pvalues[arr_o[i]]
106
- end
107
- arr_ro = order(arr_o)
108
- arr_cummin = cummin(arr_cummin_input)
109
- arr_pmin = pmin(arr_cummin)
110
- return arr_pmin.values_at(*arr_ro)
111
- end
98
+ # def binom(n,k)
99
+ # if k > 0 && k < n
100
+ # res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
101
+ # else
102
+ # res = 1
103
+ # end
104
+ # end
112
105
 
113
- def order(array, decreasing = false)
114
- if decreasing == false
115
- array.sort.map { |n| array.index(n) }
116
- else
117
- array.sort.map { |n| array.index(n) }.reverse
118
- end
119
- end
106
+ # #to compute adjusted pvalues
107
+ # #https://rosettacode.org/wiki/P-value_correction#Ruby
108
+ # def get_benjaminiHochberg_pvalues(arr_pvalues)
109
+ # n = arr_pvalues.length
110
+ # arr_o = order(arr_pvalues, true)
111
+ # arr_cummin_input = []
112
+ # (0..(n - 1)).each do |i|
113
+ # arr_cummin_input[i] = (n / (n - i).to_f) * arr_pvalues[arr_o[i]]
114
+ # end
115
+ # arr_ro = order(arr_o)
116
+ # arr_cummin = cummin(arr_cummin_input)
117
+ # arr_pmin = pmin(arr_cummin)
118
+ # return arr_pmin.values_at(*arr_ro)
119
+ # end
120
120
 
121
- def cummin(array)
122
- cumulative_min = array.first
123
- arr_cummin = []
124
- array.each do |p|
125
- cumulative_min = [p, cumulative_min].min
126
- arr_cummin << cumulative_min
127
- end
128
- return arr_cummin
129
- end
121
+ # def order(array, decreasing = false)
122
+ # if decreasing == false
123
+ # array.sort.map { |n| array.index(n) }
124
+ # else
125
+ # array.sort.map { |n| array.index(n) }.reverse
126
+ # end
127
+ # end
130
128
 
131
- def pmin(array)
132
- x = 1
133
- pmin_array = []
134
- array.each_index do |i|
135
- pmin_array[i] = [array[i], x].min
136
- abort if pmin_array[i] > 1
137
- end
138
- return pmin_array
139
- end
129
+ # def cummin(array)
130
+ # cumulative_min = array.first
131
+ # arr_cummin = []
132
+ # array.each do |p|
133
+ # cumulative_min = [p, cumulative_min].min
134
+ # arr_cummin << cumulative_min
135
+ # end
136
+ # return arr_cummin
137
+ # end
138
+
139
+ # def pmin(array)
140
+ # x = 1
141
+ # pmin_array = []
142
+ # array.each_index do |i|
143
+ # pmin_array[i] = [array[i], x].min
144
+ # abort if pmin_array[i] > 1
145
+ # end
146
+ # return pmin_array
147
+ # end
140
148