semtools 0.1.2 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,140 +1,148 @@
1
- # TODO: Make a pull request to https://rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
2
- #to cmpute fisher exact test
3
- #Fisher => http://www.biostathandbook.com/fishers.html
4
- def get_fisher_exact_test(listA, listB, all_elements_count, tail ='two_sided', weigths=nil)
5
- listA_listB = listA & listB
6
- listA_nolistB = listA - listB
7
- nolistA_listB = listB - listA
8
- if weigths.nil?
9
- listA_listB_count = listA_listB.length
10
- listA_nolistB_count = listA_nolistB.length
11
- nolistA_listB_count = nolistA_listB.length
12
- nolistA_nolistB_count = all_elements_count - (listA | listB).length
13
- else
14
- # Fisher exact test weigthed as proposed in Improved scoring of functional groups from gene expression data by decorrelating GO graph structure
15
- # https://academic.oup.com/bioinformatics/article/22/13/1600/193669
16
- listA_listB_count = listA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
17
- listA_nolistB_count = listA_nolistB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
18
- nolistA_listB_count = nolistA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
19
- nolistA_nolistB_count = (weigths.keys - (listA | listB)).map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
20
- all_elements_count = weigths.values.inject(0){|sum, n| sum + n}.ceil
21
- end
22
- if tail == 'two_sided'
23
- accumulated_prob = get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
24
- elsif tail == 'less'
25
- accumulated_prob = get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
26
- end
27
- return accumulated_prob
28
- end
1
+ # # TODO: Make a pull request to https://rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
2
+ # #to cmpute fisher exact test
3
+ # #Fisher => http://www.biostathandbook.com/fishers.html
4
+ # def get_fisher_exact_test(listA, listB, all_elements_count, tail ='two_sided', weigths=nil, partial_weigths=true)
5
+ # #puts '-', listA.inspect, listB.inspect, '-'
6
+ # listA_listB = listA & listB
7
+ # listA_nolistB = listA - listB
8
+ # nolistA_listB = listB - listA
9
+ # if weigths.nil?
10
+ # listA_listB_count = listA_listB.length
11
+ # listA_nolistB_count = listA_nolistB.length
12
+ # nolistA_listB_count = nolistA_listB.length
13
+ # nolistA_nolistB_count = all_elements_count - (listA | listB).length
14
+ # else
15
+ # # Fisher exact test weigthed as proposed in Improved scoring of functional groups from gene expression data by decorrelating GO graph structure
16
+ # # https://academic.oup.com/bioinformatics/article/22/13/1600/193669
17
+ # listA_listB_count = listA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
18
+ # listA_nolistB_count = listA_nolistB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
19
+ # nolistA_listB_count = nolistA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
29
20
 
30
- def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
31
- #https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
32
- accumulated_prob = 0
33
- ref_prob = compute_hyper_prob(
34
- listA_listB_count,
35
- listA_nolistB_count,
36
- nolistA_listB_count,
37
- nolistA_nolistB_count,
38
- all_elements_count
39
- )
40
- accumulated_prob += ref_prob
41
- [listA_listB_count, nolistA_nolistB_count].min.times do |n| #less
42
- n += 1
43
- prob = compute_hyper_prob(
44
- listA_listB_count - n,
45
- listA_nolistB_count + n,
46
- nolistA_listB_count + n,
47
- nolistA_nolistB_count - n,
48
- all_elements_count
49
- )
50
- prob <= ref_prob ? accumulated_prob += prob : break
51
- end
21
+ # if partial_weigths
22
+ # nolistA_nolistB_count = all_elements_count - (listA | listB).length
23
+ # all_elements_count = nolistA_nolistB_count + listA_listB_count + listA_nolistB_count + nolistA_listB_count
24
+ # else
25
+ # nolistA_nolistB_count = (weigths.keys - (listA | listB)).map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
26
+ # all_elements_count = weigths.values.inject(0){|sum, n| sum + n}.ceil
27
+ # end
28
+ # end
29
+ # #puts [listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count].inspect
30
+ # if tail == 'two_sided'
31
+ # accumulated_prob = get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
32
+ # elsif tail == 'less'
33
+ # accumulated_prob = get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
34
+ # end
35
+ # return accumulated_prob
36
+ # end
52
37
 
53
- [listA_nolistB_count, nolistA_listB_count].min.times do |n| #greater
54
- n += 1
55
- prob = compute_hyper_prob(
56
- listA_listB_count + n,
57
- listA_nolistB_count - n,
58
- nolistA_listB_count - n,
59
- nolistA_nolistB_count + n,
60
- all_elements_count
61
- )
62
- accumulated_prob += prob if prob <= ref_prob
63
- end
38
+ # def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
39
+ # #https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
40
+ # accumulated_prob = 0
41
+ # ref_prob = compute_hyper_prob(
42
+ # listA_listB_count,
43
+ # listA_nolistB_count,
44
+ # nolistA_listB_count,
45
+ # nolistA_nolistB_count,
46
+ # all_elements_count
47
+ # )
48
+ # accumulated_prob += ref_prob
49
+ # [listA_listB_count, nolistA_nolistB_count].min.times do |n| #less
50
+ # n += 1
51
+ # prob = compute_hyper_prob(
52
+ # listA_listB_count - n,
53
+ # listA_nolistB_count + n,
54
+ # nolistA_listB_count + n,
55
+ # nolistA_nolistB_count - n,
56
+ # all_elements_count
57
+ # )
58
+ # prob <= ref_prob ? accumulated_prob += prob : break
59
+ # end
64
60
 
65
- return accumulated_prob
66
- end
61
+ # [listA_nolistB_count, nolistA_listB_count].min.times do |n| #greater
62
+ # n += 1
63
+ # prob = compute_hyper_prob(
64
+ # listA_listB_count + n,
65
+ # listA_nolistB_count - n,
66
+ # nolistA_listB_count - n,
67
+ # nolistA_nolistB_count + n,
68
+ # all_elements_count
69
+ # )
70
+ # accumulated_prob += prob if prob <= ref_prob
71
+ # end
67
72
 
68
- def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
69
- accumulated_prob = 0
70
- [listA_listB_count, nolistA_nolistB_count].min.times do |n|
71
- accumulated_prob += compute_hyper_prob(
72
- listA_listB_count - n,
73
- listA_nolistB_count + n,
74
- nolistA_listB_count + n,
75
- nolistA_nolistB_count - n,
76
- all_elements_count
77
- )
78
- end
79
- return accumulated_prob
80
- end
73
+ # return accumulated_prob
74
+ # end
81
75
 
82
- def compute_hyper_prob(a, b, c, d, n)
83
- # https://en.wikipedia.org/wiki/Fisher%27s_exact_test
84
- binomA = binom(a + b, a)
85
- binomC = binom(c + d, c)
86
- divisor = binom(n, a + c)
87
- return (binomA * binomC).fdiv(divisor)
88
- end
76
+ # def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
77
+ # accumulated_prob = 0
78
+ # [listA_listB_count, nolistA_nolistB_count].min.times do |n|
79
+ # accumulated_prob += compute_hyper_prob(
80
+ # listA_listB_count - n,
81
+ # listA_nolistB_count + n,
82
+ # nolistA_listB_count + n,
83
+ # nolistA_nolistB_count - n,
84
+ # all_elements_count
85
+ # )
86
+ # end
87
+ # return accumulated_prob
88
+ # end
89
89
 
90
- def binom(n,k)
91
- if k > 0 && k < n
92
- res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
93
- else
94
- res = 1
95
- end
96
- end
90
+ # def compute_hyper_prob(a, b, c, d, n)
91
+ # # https://en.wikipedia.org/wiki/Fisher%27s_exact_test
92
+ # binomA = binom(a + b, a)
93
+ # binomC = binom(c + d, c)
94
+ # divisor = binom(n, a + c)
95
+ # return (binomA * binomC).fdiv(divisor)
96
+ # end
97
97
 
98
- #to cmpute adjusted pvalues
99
- #https://rosettacode.org/wiki/P-value_correction#Ruby
100
- def get_benjaminiHochberg_pvalues(arr_pvalues)
101
- n = arr_pvalues.length
102
- arr_o = order(arr_pvalues, true)
103
- arr_cummin_input = []
104
- (0..(n - 1)).each do |i|
105
- arr_cummin_input[i] = (n / (n - i).to_f) * arr_pvalues[arr_o[i]]
106
- end
107
- arr_ro = order(arr_o)
108
- arr_cummin = cummin(arr_cummin_input)
109
- arr_pmin = pmin(arr_cummin)
110
- return arr_pmin.values_at(*arr_ro)
111
- end
98
+ # def binom(n,k)
99
+ # if k > 0 && k < n
100
+ # res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
101
+ # else
102
+ # res = 1
103
+ # end
104
+ # end
112
105
 
113
- def order(array, decreasing = false)
114
- if decreasing == false
115
- array.sort.map { |n| array.index(n) }
116
- else
117
- array.sort.map { |n| array.index(n) }.reverse
118
- end
119
- end
106
+ # #to cmpute adjusted pvalues
107
+ # #https://rosettacode.org/wiki/P-value_correction#Ruby
108
+ # def get_benjaminiHochberg_pvalues(arr_pvalues)
109
+ # n = arr_pvalues.length
110
+ # arr_o = order(arr_pvalues, true)
111
+ # arr_cummin_input = []
112
+ # (0..(n - 1)).each do |i|
113
+ # arr_cummin_input[i] = (n / (n - i).to_f) * arr_pvalues[arr_o[i]]
114
+ # end
115
+ # arr_ro = order(arr_o)
116
+ # arr_cummin = cummin(arr_cummin_input)
117
+ # arr_pmin = pmin(arr_cummin)
118
+ # return arr_pmin.values_at(*arr_ro)
119
+ # end
120
120
 
121
- def cummin(array)
122
- cumulative_min = array.first
123
- arr_cummin = []
124
- array.each do |p|
125
- cumulative_min = [p, cumulative_min].min
126
- arr_cummin << cumulative_min
127
- end
128
- return arr_cummin
129
- end
121
+ # def order(array, decreasing = false)
122
+ # if decreasing == false
123
+ # array.sort.map { |n| array.index(n) }
124
+ # else
125
+ # array.sort.map { |n| array.index(n) }.reverse
126
+ # end
127
+ # end
130
128
 
131
- def pmin(array)
132
- x = 1
133
- pmin_array = []
134
- array.each_index do |i|
135
- pmin_array[i] = [array[i], x].min
136
- abort if pmin_array[i] > 1
137
- end
138
- return pmin_array
139
- end
129
+ # def cummin(array)
130
+ # cumulative_min = array.first
131
+ # arr_cummin = []
132
+ # array.each do |p|
133
+ # cumulative_min = [p, cumulative_min].min
134
+ # arr_cummin << cumulative_min
135
+ # end
136
+ # return arr_cummin
137
+ # end
138
+
139
+ # def pmin(array)
140
+ # x = 1
141
+ # pmin_array = []
142
+ # array.each_index do |i|
143
+ # pmin_array[i] = [array[i], x].min
144
+ # abort if pmin_array[i] > 1
145
+ # end
146
+ # return pmin_array
147
+ # end
140
148