rbbt-dm 0.0.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/network/paths.rb +47 -29
- data/lib/rbbt/plots/bar.rb +152 -0
- data/lib/rbbt/plots/heatmap.rb +63 -0
- data/lib/rbbt/statistics/fdr.rb +59 -29
- data/lib/rbbt/statistics/hypergeometric.rb +176 -72
- data/lib/rbbt/statistics/random_walk.rb +285 -42
- data/test/rbbt/network/test_paths.rb +3 -3
- data/test/rbbt/statistics/test_hypergeometric.rb +24 -2
- data/test/rbbt/statistics/test_random_walk.rb +39 -0
- data/test/test_helper.rb +1 -1
- metadata +95 -70
@@ -1,13 +1,19 @@
|
|
1
1
|
require 'inline'
|
2
|
+
require 'rsruby'
|
2
3
|
require 'rbbt/tsv'
|
3
4
|
require 'rbbt/persist'
|
4
5
|
require 'rbbt/statistics/fdr'
|
5
6
|
require 'rbbt/entity'
|
7
|
+
require 'distribution'
|
8
|
+
require 'distribution/hypergeometric'
|
6
9
|
|
7
10
|
module Hypergeometric
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
+
inline do |builder|
|
12
|
+
builder.prefix <<-EOC
|
13
|
+
#include <math.h>
|
14
|
+
EOC
|
15
|
+
|
16
|
+
builder.c_raw_singleton <<-EOC
|
11
17
|
/**
|
12
18
|
* Compute log(k!)
|
13
19
|
* @param k The value k.
|
@@ -15,13 +21,13 @@ module Hypergeometric
|
|
15
21
|
*/
|
16
22
|
double lFactorial(double k)
|
17
23
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
double r = 0;
|
25
|
+
int i;
|
26
|
+
for(i=2 ; i<=(int)k ; i++)
|
27
|
+
{
|
28
|
+
r = r + (double)(log((double)i));
|
29
|
+
}
|
30
|
+
return r;
|
25
31
|
}
|
26
32
|
|
27
33
|
|
@@ -34,25 +40,25 @@ double lFactorial(double k)
|
|
34
40
|
*/
|
35
41
|
double lBinom(double n, double k)
{
  /* Returns log(C(n, k)): the log of n * (n-1) * ... * (n-k+1), minus
   * log(k!) as computed by lFactorial. */
  long i;
  double r = 0;

  /* C(n, k) == C(n, n-k), so work with the smaller of the two. The
   * previous test (n > n-k) was true for every positive k and therefore
   * always swapped, which made the loop below and lFactorial run over
   * the longer range whenever k was already the small side. */
  if(k > n-k){
    k = n-k;
  }

  /* Numerator in log space: log(n) + log(n-1) + ... + log(n-k+1). */
  for(i = (long)n ; i > (n-k) ; i--)
  {
    r = r + log((double)i);
  }

  /* Denominator in log space: subtract log(k!). */
  r = r - lFactorial(k);

  return r;
}
|
53
|
-
|
54
|
-
|
55
|
-
|
59
|
+
EOC
|
60
|
+
|
61
|
+
builder.c_singleton <<-EOC
|
56
62
|
/**
|
57
63
|
* * Compute the Hypergeometric accumulated value.
|
58
64
|
* * @param total => total size
|
@@ -61,49 +67,93 @@ double lBinom(double n, double k)
|
|
61
67
|
* * @param found => support
|
62
68
|
* * @return The result
|
63
69
|
* */
|
64
|
-
|
70
|
+
//pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
71
|
+
//["200204", 2141, 15, 125, 3, 1.37320769558675e-188, ["Q05193", "P54762", "Q12923"]]
|
72
|
+
double hypergeometric_c(double total, double support, double list, double found)
{
  /* Sums the hypergeometric point probabilities
   *   C(support, i) * C(total - support, list - i) / C(total, list)
   * for i from the largest possible overlap down to `found`.
   * Per the call shown in the comment above this function: total is the
   * population size, support the number of population members carrying
   * the annotation, list the sample size, found the annotated members
   * observed in the sample. */
  double other = total - support;

  /* The overlap cannot exceed the support. This clamp must happen BEFORE
   * lfoo is seeded: seeding with top = list when support < list evaluates
   * lBinom(support, list) with list > support, while the loop starts at
   * i = support, so every term of the sum was computed from a wrong seed. */
  double top = (support < list) ? support : list;
  double log_n_choose_k = lBinom(total,list);

  /* Log of the numerator for i = top.
   * NOTE(review): if list - top exceeds `other` the seed is still not a
   * valid binomial; confirm callers never pass list > total. */
  double lfoo = lBinom(support,top) + lBinom(other, list-top);

  double sum = 0;
  int i;

  for (i = (int)top; i >= found; i-- )
  {
    sum = sum + exp(lfoo - log_n_choose_k);
    if ( i > found)
    {
      /* Step the numerator from i to i-1:
       *   C(support, i-1)        = C(support, i)      * i / (support - i + 1)
       *   C(other, list - i + 1) = C(other, list - i) * (other - list + i) / (list - i + 1) */
      lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
    }
  }
  return sum;
}
|
89
|
-
|
90
|
-
|
99
|
+
EOC
|
100
|
+
end
|
101
|
+
|
102
|
+
# Upper-tail hypergeometric p-value: the probability of observing +found+ or
# more annotated elements in a sample, computed through R's phyper.
#
# total   - population size
# support - number of population elements carrying the annotation
# list    - sample size
# found   - annotated elements observed in the sample
#
# Returns the p-value as a Float.
def self.hypergeometric(total, support, list, found)
  # phyper(q, m, n, k, lower.tail = FALSE) yields P(X > q). Passing
  # found - 1 includes the observed count itself, i.e. P(X >= found),
  # which matches hypergeometric_c and the phyper call in TSV#enrichment.
  RSRuby.instance.phyper(found - 1, support, total - support, list, false).to_f
end
|
92
105
|
end
|
93
106
|
|
94
107
|
module TSV
|
95
108
|
|
96
|
-
def annotation_counts(fields = nil, persistence = false)
|
109
|
+
def annotation_counts(fields = nil, persistence = false, options = {})
|
97
110
|
fields ||= self.fields
|
98
111
|
fields = [fields] if String === fields or Symbol === fields
|
112
|
+
rename = options.delete :rename
|
99
113
|
|
100
|
-
Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
|
114
|
+
Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts", :other => { :rename => rename }) do
|
101
115
|
data ||= Hash.new(0)
|
102
116
|
|
103
117
|
with_unnamed do
|
104
|
-
|
105
|
-
|
118
|
+
|
119
|
+
case type
|
120
|
+
when :single
|
121
|
+
through :key, fields do |key, value|
|
122
|
+
next if value.nil?
|
123
|
+
data[value] += 1
|
124
|
+
end
|
125
|
+
when :double
|
126
|
+
if rename
|
127
|
+
Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
|
128
|
+
through :key, fields do |key, values|
|
129
|
+
next if values.nil?
|
130
|
+
values.flatten.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
|
131
|
+
end
|
132
|
+
else
|
133
|
+
through :key, fields do |key, values|
|
134
|
+
values.flatten.compact.uniq.each{|value| data[value] += 1}
|
135
|
+
end
|
136
|
+
end
|
137
|
+
when :list
|
138
|
+
through :key, fields do |key, values|
|
139
|
+
next if values.nil?
|
140
|
+
values.compact.uniq.each{|value| data[value] += 1}
|
141
|
+
end
|
142
|
+
when :flat
|
143
|
+
if rename
|
144
|
+
Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
|
145
|
+
through :key, fields do |key, values|
|
146
|
+
next if values.nil?
|
147
|
+
values.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
|
148
|
+
end
|
149
|
+
else
|
150
|
+
through :key, fields do |key, values|
|
151
|
+
next if values.nil?
|
152
|
+
values.compact.uniq.each{|value| data[value] += 1}
|
153
|
+
end
|
154
|
+
end
|
106
155
|
end
|
156
|
+
|
107
157
|
end
|
108
158
|
|
109
159
|
data
|
@@ -111,51 +161,106 @@ module TSV
|
|
111
161
|
end
|
112
162
|
|
113
163
|
def enrichment(list, fields = nil, options = {})
|
164
|
+
options = Misc.add_defaults options, :skip_missing => true, :background => nil
|
165
|
+
background, skip_missing = Misc.process_options options, :background, :skip_missing
|
166
|
+
|
167
|
+
if Array === background and not background.empty?
|
168
|
+
filter
|
169
|
+
add_filter(:key, background)
|
170
|
+
if defined? AnnotatedArray and AnnotatedArray === list
|
171
|
+
list = list.subset background
|
172
|
+
else
|
173
|
+
list = list & background
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
list = list.compact.uniq
|
178
|
+
|
114
179
|
with_unnamed do
|
115
180
|
fields ||= self.fields.first
|
116
181
|
options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
|
117
182
|
|
118
|
-
add_keys = Misc.process_options options, :add_keys
|
183
|
+
add_keys, rename = Misc.process_options options, :add_keys, :rename
|
119
184
|
|
120
185
|
Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
|
121
186
|
|
122
|
-
selected = select :key => list
|
187
|
+
selected = select :key => list.uniq
|
123
188
|
|
124
189
|
tsv_size = keys.length
|
125
|
-
|
126
|
-
Log.debug "Found #{
|
190
|
+
found = selected.keys.length
|
191
|
+
Log.debug "Found #{found} of #{list.length} entities"
|
127
192
|
|
128
|
-
|
193
|
+
if skip_missing
|
194
|
+
total = found
|
195
|
+
Log.debug "Using #{ found } as sample size; skipping missing"
|
196
|
+
else
|
197
|
+
total = list.uniq.length
|
198
|
+
Log.debug "Using #{ list.length } as sample size"
|
199
|
+
end
|
200
|
+
|
201
|
+
counts = annotation_counts fields, options[:persist], :rename => rename
|
129
202
|
|
130
|
-
annotations = Hash.new
|
131
203
|
annotation_keys = Hash.new
|
132
204
|
selected.with_unnamed do
|
133
|
-
|
134
|
-
|
205
|
+
|
206
|
+
case type
|
207
|
+
when :single
|
208
|
+
selected.through :key, fields do |key, value|
|
135
209
|
value = value.dup
|
136
|
-
annotations[value] ||= 0
|
137
|
-
annotations[value] += 1
|
138
|
-
next unless add_keys
|
139
210
|
annotation_keys[value] ||= []
|
140
211
|
annotation_keys[value] << key
|
141
|
-
|
212
|
+
end
|
213
|
+
|
214
|
+
when :double
|
215
|
+
selected.through :key, fields do |key, values|
|
216
|
+
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
217
|
+
value = value.dup
|
218
|
+
annotation_keys[value] ||= []
|
219
|
+
annotation_keys[value] << key
|
220
|
+
}
|
221
|
+
end
|
222
|
+
|
223
|
+
when :list
|
224
|
+
selected.through :key, fields do |key, values|
|
225
|
+
values.compact.uniq.reject{|value| value.empty?}.each{|value|
|
226
|
+
value = value.dup
|
227
|
+
annotation_keys[value] ||= []
|
228
|
+
annotation_keys[value] << key
|
229
|
+
}
|
230
|
+
end
|
231
|
+
|
232
|
+
when :flat
|
233
|
+
selected.through :key, fields do |key, values|
|
234
|
+
values.compact.uniq.reject{|value| value.empty?}.each{|value|
|
235
|
+
value = value.dup
|
236
|
+
annotation_keys[value] ||= []
|
237
|
+
annotation_keys[value] << key
|
238
|
+
}
|
239
|
+
end
|
240
|
+
|
142
241
|
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
if Array === background and not background.empty?
|
246
|
+
reset_filters
|
247
|
+
pop_filter
|
143
248
|
end
|
144
249
|
|
145
250
|
pvalues = {}
|
146
|
-
|
251
|
+
annotation_keys.each do |annotation, elems|
|
252
|
+
elems = elems.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq if rename
|
253
|
+
count = elems.length
|
147
254
|
next if count < options[:min_support] or not counts.include? annotation
|
148
|
-
pvalues[annotation] =
|
255
|
+
pvalues[annotation] = RSRuby.instance.phyper(count - 1, counts[annotation], tsv_size - counts[annotation], total, false).to_f
|
149
256
|
end
|
150
257
|
|
151
258
|
FDR.adjust_hash! pvalues if options[:fdr]
|
152
259
|
|
153
260
|
pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
|
154
261
|
|
155
|
-
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
156
|
-
|
157
262
|
if add_keys
|
158
|
-
tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
|
263
|
+
tsv = TSV.setup(pvalues.keys.collect{|k| k.dup}, :key_field => fields, :fields => [], :type => :double)
|
159
264
|
|
160
265
|
tsv.add_field 'p-value' do |annot, values|
|
161
266
|
[pvalues[annot]]
|
@@ -171,7 +276,7 @@ module TSV
|
|
171
276
|
|
172
277
|
tsv
|
173
278
|
else
|
174
|
-
pvalues
|
279
|
+
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
175
280
|
end
|
176
281
|
|
177
282
|
end
|
@@ -198,4 +303,3 @@ module Entity
|
|
198
303
|
end
|
199
304
|
end
|
200
305
|
|
201
|
-
|