rbbt-dm 0.0.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/network/paths.rb +47 -29
- data/lib/rbbt/plots/bar.rb +152 -0
- data/lib/rbbt/plots/heatmap.rb +63 -0
- data/lib/rbbt/statistics/fdr.rb +59 -29
- data/lib/rbbt/statistics/hypergeometric.rb +176 -72
- data/lib/rbbt/statistics/random_walk.rb +285 -42
- data/test/rbbt/network/test_paths.rb +3 -3
- data/test/rbbt/statistics/test_hypergeometric.rb +24 -2
- data/test/rbbt/statistics/test_random_walk.rb +39 -0
- data/test/test_helper.rb +1 -1
- metadata +95 -70
@@ -1,13 +1,19 @@
|
|
1
1
|
require 'inline'
|
2
|
+
require 'rsruby'
|
2
3
|
require 'rbbt/tsv'
|
3
4
|
require 'rbbt/persist'
|
4
5
|
require 'rbbt/statistics/fdr'
|
5
6
|
require 'rbbt/entity'
|
7
|
+
require 'distribution'
|
8
|
+
require 'distribution/hypergeometric'
|
6
9
|
|
7
10
|
module Hypergeometric
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
+
inline do |builder|
|
12
|
+
builder.prefix <<-EOC
|
13
|
+
#include <math.h>
|
14
|
+
EOC
|
15
|
+
|
16
|
+
builder.c_raw_singleton <<-EOC
|
11
17
|
/**
|
12
18
|
* Compute log(k!)
|
13
19
|
* @param k The value k.
|
@@ -15,13 +21,13 @@ module Hypergeometric
|
|
15
21
|
*/
|
16
22
|
double lFactorial(double k)
|
17
23
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
double r = 0;
|
25
|
+
int i;
|
26
|
+
for(i=2 ; i<=(int)k ; i++)
|
27
|
+
{
|
28
|
+
r = r + (double)(log((double)i));
|
29
|
+
}
|
30
|
+
return r;
|
25
31
|
}
|
26
32
|
|
27
33
|
|
@@ -34,25 +40,25 @@ double lFactorial(double k)
|
|
34
40
|
*/
|
35
41
|
double lBinom(double n, double k)
{
  /*
   * Returns log(C(n, k)), computed as
   *   log(n * (n-1) * ... * (n-k+1)) - log(k!)
   */
  long i;
  double r = 0;

  /*
   * C(n, k) == C(n, n-k), so work with the smaller of the two to keep the
   * loop short. The test must compare k (not n) against n-k; comparing n
   * is true for every k > 0 and always swaps, defeating the shortcut.
   */
  if (k > n - k)
  {
    k = n - k;
  }

  /* Numerator: sum of log(i) for i = n down to n-k+1. */
  for (i = (long)n; i > (n - k); i--)
  {
    r = r + log((double)i);
  }

  /* Denominator: subtract log(k!). */
  r = r - lFactorial(k);

  return r;
}
|
53
|
-
|
54
|
-
|
55
|
-
|
59
|
+
EOC
|
60
|
+
|
61
|
+
builder.c_singleton <<-EOC
|
56
62
|
/**
|
57
63
|
* * Compute the Hypergeometric accumulated value.
|
58
64
|
* * @param total => total size
|
@@ -61,49 +67,93 @@ double lBinom(double n, double k)
|
|
61
67
|
* * @param found => support
|
62
68
|
* * @return The result
|
63
69
|
* */
|
64
|
-
|
70
|
+
//pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
71
|
+
//["200204", 2141, 15, 125, 3, 1.37320769558675e-188, ["Q05193", "P54762", "Q12923"]]
|
72
|
+
double hypergeometric_c(double total, double support, double list, double found)
{
  /*
   * Upper tail of the hypergeometric distribution: the sum, for i from
   * min(list, support) down to found (inclusive), of
   *   C(support, i) * C(total - support, list - i) / C(total, list)
   * Terms are accumulated in log space and derived from one another.
   */
  double other = total - support;
  double top = list;
  double log_n_choose_k = lBinom(total, list);
  double lfoo;
  double sum = 0;
  int i;

  /* The sample cannot contain more hits than exist in the population. */
  if (support < list)
  {
    top = support;
  }

  /*
   * Log of the numerator for i = top. This has to be computed AFTER top is
   * clamped; otherwise the first term is evaluated at i = list even when
   * the loop below starts at i = support.
   */
  lfoo = lBinom(support, top) + lBinom(other, list - top);

  for (i = (int)top; i >= found; i--)
  {
    sum = sum + exp(lfoo - log_n_choose_k);

    /*
     * With i hits the sample holds list - i misses. If that already uses
     * every miss available, fewer hits are impossible: the remaining terms
     * are zero and the ratio below would take the log of a non-positive
     * number.
     */
    if (other - list + i <= 0)
    {
      break;
    }

    if (i > found)
    {
      /* Step the numerator from i to i - 1:
       *   C(support, i-1) / C(support, i)         = i / (support - i + 1)
       *   C(other, list-i+1) / C(other, list-i)   = (other - list + i) / (list - i + 1)
       */
      lfoo = lfoo + log(i / (support - i + 1)) + log((other - list + i) / (list - i + 1));
    }
  }

  return sum;
}
|
89
|
-
|
90
|
-
|
99
|
+
EOC
|
100
|
+
end
|
101
|
+
|
102
|
+
# Upper-tail hypergeometric probability P(X >= found), evaluated through
# R's phyper.
#
# total   - size of the population
# support - number of successes in the population
# list    - size of the sample drawn
# found   - number of successes observed in the sample
#
# phyper with lower.tail = FALSE returns P(X > q), so found - 1 is passed to
# make the tail include +found+ itself. This matches hypergeometric_c, whose
# sum runs down to +found+ inclusive, and TSV#enrichment, which passes
# count - 1 to phyper.
#
# Returns the probability as a Float.
def self.hypergeometric(total, support, list, found)
  RSRuby.instance.phyper(found - 1, support, total - support, list, false).to_f
end
|
92
105
|
end
|
93
106
|
|
94
107
|
module TSV
|
95
108
|
|
96
|
-
def annotation_counts(fields = nil, persistence = false)
|
109
|
+
def annotation_counts(fields = nil, persistence = false, options = {})
|
97
110
|
fields ||= self.fields
|
98
111
|
fields = [fields] if String === fields or Symbol === fields
|
112
|
+
rename = options.delete :rename
|
99
113
|
|
100
|
-
Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
|
114
|
+
Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts", :other => { :rename => rename }) do
|
101
115
|
data ||= Hash.new(0)
|
102
116
|
|
103
117
|
with_unnamed do
|
104
|
-
|
105
|
-
|
118
|
+
|
119
|
+
case type
|
120
|
+
when :single
|
121
|
+
through :key, fields do |key, value|
|
122
|
+
next if value.nil?
|
123
|
+
data[value] += 1
|
124
|
+
end
|
125
|
+
when :double
|
126
|
+
if rename
|
127
|
+
Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
|
128
|
+
through :key, fields do |key, values|
|
129
|
+
next if values.nil?
|
130
|
+
values.flatten.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
|
131
|
+
end
|
132
|
+
else
|
133
|
+
through :key, fields do |key, values|
|
134
|
+
values.flatten.compact.uniq.each{|value| data[value] += 1}
|
135
|
+
end
|
136
|
+
end
|
137
|
+
when :list
|
138
|
+
through :key, fields do |key, values|
|
139
|
+
next if values.nil?
|
140
|
+
values.compact.uniq.each{|value| data[value] += 1}
|
141
|
+
end
|
142
|
+
when :flat
|
143
|
+
if rename
|
144
|
+
Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
|
145
|
+
through :key, fields do |key, values|
|
146
|
+
next if values.nil?
|
147
|
+
values.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
|
148
|
+
end
|
149
|
+
else
|
150
|
+
through :key, fields do |key, values|
|
151
|
+
next if values.nil?
|
152
|
+
values.compact.uniq.each{|value| data[value] += 1}
|
153
|
+
end
|
154
|
+
end
|
106
155
|
end
|
156
|
+
|
107
157
|
end
|
108
158
|
|
109
159
|
data
|
@@ -111,51 +161,106 @@ module TSV
|
|
111
161
|
end
|
112
162
|
|
113
163
|
def enrichment(list, fields = nil, options = {})
|
164
|
+
options = Misc.add_defaults options, :skip_missing => true, :background => nil
|
165
|
+
background, skip_missing = Misc.process_options options, :background, :skip_missing
|
166
|
+
|
167
|
+
if Array === background and not background.empty?
|
168
|
+
filter
|
169
|
+
add_filter(:key, background)
|
170
|
+
if defined? AnnotatedArray and AnnotatedArray === list
|
171
|
+
list = list.subset background
|
172
|
+
else
|
173
|
+
list = list & background
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
list = list.compact.uniq
|
178
|
+
|
114
179
|
with_unnamed do
|
115
180
|
fields ||= self.fields.first
|
116
181
|
options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
|
117
182
|
|
118
|
-
add_keys = Misc.process_options options, :add_keys
|
183
|
+
add_keys, rename = Misc.process_options options, :add_keys, :rename
|
119
184
|
|
120
185
|
Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
|
121
186
|
|
122
|
-
selected = select :key => list
|
187
|
+
selected = select :key => list.uniq
|
123
188
|
|
124
189
|
tsv_size = keys.length
|
125
|
-
|
126
|
-
Log.debug "Found #{
|
190
|
+
found = selected.keys.length
|
191
|
+
Log.debug "Found #{found} of #{list.length} entities"
|
127
192
|
|
128
|
-
|
193
|
+
if skip_missing
|
194
|
+
total = found
|
195
|
+
Log.debug "Using #{ found } as sample size; skipping missing"
|
196
|
+
else
|
197
|
+
total = list.uniq.length
|
198
|
+
Log.debug "Using #{ list.length } as sample size"
|
199
|
+
end
|
200
|
+
|
201
|
+
counts = annotation_counts fields, options[:persist], :rename => rename
|
129
202
|
|
130
|
-
annotations = Hash.new
|
131
203
|
annotation_keys = Hash.new
|
132
204
|
selected.with_unnamed do
|
133
|
-
|
134
|
-
|
205
|
+
|
206
|
+
case type
|
207
|
+
when :single
|
208
|
+
selected.through :key, fields do |key, value|
|
135
209
|
value = value.dup
|
136
|
-
annotations[value] ||= 0
|
137
|
-
annotations[value] += 1
|
138
|
-
next unless add_keys
|
139
210
|
annotation_keys[value] ||= []
|
140
211
|
annotation_keys[value] << key
|
141
|
-
|
212
|
+
end
|
213
|
+
|
214
|
+
when :double
|
215
|
+
selected.through :key, fields do |key, values|
|
216
|
+
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
217
|
+
value = value.dup
|
218
|
+
annotation_keys[value] ||= []
|
219
|
+
annotation_keys[value] << key
|
220
|
+
}
|
221
|
+
end
|
222
|
+
|
223
|
+
when :list
|
224
|
+
selected.through :key, fields do |key, values|
|
225
|
+
values.compact.uniq.reject{|value| value.empty?}.each{|value|
|
226
|
+
value = value.dup
|
227
|
+
annotation_keys[value] ||= []
|
228
|
+
annotation_keys[value] << key
|
229
|
+
}
|
230
|
+
end
|
231
|
+
|
232
|
+
when :flat
|
233
|
+
selected.through :key, fields do |key, values|
|
234
|
+
values.compact.uniq.reject{|value| value.empty?}.each{|value|
|
235
|
+
value = value.dup
|
236
|
+
annotation_keys[value] ||= []
|
237
|
+
annotation_keys[value] << key
|
238
|
+
}
|
239
|
+
end
|
240
|
+
|
142
241
|
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
if Array === background and not background.empty?
|
246
|
+
reset_filters
|
247
|
+
pop_filter
|
143
248
|
end
|
144
249
|
|
145
250
|
pvalues = {}
|
146
|
-
|
251
|
+
annotation_keys.each do |annotation, elems|
|
252
|
+
elems = elems.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq if rename
|
253
|
+
count = elems.length
|
147
254
|
next if count < options[:min_support] or not counts.include? annotation
|
148
|
-
pvalues[annotation] =
|
255
|
+
pvalues[annotation] = RSRuby.instance.phyper(count - 1, counts[annotation], tsv_size - counts[annotation], total, false).to_f
|
149
256
|
end
|
150
257
|
|
151
258
|
FDR.adjust_hash! pvalues if options[:fdr]
|
152
259
|
|
153
260
|
pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
|
154
261
|
|
155
|
-
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
156
|
-
|
157
262
|
if add_keys
|
158
|
-
tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
|
263
|
+
tsv = TSV.setup(pvalues.keys.collect{|k| k.dup}, :key_field => fields, :fields => [], :type => :double)
|
159
264
|
|
160
265
|
tsv.add_field 'p-value' do |annot, values|
|
161
266
|
[pvalues[annot]]
|
@@ -171,7 +276,7 @@ module TSV
|
|
171
276
|
|
172
277
|
tsv
|
173
278
|
else
|
174
|
-
pvalues
|
279
|
+
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
175
280
|
end
|
176
281
|
|
177
282
|
end
|
@@ -198,4 +303,3 @@ module Entity
|
|
198
303
|
end
|
199
304
|
end
|
200
305
|
|
201
|
-
|