rbbt-dm 0.0.4 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,13 +1,19 @@
1
1
  require 'inline'
2
+ require 'rsruby'
2
3
  require 'rbbt/tsv'
3
4
  require 'rbbt/persist'
4
5
  require 'rbbt/statistics/fdr'
5
6
  require 'rbbt/entity'
7
+ require 'distribution'
8
+ require 'distribution/hypergeometric'
6
9
 
7
10
  module Hypergeometric
8
- class << self
9
- inline do |builder|
10
- builder.c_raw <<-EOC
11
+ inline do |builder|
12
+ builder.prefix <<-EOC
13
+ #include <math.h>
14
+ EOC
15
+
16
+ builder.c_raw_singleton <<-EOC
11
17
  /**
12
18
  * Compute log(k!)
13
19
  * @param k The value k.
@@ -15,13 +21,13 @@ module Hypergeometric
15
21
  */
16
22
  double lFactorial(double k)
17
23
  {
18
- double r = 0;
19
- int i;
20
- for(i=2 ; i<=(int)k ; i++)
21
- {
22
- r = r + (double)(log((double)i));
23
- }
24
- return r;
24
+ double r = 0;
25
+ int i;
26
+ for(i=2 ; i<=(int)k ; i++)
27
+ {
28
+ r = r + (double)(log((double)i));
29
+ }
30
+ return r;
25
31
  }
26
32
 
27
33
 
@@ -34,25 +40,25 @@ double lFactorial(double k)
34
40
  */
35
41
  double lBinom(double n, double k)
36
42
  {
37
- long i;
38
- double r = 0;
39
-
40
- if(n > n-k){
41
- k = n-k;
42
- }
43
-
44
- for(i = (long)n ; i> (n-k) ; i--)
45
- {
46
- r = r + log((double)i);
47
- }
48
-
49
- r = r - lFactorial(k);
50
-
51
- return r;
43
+ long i;
44
+ double r = 0;
45
+
46
+ if(n > n-k){
47
+ k = n-k;
48
+ }
49
+
50
+ for(i = (long)n ; i> (n-k) ; i--)
51
+ {
52
+ r = r + log((double)i);
53
+ }
54
+
55
+ r = r - lFactorial(k);
56
+
57
+ return r;
52
58
  }
53
- EOC
54
-
55
- builder.c <<-EOC
59
+ EOC
60
+
61
+ builder.c_singleton <<-EOC
56
62
  /**
57
63
  * * Compute the Hypergeometric accumulated value.
58
64
  * * @param total => total size
@@ -61,49 +67,93 @@ double lBinom(double n, double k)
61
67
  * * @param found => support
62
68
  * * @return The result
63
69
  * */
64
- double hypergeometric(double total, double support, double list, double found)
70
+ //pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
71
+ //["200204", 2141, 15, 125, 3, 1.37320769558675e-188, ["Q05193", "P54762", "Q12923"]]
72
+ double hypergeometric_c(double total, double support, double list, double found)
65
73
  {
66
- double other = total - support;
74
+ double other = total - support;
67
75
 
68
- double top = list;
69
- if(support < list){
70
- top = support;
71
- }
76
+ double top = list;
77
+ double log_n_choose_k = lBinom(total,list);
72
78
 
73
- double log_n_choose_k = lBinom(total,list);
79
+ double lfoo = lBinom(support,top) + lBinom(other, list-top);
74
80
 
75
- double lfoo = lBinom(support,top) + lBinom(other, list-top);
76
-
77
- double sum = 0;
81
+ double sum = 0;
78
82
  int i;
79
- for (i = (int)top; i >= found; i-- )
80
- {
81
- sum = sum + exp(lfoo - log_n_choose_k);
82
- if ( i > found)
83
- {
84
- lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
85
- }
86
- }
87
- return sum;
83
+
84
+ if(support < list){
85
+ top = support;
86
+ }
87
+
88
+
89
+ for (i = (int)top; i >= found; i-- )
90
+ {
91
+ sum = sum + exp(lfoo - log_n_choose_k);
92
+ if ( i > found)
93
+ {
94
+ lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
95
+ }
96
+ }
97
+ return sum;
88
98
  }
89
- EOC
90
- end
99
+ EOC
100
+ end
101
+
102
+ def self.hypergeometric(total, support, list, found)
103
+ RSRuby.instance.phyper(found, support, total - support, list, false).to_f
91
104
  end
92
105
  end
93
106
 
94
107
  module TSV
95
108
 
96
- def annotation_counts(fields = nil, persistence = false)
109
+ def annotation_counts(fields = nil, persistence = false, options = {})
97
110
  fields ||= self.fields
98
111
  fields = [fields] if String === fields or Symbol === fields
112
+ rename = options.delete :rename
99
113
 
100
- Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
114
+ Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts", :other => { :rename => rename }) do
101
115
  data ||= Hash.new(0)
102
116
 
103
117
  with_unnamed do
104
- through :key, fields do |key, values|
105
- values.flatten.compact.uniq.each{|value| data[value] += 1}
118
+
119
+ case type
120
+ when :single
121
+ through :key, fields do |key, value|
122
+ next if value.nil?
123
+ data[value] += 1
124
+ end
125
+ when :double
126
+ if rename
127
+ Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
128
+ through :key, fields do |key, values|
129
+ next if values.nil?
130
+ values.flatten.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
131
+ end
132
+ else
133
+ through :key, fields do |key, values|
134
+ values.flatten.compact.uniq.each{|value| data[value] += 1}
135
+ end
136
+ end
137
+ when :list
138
+ through :key, fields do |key, values|
139
+ next if values.nil?
140
+ values.compact.uniq.each{|value| data[value] += 1}
141
+ end
142
+ when :flat
143
+ if rename
144
+ Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
145
+ through :key, fields do |key, values|
146
+ next if values.nil?
147
+ values.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
148
+ end
149
+ else
150
+ through :key, fields do |key, values|
151
+ next if values.nil?
152
+ values.compact.uniq.each{|value| data[value] += 1}
153
+ end
154
+ end
106
155
  end
156
+
107
157
  end
108
158
 
109
159
  data
@@ -111,51 +161,106 @@ module TSV
111
161
  end
112
162
 
113
163
  def enrichment(list, fields = nil, options = {})
164
+ options = Misc.add_defaults options, :skip_missing => true, :background => nil
165
+ background, skip_missing = Misc.process_options options, :background, :skip_missing
166
+
167
+ if Array === background and not background.empty?
168
+ filter
169
+ add_filter(:key, background)
170
+ if defined? AnnotatedArray and AnnotatedArray === list
171
+ list = list.subset background
172
+ else
173
+ list = list & background
174
+ end
175
+ end
176
+
177
+ list = list.compact.uniq
178
+
114
179
  with_unnamed do
115
180
  fields ||= self.fields.first
116
181
  options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
117
182
 
118
- add_keys = Misc.process_options options, :add_keys
183
+ add_keys, rename = Misc.process_options options, :add_keys, :rename
119
184
 
120
185
  Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
121
186
 
122
- selected = select :key => list
187
+ selected = select :key => list.uniq
123
188
 
124
189
  tsv_size = keys.length
125
- total = selected.keys.length
126
- Log.debug "Found #{total} of #{list.length} entities"
190
+ found = selected.keys.length
191
+ Log.debug "Found #{found} of #{list.length} entities"
127
192
 
128
- counts = annotation_counts fields, options[:persist]
193
+ if skip_missing
194
+ total = found
195
+ Log.debug "Using #{ found } as sample size; skipping missing"
196
+ else
197
+ total = list.uniq.length
198
+ Log.debug "Using #{ list.length } as sample size"
199
+ end
200
+
201
+ counts = annotation_counts fields, options[:persist], :rename => rename
129
202
 
130
- annotations = Hash.new
131
203
  annotation_keys = Hash.new
132
204
  selected.with_unnamed do
133
- selected.through :key, fields do |key, values|
134
- values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
205
+
206
+ case type
207
+ when :single
208
+ selected.through :key, fields do |key, value|
135
209
  value = value.dup
136
- annotations[value] ||= 0
137
- annotations[value] += 1
138
- next unless add_keys
139
210
  annotation_keys[value] ||= []
140
211
  annotation_keys[value] << key
141
- }
212
+ end
213
+
214
+ when :double
215
+ selected.through :key, fields do |key, values|
216
+ values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
217
+ value = value.dup
218
+ annotation_keys[value] ||= []
219
+ annotation_keys[value] << key
220
+ }
221
+ end
222
+
223
+ when :list
224
+ selected.through :key, fields do |key, values|
225
+ values.compact.uniq.reject{|value| value.empty?}.each{|value|
226
+ value = value.dup
227
+ annotation_keys[value] ||= []
228
+ annotation_keys[value] << key
229
+ }
230
+ end
231
+
232
+ when :flat
233
+ selected.through :key, fields do |key, values|
234
+ values.compact.uniq.reject{|value| value.empty?}.each{|value|
235
+ value = value.dup
236
+ annotation_keys[value] ||= []
237
+ annotation_keys[value] << key
238
+ }
239
+ end
240
+
142
241
  end
242
+
243
+ end
244
+
245
+ if Array === background and not background.empty?
246
+ reset_filters
247
+ pop_filter
143
248
  end
144
249
 
145
250
  pvalues = {}
146
- annotations.each do |annotation, count|
251
+ annotation_keys.each do |annotation, elems|
252
+ elems = elems.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq if rename
253
+ count = elems.length
147
254
  next if count < options[:min_support] or not counts.include? annotation
148
- pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
255
+ pvalues[annotation] = RSRuby.instance.phyper(count - 1, counts[annotation], tsv_size - counts[annotation], total, false).to_f
149
256
  end
150
257
 
151
258
  FDR.adjust_hash! pvalues if options[:fdr]
152
259
 
153
260
  pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
154
261
 
155
- TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
156
-
157
262
  if add_keys
158
- tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
263
+ tsv = TSV.setup(pvalues.keys.collect{|k| k.dup}, :key_field => fields, :fields => [], :type => :double)
159
264
 
160
265
  tsv.add_field 'p-value' do |annot, values|
161
266
  [pvalues[annot]]
@@ -171,7 +276,7 @@ module TSV
171
276
 
172
277
  tsv
173
278
  else
174
- pvalues
279
+ TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
175
280
  end
176
281
 
177
282
  end
@@ -198,4 +303,3 @@ module Entity
198
303
  end
199
304
  end
200
305
 
201
-