rbbt-dm 0.0.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,19 @@
1
1
  require 'inline'
2
+ require 'rsruby'
2
3
  require 'rbbt/tsv'
3
4
  require 'rbbt/persist'
4
5
  require 'rbbt/statistics/fdr'
5
6
  require 'rbbt/entity'
7
+ require 'distribution'
8
+ require 'distribution/hypergeometric'
6
9
 
7
10
  module Hypergeometric
8
- class << self
9
- inline do |builder|
10
- builder.c_raw <<-EOC
11
+ inline do |builder|
12
+ builder.prefix <<-EOC
13
+ #include <math.h>
14
+ EOC
15
+
16
+ builder.c_raw_singleton <<-EOC
11
17
  /**
12
18
  * Compute log(k!)
13
19
  * @param k The value k.
@@ -15,13 +21,13 @@ module Hypergeometric
15
21
  */
16
22
  double lFactorial(double k)
17
23
  {
18
- double r = 0;
19
- int i;
20
- for(i=2 ; i<=(int)k ; i++)
21
- {
22
- r = r + (double)(log((double)i));
23
- }
24
- return r;
24
+ double r = 0;
25
+ int i;
26
+ for(i=2 ; i<=(int)k ; i++)
27
+ {
28
+ r = r + (double)(log((double)i));
29
+ }
30
+ return r;
25
31
  }
26
32
 
27
33
 
@@ -34,25 +40,25 @@ double lFactorial(double k)
34
40
  */
35
41
  double lBinom(double n, double k)
36
42
  {
37
- long i;
38
- double r = 0;
39
-
40
- if(n > n-k){
41
- k = n-k;
42
- }
43
-
44
- for(i = (long)n ; i> (n-k) ; i--)
45
- {
46
- r = r + log((double)i);
47
- }
48
-
49
- r = r - lFactorial(k);
50
-
51
- return r;
43
+ long i;
44
+ double r = 0;
45
+
46
+ if(n > n-k){
47
+ k = n-k;
48
+ }
49
+
50
+ for(i = (long)n ; i> (n-k) ; i--)
51
+ {
52
+ r = r + log((double)i);
53
+ }
54
+
55
+ r = r - lFactorial(k);
56
+
57
+ return r;
52
58
  }
53
- EOC
54
-
55
- builder.c <<-EOC
59
+ EOC
60
+
61
+ builder.c_singleton <<-EOC
56
62
  /**
57
63
  * * Compute the Hypergeometric accumulated value.
58
64
  * * @param total => total size
@@ -61,49 +67,93 @@ double lBinom(double n, double k)
61
67
  * * @param found => support
62
68
  * * @return The result
63
69
  * */
64
- double hypergeometric(double total, double support, double list, double found)
70
+ //pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
71
+ //["200204", 2141, 15, 125, 3, 1.37320769558675e-188, ["Q05193", "P54762", "Q12923"]]
72
+ double hypergeometric_c(double total, double support, double list, double found)
65
73
  {
66
- double other = total - support;
74
+ double other = total - support;
67
75
 
68
- double top = list;
69
- if(support < list){
70
- top = support;
71
- }
76
+ double top = list;
77
+ double log_n_choose_k = lBinom(total,list);
72
78
 
73
- double log_n_choose_k = lBinom(total,list);
79
+ double lfoo = lBinom(support,top) + lBinom(other, list-top);
74
80
 
75
- double lfoo = lBinom(support,top) + lBinom(other, list-top);
76
-
77
- double sum = 0;
81
+ double sum = 0;
78
82
  int i;
79
- for (i = (int)top; i >= found; i-- )
80
- {
81
- sum = sum + exp(lfoo - log_n_choose_k);
82
- if ( i > found)
83
- {
84
- lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
85
- }
86
- }
87
- return sum;
83
+
84
+ if(support < list){
85
+ top = support;
86
+ }
87
+
88
+
89
+ for (i = (int)top; i >= found; i-- )
90
+ {
91
+ sum = sum + exp(lfoo - log_n_choose_k);
92
+ if ( i > found)
93
+ {
94
+ lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
95
+ }
96
+ }
97
+ return sum;
88
98
  }
89
- EOC
90
- end
99
+ EOC
100
+ end
101
+
102
+ def self.hypergeometric(total, support, list, found)
103
+ RSRuby.instance.phyper(found, support, total - support, list, false).to_f
91
104
  end
92
105
  end
93
106
 
94
107
  module TSV
95
108
 
96
- def annotation_counts(fields = nil, persistence = false)
109
+ def annotation_counts(fields = nil, persistence = false, options = {})
97
110
  fields ||= self.fields
98
111
  fields = [fields] if String === fields or Symbol === fields
112
+ rename = options.delete :rename
99
113
 
100
- Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
114
+ Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts", :other => { :rename => rename }) do
101
115
  data ||= Hash.new(0)
102
116
 
103
117
  with_unnamed do
104
- through :key, fields do |key, values|
105
- values.flatten.compact.uniq.each{|value| data[value] += 1}
118
+
119
+ case type
120
+ when :single
121
+ through :key, fields do |key, value|
122
+ next if value.nil?
123
+ data[value] += 1
124
+ end
125
+ when :double
126
+ if rename
127
+ Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
128
+ through :key, fields do |key, values|
129
+ next if values.nil?
130
+ values.flatten.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
131
+ end
132
+ else
133
+ through :key, fields do |key, values|
134
+ values.flatten.compact.uniq.each{|value| data[value] += 1}
135
+ end
136
+ end
137
+ when :list
138
+ through :key, fields do |key, values|
139
+ next if values.nil?
140
+ values.compact.uniq.each{|value| data[value] += 1}
141
+ end
142
+ when :flat
143
+ if rename
144
+ Log.debug("Computing annotation counts with rename: #{rename.values.flatten.compact.uniq.sort * ", "} ")
145
+ through :key, fields do |key, values|
146
+ next if values.nil?
147
+ values.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq.each{|value| data[value] += 1 }
148
+ end
149
+ else
150
+ through :key, fields do |key, values|
151
+ next if values.nil?
152
+ values.compact.uniq.each{|value| data[value] += 1}
153
+ end
154
+ end
106
155
  end
156
+
107
157
  end
108
158
 
109
159
  data
@@ -111,51 +161,106 @@ module TSV
111
161
  end
112
162
 
113
163
  def enrichment(list, fields = nil, options = {})
164
+ options = Misc.add_defaults options, :skip_missing => true, :background => nil
165
+ background, skip_missing = Misc.process_options options, :background, :skip_missing
166
+
167
+ if Array === background and not background.empty?
168
+ filter
169
+ add_filter(:key, background)
170
+ if defined? AnnotatedArray and AnnotatedArray === list
171
+ list = list.subset background
172
+ else
173
+ list = list & background
174
+ end
175
+ end
176
+
177
+ list = list.compact.uniq
178
+
114
179
  with_unnamed do
115
180
  fields ||= self.fields.first
116
181
  options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
117
182
 
118
- add_keys = Misc.process_options options, :add_keys
183
+ add_keys, rename = Misc.process_options options, :add_keys, :rename
119
184
 
120
185
  Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
121
186
 
122
- selected = select :key => list
187
+ selected = select :key => list.uniq
123
188
 
124
189
  tsv_size = keys.length
125
- total = selected.keys.length
126
- Log.debug "Found #{total} of #{list.length} entities"
190
+ found = selected.keys.length
191
+ Log.debug "Found #{found} of #{list.length} entities"
127
192
 
128
- counts = annotation_counts fields, options[:persist]
193
+ if skip_missing
194
+ total = found
195
+ Log.debug "Using #{ found } as sample size; skipping missing"
196
+ else
197
+ total = list.uniq.length
198
+ Log.debug "Using #{ list.length } as sample size"
199
+ end
200
+
201
+ counts = annotation_counts fields, options[:persist], :rename => rename
129
202
 
130
- annotations = Hash.new
131
203
  annotation_keys = Hash.new
132
204
  selected.with_unnamed do
133
- selected.through :key, fields do |key, values|
134
- values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
205
+
206
+ case type
207
+ when :single
208
+ selected.through :key, fields do |key, value|
135
209
  value = value.dup
136
- annotations[value] ||= 0
137
- annotations[value] += 1
138
- next unless add_keys
139
210
  annotation_keys[value] ||= []
140
211
  annotation_keys[value] << key
141
- }
212
+ end
213
+
214
+ when :double
215
+ selected.through :key, fields do |key, values|
216
+ values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
217
+ value = value.dup
218
+ annotation_keys[value] ||= []
219
+ annotation_keys[value] << key
220
+ }
221
+ end
222
+
223
+ when :list
224
+ selected.through :key, fields do |key, values|
225
+ values.compact.uniq.reject{|value| value.empty?}.each{|value|
226
+ value = value.dup
227
+ annotation_keys[value] ||= []
228
+ annotation_keys[value] << key
229
+ }
230
+ end
231
+
232
+ when :flat
233
+ selected.through :key, fields do |key, values|
234
+ values.compact.uniq.reject{|value| value.empty?}.each{|value|
235
+ value = value.dup
236
+ annotation_keys[value] ||= []
237
+ annotation_keys[value] << key
238
+ }
239
+ end
240
+
142
241
  end
242
+
243
+ end
244
+
245
+ if Array === background and not background.empty?
246
+ reset_filters
247
+ pop_filter
143
248
  end
144
249
 
145
250
  pvalues = {}
146
- annotations.each do |annotation, count|
251
+ annotation_keys.each do |annotation, elems|
252
+ elems = elems.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq if rename
253
+ count = elems.length
147
254
  next if count < options[:min_support] or not counts.include? annotation
148
- pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
255
+ pvalues[annotation] = RSRuby.instance.phyper(count - 1, counts[annotation], tsv_size - counts[annotation], total, false).to_f
149
256
  end
150
257
 
151
258
  FDR.adjust_hash! pvalues if options[:fdr]
152
259
 
153
260
  pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
154
261
 
155
- TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
156
-
157
262
  if add_keys
158
- tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
263
+ tsv = TSV.setup(pvalues.keys.collect{|k| k.dup}, :key_field => fields, :fields => [], :type => :double)
159
264
 
160
265
  tsv.add_field 'p-value' do |annot, values|
161
266
  [pvalues[annot]]
@@ -171,7 +276,7 @@ module TSV
171
276
 
172
277
  tsv
173
278
  else
174
- pvalues
279
+ TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
175
280
  end
176
281
 
177
282
  end
@@ -198,4 +303,3 @@ module Entity
198
303
  end
199
304
  end
200
305
 
201
-