rbbt-marq 2.1.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -65,91 +65,42 @@ module MADB
65
65
  end
66
66
 
67
67
  # {{{ Loading Positions
68
+
69
+ def self.num_values(dataset)
70
+ experiments =
71
+ DBcache.load(dataset + '_experiments').
72
+ sort_by {|p| p[0].to_i }.
73
+ collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
68
74
 
69
- # Number of probes in the platform
70
- def self.platform_entries(platform)
71
- DBcache.num_rows(platform + '_codes')
72
- end
73
-
74
- # Return the positions of the genes in the signatures derived from the
75
- # dataset. Returns a 3 value array: hash of arrays of positions (keys are
76
- # signatures), array of the gene ids in the same order as the positions,
77
- # and total number of probes in the platform.
78
- #
79
- def self.load_positions(dataset, genes, platform_entries)
80
- gene_positions = DBcache.load(dataset, genes)
81
-
82
- matched = gene_positions.keys.sort
83
-
84
- # Get signature names
85
- experiments = DBcache.load(dataset + '_experiments').sort{|a,b|
86
- a[0].to_i <=> b[0].to_i
87
- }.collect{|p|
88
- MARQ::Name.clean(dataset) + ": " + p[1].first
89
- }
90
-
91
- # Get scale factors (to account for genes missing in the dataset)
92
- scale = (0..experiments.length - 1).collect{|i|
93
- rows = DBcache.num_rows(dataset, "C#{i}");
94
- if rows > 0
95
- platform_entries.to_f / rows
96
- else
97
- nil
98
- end
99
- }
100
-
101
- data = {}
102
- # Get experiment positions and scale them
103
- experiment_x_gene = gene_positions.values_at(*matched).transpose
104
- experiments.each_with_index{|experiment, i|
105
- next if scale[i].nil? || experiment_x_gene[i].nil?
106
- values = experiment_x_gene[i].collect{|v| v.nil? ? nil : (v.to_f * scale[i]).to_i}
107
- data[experiment] = values
108
- }
75
+ values = {}
76
+ experiments.each_with_index do |exp, i| values[exp] = DBcache.num_rows(dataset, "C#{i}") end
109
77
 
110
- [data, matched, platform_entries]
78
+ values
111
79
  end
112
-
113
- # Load positions of genes in signatures from the given datasets. Returns a
114
- # tree value array just like load_positions
115
- def self.dataset_positions(dataset, genes)
116
- return [{},[],0] if genes.empty?
117
80
 
118
- genes = genes.collect{|gene| gene.to_s.downcase.strip}
119
- platform_entries = platform_entries(dataset)
120
-
121
- load_positions(dataset, genes, platform_entries)
81
+ def self.num_codes(dataset)
82
+ DBcache.num_rows(dataset + '_codes')
122
83
  end
123
84
 
85
+ def self.load_positions(dataset, genes)
86
+ positions = DBcache.load(dataset, genes)
87
+ experiments =
88
+ DBcache.load(dataset + '_experiments').
89
+ sort_by {|p| p[0].to_i }.
90
+ collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
124
91
 
125
- # Loads the data from all signatures for datasets of the platform. The return
126
- # value is the same as in dataset_positions and load_positins, except that
127
- # the matched gene names need not be in the same order as the actual positions
128
- # of the signatures, it just the super set of all genes matched on the
129
- # signatures
130
- def self.platform_positions(platform, genes)
131
- return [{},[],0] if genes.empty?
132
-
133
- genes = genes.collect {|gene| gene.downcase.strip }
134
- platform_entries = platform_entries(platform)
135
-
136
- cross_platform = MARQ::Platform.is_cross_platform? platform
137
- datasets = MARQ::Platform.datasets(platform).sort
138
92
 
139
- total_data = {}
140
- total_matched = []
141
-
142
- datasets.each do |dataset|
143
- dataset = MARQ::Name.cross_platform dataset if cross_platform
144
- data, matched = load_positions(dataset, genes, platform_entries)
145
- total_data = total_data.merge(data)
146
- total_matched += matched
93
+
94
+ result = {}; experiments.each {|exp| result[exp] = [] }
95
+ positions.values_at(*genes).each do |values|
96
+ experiments.zip(values || []).each do |p|
97
+ experiment, value = p
98
+ result[experiment] << (value.nil? ? nil : value.to_i)
99
+ end
147
100
  end
148
- total_matched.uniq!
149
101
 
150
- [total_data, total_matched, platform_entries]
102
+ result
151
103
  end
152
-
153
104
  end
154
105
 
155
106
  if __FILE__ == $0
@@ -165,16 +165,13 @@ double hypergeometric(double total, double support, double list, double found)
165
165
  positions[term] << rank
166
166
  }
167
167
  }
168
-
169
- scores = []
170
-
171
168
 
172
169
  sizes = {}
173
170
  RANK_SIZE_BINS.each{|size| sizes[size.to_i] = []}
174
171
 
175
-
176
172
  # For each term compute the rank score. Also, place it in the closest size
177
173
  # bin for the permutations.
174
+ scores = []
178
175
  best.each_with_index{|term, pos|
179
176
  if positions[term]
180
177
  list = positions[term]
@@ -190,40 +187,32 @@ double hypergeometric(double total, double support, double list, double found)
190
187
  }
191
188
  sizes[sizes.keys.sort.last] << pos if !found
192
189
 
193
- scores << Score::score(list, ranks.length, 0)[:score]
190
+ scores << Score.score(list, ranks.length, 0)
194
191
  else # it has no score
195
192
  scores << nil
196
193
  end
197
194
  }
198
195
 
199
196
  info = {}
200
-
201
- # Go through all the size bins, run the permutations and assign the pvalues
202
- # to all terms in the bin.
203
- sizes.keys.each{|size|
197
+ sizes.each do |size, pos_list|
204
198
  next if size == 1
205
- next if sizes[size].empty?
199
+ next if pos_list.empty?
206
200
 
207
- # This are the actual scores for the terms in the bin
208
- sub_list_scores = sizes[size].collect{|pos| scores[pos] || 0}
201
+ size_info = {}
202
+ pos_list.each do |pos|
203
+ score = scores[pos]
204
+ term = best[pos]
205
+ hits = positions[term].nil? ? 0 : positions[term].length
209
206
 
210
- # Compute the pvalues for all the terms in the bin. The size of the
211
- # permutation list is that of the bin
212
- pvalues = Score::pvalues(sub_list_scores, size, 0, ranks.length)
207
+ size_info[term] = {:score => score, :hits => hits}
208
+ end
213
209
 
214
- # Save the information from the terms, score, hits, and pvalues.
215
- sizes[size].zip(pvalues).each{|p|
216
- pos = p[0]
217
- pvalue = p[1]
218
- score = scores[pos]
219
- next if score < 0
210
+ null_scores = Score.null_scores(size, 0)
211
+ size_info = Score.add_pvalues(size_info, null_scores)
220
212
 
221
- term = best[pos]
222
- hits = positions[term].nil? ? 0 : positions[term].length
213
+ info.merge! size_info
214
+ end
223
215
 
224
- info[term] = {:score => score, :hits => hits, :pvalue => pvalue}
225
- }
226
- }
227
216
 
228
217
  info
229
218
  end
@@ -349,7 +338,7 @@ double hypergeometric(double total, double support, double list, double found)
349
338
  end
350
339
 
351
340
  if algorithm == :rank
352
- ranks = scores.sort{|a,b| compare(a[1],b[1]) }.collect{|p| p[0]}
341
+ ranks = scores.sort {|a,b| compare(a[1],b[1]) }.collect {|p| p[0]}
353
342
  terms = enrichment_rank(annot, ranks, dict_options)
354
343
  else
355
344
  terms = enrichment_hypergeometric(annot, relevant, dict_options)
@@ -240,119 +240,35 @@ module MARQ
240
240
  end
241
241
 
242
242
  module RankQuery
243
- def self.complete_positions(positions, matched, genes)
244
- matched = matched.collect{|gene| gene.strip.downcase}
245
- genes = genes.collect{|gene| gene.strip.downcase}
246
-
247
- pos = Hash[*matched.zip(positions).flatten]
248
-
249
- complete = genes.collect{|gene|
250
- if matched.include? gene
251
- pos[gene] || "MISSING"
252
- else
253
- "NOT IN PLATFORM"
254
- end
255
- }
256
- complete
257
- end
258
-
259
-
260
- def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
261
- scores = []
262
-
263
- positions_up.keys.each do |experiment|
264
- score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
265
- score[:total_entries] = platform_entries
266
- score[:positions_up] = complete_positions(positions_up[experiment] || [], matched_up, up) if up.any?
267
- score[:positions_down] = complete_positions(positions_down[experiment] || [], matched_down, down) if down.any?
268
- scores << score
269
- end
270
-
271
- pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
272
-
273
- results = {}
274
- positions_up.keys.each_with_index{|experiment,i|
275
- results[experiment] = scores[i].merge(:pvalue => pvalues[i])
276
- }
277
-
278
- results
279
- end
243
+ NULL_SIZE = 10000
280
244
 
281
245
  def self.dataset_scores(dataset, up, down)
282
- positions_up, matched_up, platform_entries = MADB.dataset_positions(dataset, up)
283
- missing_up = positions_up.length - matched_up.length
284
-
285
- positions_down, matched_down = MADB.dataset_positions(dataset, down)
286
- missing_down = positions_down.length - matched_down.length
287
-
288
- position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
246
+ Score.scores_up_down(dataset, up, down)
289
247
  end
290
248
 
291
249
  def self.platform_scores(platform, up, down)
292
- positions_up, matched_up, platform_entries = MADB.platform_positions(platform, up)
293
- missing_up = up.length - matched_up.length
294
-
295
-
296
- positions_down, matched_down = MADB.platform_positions(platform, down)
297
- missing_down = down.length - matched_down.length
250
+ scores = {}
251
+ MARQ::Platform.datasets(platform).each do |dataset|
252
+ dataset = MARQ::Name.cross_platform dataset if MARQ::Name.is_cross_platform?(platform)
253
+ scores.merge!(dataset_scores(dataset, up, down))
254
+ end
298
255
 
299
- position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
256
+ scores
300
257
  end
301
258
 
302
259
  def self.organism_scores(organism, up, down)
303
- platforms = MARQ::Platform.organism_platforms(organism).
304
- select {|p| MARQ::Platform.has_cross_platform? p }.
305
- collect {|p| MARQ::Name.cross_platform p }
306
-
307
- total_scores = {}
308
- platforms.each do |platform|
309
- scores = platform_scores(platform, up, down)
310
- total_scores.merge!(scores)
260
+ scores = {}
261
+ MARQ::Platform.organism_platforms(organism).each do |platform|
262
+ scores.merge!(platform_scores(MARQ::Name.cross_platform(platform), up, down))
311
263
  end
312
264
 
313
- total_scores
265
+ scores
314
266
  end
315
267
 
316
- end
317
- end
318
-
319
- if __FILE__ == $0
320
- p MARQ::Dataset.platform 'GDS2791_cross_platform'
321
- p MARQ::Platform.organism 'GPL96'
322
- exit
323
- #puts MARQ::organism_platforms('human')
324
- #puts MARQ.platform_organism("HaploidData")
325
- #puts MARQ::platform_scores_up_down("HaploidData",%w( YMR261c YDL140c YIL122w YPL093w YHR211w YDL142c YHR106w YOR103c YDR233c YLR181c),%w()).keys
326
-
327
- up = %w(
328
-
329
- 51228_at 215046_at 205009_at 204915_s_at 202707_at
330
- 208265_at 210618_at 201185_at 206650_at 200719_at
331
- 215661_at 202071_at 214408_s_at 215092_s_at 206168_at
332
- 212686_at 214162_at 221008_s_at 217709_at 210957_s_at
333
-
334
- )
335
-
336
-
337
- require 'MARQ/ID'
338
- require 'pp'
339
- genes = ID.translate('human',up).compact
340
-
341
- #pp up.zip(genes)
342
- #genes = Open.read("/home/miki/git/MARQ/test/GDS1375_malignant_vs_normal_down.genes").collect{|l| l.chomp.strip}
343
- positions = MARQ::GEORQ.dataset_positions('GDS1231_cross_platform',genes)
344
- pp positions
345
-
346
-
347
- #MARQ::GEORQ.platform_scores_up_down('GPL96_cross_platform',genes,[]).each{|ex, r|
348
- # puts ex
349
- # puts r[:pvalue]
350
- #}
351
-
352
- #Score.draw_hits(positions["disease.state: malignant melanoma <=> normal"], MADB::GEORQ.experiment_entries('GPL96','GDS1375: disease.state: malignant melanoma <=> normal') , '/tmp/foo.png',:size => 1000)
353
-
354
-
268
+ def self.add_pvalues(scores, up_size, down_size)
269
+ null_scores = Score.null_scores(up_size, down_size, NULL_SIZE)
270
+ Score.add_pvalues(scores, null_scores)
271
+ end
355
272
 
273
+ end
356
274
  end
357
-
358
-
@@ -2,135 +2,10 @@ require 'png'
2
2
  require 'inline'
3
3
 
4
4
  module Score
5
- def self.combine(up, down)
6
- return down if up == 0
7
- return up if down == 0
8
-
9
- return up - down
10
- if (up > 0) == (down > 0)
11
- return 0
12
- else
13
- up - down
14
- end
15
- end
16
-
17
- def self.average(list)
18
- clean = list.compact
19
- clean.inject(0){|acc, e| acc += e}.to_f / clean.length
20
- end
21
-
22
- def self.score_area(positions, platform_entries, missing = 0)
23
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
24
-
25
- clean_positions = positions.compact.sort
26
-
27
- total_tags = positions.length + missing
28
- extra = total_tags - clean_positions.length
29
-
30
- top = 0
31
- bottom = 0
32
-
33
- clean_positions.each_with_index{|p,i|
34
- rel_qt = (i + 1).to_f / total_tags
35
- rel_qb = ( i + extra ).to_f / total_tags
36
- rel_p = p.to_f / platform_entries
37
-
38
-
39
- top += rel_qt - rel_p if rel_qt > rel_p
40
- bottom += rel_p - rel_qb if rel_p > rel_qb
41
- }
42
-
43
-
44
- {
45
- :top => top,
46
- :bottom => bottom,
47
- :score => top > bottom ? top.to_f / total_tags : - bottom.to_f / total_tags,
48
- }
49
- end
50
-
51
- def self.score_max_norm(positions, platform_entries, missing = 0)
52
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
53
-
54
- clean_positions = positions.compact.sort
55
-
56
- extra = missing + (positions.length - clean_positions.length)
57
- total_tags = extra + clean_positions.length
58
-
59
- mean = platform_entries / 2
60
-
61
- values_top = [0]
62
- values_bottom = [0]
63
-
64
- clean_positions.each_with_index{|p,i|
65
- rel_qt = (i + 1).to_f / total_tags
66
- rel_qb = ( i + extra ).to_f / total_tags
67
- rel_p = p.to_f / platform_entries
68
-
69
-
70
- values_top << (rel_qt - rel_p) * ((p - mean).abs.to_f / mean)**2
71
- values_bottom << (rel_p - rel_qb) * ((p - mean).abs.to_f / mean)**2
72
- }
73
5
 
74
- top = values_top.max
75
- bottom = values_bottom.max
76
-
77
-
78
- {
79
- :score => top > bottom ? top : -bottom,
80
- }
81
-
82
-
83
- end
84
-
85
- def self.scale_score1(positions, platform_entries)
86
-
87
- mean = platform_entries/2
88
- max_top = 0
89
- max_bottom = 0
90
-
91
- top_list = []
92
- bottom_list = []
93
-
94
- weights = positions.sort.collect{|position|
95
- rel_pos = ((position - mean).abs.to_f / mean);
96
- 0.3 * rel_pos + 0.7 * Math::exp(30*rel_pos)/Math::exp(30)
97
- }
98
- weights.unshift(0)
99
- total_weights = weights.inject(0){|v,acc| acc += v}
100
- weights.collect!{|v| v / total_weights}
101
-
102
- rel_qt = 0
103
- rel_qb = 0
104
- positions.sort.each_with_index{|position, idx|
105
-
106
- rel_qt += weights[idx + 1]
107
- rel_qb += weights[idx]
108
- rel_p = position.to_f / platform_entries
109
-
110
- top = (rel_qt - rel_p);
111
- bottom = (rel_p - rel_qb);
112
-
113
- top_list << top
114
- bottom_list << bottom
115
-
116
- if (top > max_top)
117
- max_top = top;
118
- end
119
- if (bottom > max_bottom)
120
- max_bottom = bottom;
121
- end
122
- }
123
-
124
- p [top_list, bottom_list]
125
- if (max_top > max_bottom)
126
- return max_top;
127
- else
128
- return -max_bottom;
129
- end
130
- end
131
-
132
6
  class << self
133
7
  inline do |builder|
8
+
134
9
  builder.c_raw <<-'EOC'
135
10
  double weight(int position, int mean){
136
11
  double rel_pos = (double) abs(position - mean) / mean;
@@ -138,171 +13,140 @@ module Score
138
13
  return(weight);
139
14
  }
140
15
  EOC
16
+
141
17
  builder.c <<-'EOC'
142
- double fast_score_scale( VALUE positions, int platform_entries, double missing){
18
+ double fast_score_scale(VALUE positions, int total, int missing){
143
19
  int idx;
144
20
 
145
- int mean = platform_entries / 2;
21
+ int mean = total / 2;
146
22
 
147
23
  VALUE rel_q = rb_ary_new();
148
24
  VALUE rel_l = rb_ary_new();
149
25
 
150
26
  rb_ary_push(rel_q,rb_float_new(0));
151
27
 
28
+ // Rescale positions and accumulate weights
152
29
  double total_weights = 0;
153
30
  for (idx = 0; idx < RARRAY(positions)->len; idx++){
154
31
  int position = FIX2INT(rb_ary_entry(positions, idx));
155
32
 
156
- rb_ary_push(rel_l, rb_float_new((double) position / platform_entries));
33
+ rb_ary_push(rel_l, rb_float_new((double) position / total));
157
34
 
158
- total_weights = total_weights + weight(position, mean);
159
- rb_ary_push(rel_q,rb_float_new(total_weights));
35
+ total_weights += weight(position, mean);
36
+ rb_ary_push(rel_q, rb_float_new(total_weights));
160
37
  }
161
38
 
162
39
  // Add penalty for missing genes
163
-
164
- double penalty = missing * weight( mean * 0.8,mean);
165
- total_weights = total_weights + penalty;
40
+ double penalty = missing * weight(mean * 0.8, mean);
41
+ total_weights = total_weights + penalty;
166
42
 
43
+ // Traverse list and get extreme values
167
44
  double max_top, max_bottom;
168
45
  max_top = max_bottom = 0;
169
46
  for (idx = 0; idx < RARRAY(positions)->len; idx++){
170
- double top = RFLOAT(rb_ary_entry(rel_q,idx + 1))->value / total_weights -
171
- RFLOAT(rb_ary_entry(rel_l,idx))->value;
172
- double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q,idx))->value) / total_weights +
173
- RFLOAT(rb_ary_entry(rel_l,idx))->value;
47
+ double top = RFLOAT(rb_ary_entry(rel_q, idx + 1))->value / total_weights -
48
+ RFLOAT(rb_ary_entry(rel_l, idx))->value;
49
+ double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q, idx))->value) / total_weights +
50
+ RFLOAT(rb_ary_entry(rel_l, idx))->value;
174
51
 
175
- if (top > max_top) max_top = top;
52
+ if (top > max_top) max_top = top;
176
53
  if (bottom > max_bottom) max_bottom = bottom;
177
54
  }
178
55
 
179
56
  if (max_top > max_bottom) return max_top;
180
57
  else return -max_bottom;
181
58
  }
182
-
183
59
  EOC
184
60
 
185
-
186
-
187
- builder.c <<-'EOC'
188
- double fast_norm_score( VALUE positions, int total, int extra, int platform_entries){
189
- int idx;
190
-
191
- double mean = (double) platform_entries / 2;
192
- double max_top, max_bottom;
193
- max_top = max_bottom = 0;
194
-
195
- for (idx = 0; idx < RARRAY(positions)->len; idx++){
196
- double position = (double) FIX2INT(rb_ary_entry(positions, (long) idx));
197
-
198
-
199
- double rel_qt = (double) (idx + 1) / total;
200
- double rel_qb = (double) (idx + extra) / total;
201
- double rel_p = position / platform_entries;
202
-
203
- double scale = (abs(position - mean) / mean);
204
- scale = scale * scale;
205
-
206
- double top = (rel_qt - rel_p) * scale;
207
- double bottom = (rel_p - rel_qb) * scale;
208
-
209
-
210
- if (top > max_top) max_top = top;
211
- if (bottom > max_bottom) max_bottom = bottom;
212
- }
213
-
214
- if (max_top > max_bottom) return max_top;
215
- else return -max_bottom;
216
- }
217
-
218
- EOC
219
61
  end
220
- end
221
- def self.score_scale_fast(positions, platform_entries, missing=0)
222
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
223
62
 
224
- clean_positions = positions.compact.sort
225
- missing = missing + positions.length - clean_positions.length
226
-
227
- {
228
- :score => fast_score_scale(clean_positions, platform_entries, missing)
229
- }
230
63
  end
231
64
 
232
65
 
233
- def self.score_norm_fast(positions, platform_entries, missing = 0)
234
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
235
-
236
- clean_positions = positions.compact.sort
237
-
238
- extra = missing + (positions.length - clean_positions.length)
239
- total_tags = extra + clean_positions.length
240
-
241
- {
242
- :score => fast_norm_score(clean_positions, total_tags, extra, platform_entries)
243
- }
66
+ def self.score(*args)
67
+ self.fast_score_scale(*args)
244
68
  end
245
69
 
246
- def self.score_max(positions, platform_entries, missing = 0)
247
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
248
-
249
- clean_positions = positions.compact.sort
250
-
251
- extra = missing + (positions.length - clean_positions.length)
252
- total_tags = extra + clean_positions.length
253
-
70
+ def self.scores(dataset, genes)
71
+ positions = MADB.load_positions(dataset, genes)
72
+ values = MADB.num_values(dataset)
73
+
74
+ experiments = positions.keys
75
+
76
+ scores = {}
77
+ experiments.each do |experiment|
78
+ hits = positions[experiment].compact
79
+ total = values[experiment]
80
+ if hits.nil? || hits.empty?
81
+ score = 0
82
+ else
83
+ missing = genes.length - hits.length
84
+ score = self.fast_score_scale(hits.sort, total, missing)
85
+ end
86
+ scores[experiment] = {
87
+ :positions => positions[experiment],
88
+ :score => score,
89
+ :total => total,
90
+ }
91
+ end
254
92
 
255
- values = [0]
93
+ scores
94
+ end
256
95
 
257
- clean_positions.each_with_index{|p,i|
258
-
259
- up = (i + 1).to_f / total_tags
260
- down = p.to_f / platform_entries
96
+ def self.combine(up, down)
97
+ return down if up == 0
98
+ return up if down == 0
261
99
 
262
- values << up - down
263
- }
100
+ return up - down
101
+ if (up > 0) == (down > 0)
102
+ return 0
103
+ else
104
+ up - down
105
+ end
106
+ end
264
107
 
265
- top = values.max
266
- bottom = values.min + ((extra - 1).to_f/ total_tags)
108
+ def self.scores_up_down(dataset, up, down)
109
+ scores_up = scores(dataset, up)
110
+ scores_down = scores(dataset, down)
267
111
 
112
+ scores = {}
113
+ scores_up.keys.each do |experiment|
114
+ scores[experiment] = {}
115
+ scores[experiment][:up] = scores_up[experiment]
116
+ scores[experiment][:down] = scores_down[experiment]
117
+ scores[experiment][:score] = combine(scores_up[experiment][:score], scores_down[experiment][:score])
118
+ end
268
119
 
269
- {
270
- :score => top.abs > bottom.abs ? top : bottom,
271
- }
120
+ scores
272
121
  end
273
122
 
274
- class << self
275
- alias_method :score, :score_scale_fast
123
+ def self.permutations(size, times)
124
+ total = 10000
125
+ if size == 0
126
+ [0] * times
127
+ else
128
+ (1..times).collect do
129
+ fast_score_scale(Array.new(size){ (rand * total).to_i }.sort, total, 0)
130
+ end
131
+ end
276
132
  end
277
133
 
134
+ def self.null_scores(up_size, down_size, times = 10000)
135
+ up_perm = permutations(up_size, times)
136
+ down_perm = permutations(down_size, times)
278
137
 
279
- def self.score_up_down(up, down, total, missing_up = 0, missing_down = 0)
280
- up = score(up, total, missing_up)
281
- down = score(down, total, missing_down)
282
-
283
- {:up => up[:score], :down => down[:score], :score => combine(up[:score], down[:score])}
138
+ up_perm.zip(down_perm).collect{|p| up, down = p; combine(up, down).abs}
284
139
  end
285
140
 
286
- def self.permutations(genes, total, times = 10000)
287
- scores = []
288
- times.times{
289
- positions = Array.new(genes){ (rand * total).to_i }
290
- scores << score(positions, total)[:score]
291
- }
292
- scores
293
- end
294
-
295
- def self.pvalues(scores, up, down, total, options = {})
296
- times = options[:times]|| 1000
141
+ def self.add_pvalues(scores, null_scores)
142
+ null_scores = null_scores.sort
143
+ times = null_scores.length
297
144
 
298
- permutations_up = permutations(up, total, times)
299
- permutations_down = permutations(down, total, times )
300
- permutations = permutations_up.zip(permutations_down).collect{|p| combine(*p).abs }.sort
145
+ scores.each do |experiment, info|
146
+ info[:pvalue] = (times - null_scores.count_smaller(info[:score].abs)).to_f / times
147
+ end
301
148
 
302
- scores.collect{|score|
303
- num = permutations.count_smaller(score.abs)
304
- (times - num).to_f / times
305
- }
149
+ scores
306
150
  end
307
151
 
308
152
  COLORS = {
@@ -352,44 +196,3 @@ module Score
352
196
  end
353
197
  end
354
198
  end
355
-
356
- if __FILE__ == $0
357
- size = 1000
358
- positions=%w(10 30 200).collect{|v| v.to_i}
359
- np = positions.collect{|p| size - p}
360
- p Score.score(positions, size )
361
- p Score.score(np, size )
362
-
363
-
364
- p Score.scale_score1(positions, size )
365
- p Score.scale_score1(np, size )
366
-
367
- require 'benchmark'
368
-
369
-
370
- p = (0..100).collect{ (rand * 1000).to_i}
371
- puts Benchmark.measure{
372
- 1000.times{|i|
373
- Score.score_max_norm(p, 1000);
374
- }
375
- }
376
- puts Benchmark.measure{
377
- 1000.times{|i|
378
- Score.score_max_norm_fast(p, 1000);
379
- }
380
- }
381
-
382
-
383
- per_list = []
384
- 1000.times{
385
- per_list << Array.new(200){(rand * 1000).to_i}
386
- }
387
-
388
- require 'benchmark'
389
- puts Benchmark.measure{
390
- per_list.each{|p|
391
- Score.score_max_norm(p, 1000);
392
- }
393
- }
394
-
395
- end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-marq
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-04 00:00:00 +01:00
12
+ date: 2010-03-09 00:00:00 +01:00
13
13
  default_executable: marq_config
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency