rbbt-marq 2.1.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,91 +65,42 @@ module MADB
65
65
  end
66
66
 
67
67
  # {{{ Loading Positions
68
+
69
+ def self.num_values(dataset)
70
+ experiments =
71
+ DBcache.load(dataset + '_experiments').
72
+ sort_by {|p| p[0].to_i }.
73
+ collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
68
74
 
69
- # Number of probes in the platform
70
- def self.platform_entries(platform)
71
- DBcache.num_rows(platform + '_codes')
72
- end
73
-
74
- # Return the positions of the genes in the signatures derived from the
75
- # dataset. Returns a 3 value array: hash of arrays of positions (keys are
76
- # signatures), array of the gene ids in the same order as the positions,
77
- # and total number of probes in the platform.
78
- #
79
- def self.load_positions(dataset, genes, platform_entries)
80
- gene_positions = DBcache.load(dataset, genes)
81
-
82
- matched = gene_positions.keys.sort
83
-
84
- # Get signature names
85
- experiments = DBcache.load(dataset + '_experiments').sort{|a,b|
86
- a[0].to_i <=> b[0].to_i
87
- }.collect{|p|
88
- MARQ::Name.clean(dataset) + ": " + p[1].first
89
- }
90
-
91
- # Get scale factors (to account for genes missing in the dataset)
92
- scale = (0..experiments.length - 1).collect{|i|
93
- rows = DBcache.num_rows(dataset, "C#{i}");
94
- if rows > 0
95
- platform_entries.to_f / rows
96
- else
97
- nil
98
- end
99
- }
100
-
101
- data = {}
102
- # Get experiment positions and scale them
103
- experiment_x_gene = gene_positions.values_at(*matched).transpose
104
- experiments.each_with_index{|experiment, i|
105
- next if scale[i].nil? || experiment_x_gene[i].nil?
106
- values = experiment_x_gene[i].collect{|v| v.nil? ? nil : (v.to_f * scale[i]).to_i}
107
- data[experiment] = values
108
- }
75
+ values = {}
76
+ experiments.each_with_index do |exp, i| values[exp] = DBcache.num_rows(dataset, "C#{i}") end
109
77
 
110
- [data, matched, platform_entries]
78
+ values
111
79
  end
112
-
113
- # Load positions of genes in signatures from the given datasets. Returns a
114
- # tree value array just like load_positions
115
- def self.dataset_positions(dataset, genes)
116
- return [{},[],0] if genes.empty?
117
80
 
118
- genes = genes.collect{|gene| gene.to_s.downcase.strip}
119
- platform_entries = platform_entries(dataset)
120
-
121
- load_positions(dataset, genes, platform_entries)
81
+ def self.num_codes(dataset)
82
+ DBcache.num_rows(dataset + '_codes')
122
83
  end
123
84
 
85
+ def self.load_positions(dataset, genes)
86
+ positions = DBcache.load(dataset, genes)
87
+ experiments =
88
+ DBcache.load(dataset + '_experiments').
89
+ sort_by {|p| p[0].to_i }.
90
+ collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
124
91
 
125
- # Loads the data from all signatures for datasets of the platform. The return
126
- # value is the same as in dataset_positions and load_positins, except that
127
- # the matched gene names need not be in the same order as the actual positions
128
- # of the signatures, it just the super set of all genes matched on the
129
- # signatures
130
- def self.platform_positions(platform, genes)
131
- return [{},[],0] if genes.empty?
132
-
133
- genes = genes.collect {|gene| gene.downcase.strip }
134
- platform_entries = platform_entries(platform)
135
-
136
- cross_platform = MARQ::Platform.is_cross_platform? platform
137
- datasets = MARQ::Platform.datasets(platform).sort
138
92
 
139
- total_data = {}
140
- total_matched = []
141
-
142
- datasets.each do |dataset|
143
- dataset = MARQ::Name.cross_platform dataset if cross_platform
144
- data, matched = load_positions(dataset, genes, platform_entries)
145
- total_data = total_data.merge(data)
146
- total_matched += matched
93
+
94
+ result = {}; experiments.each {|exp| result[exp] = [] }
95
+ positions.values_at(*genes).each do |values|
96
+ experiments.zip(values || []).each do |p|
97
+ experiment, value = p
98
+ result[experiment] << (value.nil? ? nil : value.to_i)
99
+ end
147
100
  end
148
- total_matched.uniq!
149
101
 
150
- [total_data, total_matched, platform_entries]
102
+ result
151
103
  end
152
-
153
104
  end
154
105
 
155
106
  if __FILE__ == $0
@@ -165,16 +165,13 @@ double hypergeometric(double total, double support, double list, double found)
165
165
  positions[term] << rank
166
166
  }
167
167
  }
168
-
169
- scores = []
170
-
171
168
 
172
169
  sizes = {}
173
170
  RANK_SIZE_BINS.each{|size| sizes[size.to_i] = []}
174
171
 
175
-
176
172
  # For each term compute the rank score. Also, place it in the closest size
177
173
  # bin for the permutations.
174
+ scores = []
178
175
  best.each_with_index{|term, pos|
179
176
  if positions[term]
180
177
  list = positions[term]
@@ -190,40 +187,32 @@ double hypergeometric(double total, double support, double list, double found)
190
187
  }
191
188
  sizes[sizes.keys.sort.last] << pos if !found
192
189
 
193
- scores << Score::score(list, ranks.length, 0)[:score]
190
+ scores << Score.score(list, ranks.length, 0)
194
191
  else # it has no score
195
192
  scores << nil
196
193
  end
197
194
  }
198
195
 
199
196
  info = {}
200
-
201
- # Go through all the size bins, run the permutations and assign the pvalues
202
- # to all terms in the bin.
203
- sizes.keys.each{|size|
197
+ sizes.each do |size, pos_list|
204
198
  next if size == 1
205
- next if sizes[size].empty?
199
+ next if pos_list.empty?
206
200
 
207
- # This are the actual scores for the terms in the bin
208
- sub_list_scores = sizes[size].collect{|pos| scores[pos] || 0}
201
+ size_info = {}
202
+ pos_list.each do |pos|
203
+ score = scores[pos]
204
+ term = best[pos]
205
+ hits = positions[term].nil? ? 0 : positions[term].length
209
206
 
210
- # Compute the pvalues for all the terms in the bin. The size of the
211
- # permutation list is that of the bin
212
- pvalues = Score::pvalues(sub_list_scores, size, 0, ranks.length)
207
+ size_info[term] = {:score => score, :hits => hits}
208
+ end
213
209
 
214
- # Save the information from the terms, score, hits, and pvalues.
215
- sizes[size].zip(pvalues).each{|p|
216
- pos = p[0]
217
- pvalue = p[1]
218
- score = scores[pos]
219
- next if score < 0
210
+ null_scores = Score.null_scores(size, 0)
211
+ size_info = Score.add_pvalues(size_info, null_scores)
220
212
 
221
- term = best[pos]
222
- hits = positions[term].nil? ? 0 : positions[term].length
213
+ info.merge! size_info
214
+ end
223
215
 
224
- info[term] = {:score => score, :hits => hits, :pvalue => pvalue}
225
- }
226
- }
227
216
 
228
217
  info
229
218
  end
@@ -349,7 +338,7 @@ double hypergeometric(double total, double support, double list, double found)
349
338
  end
350
339
 
351
340
  if algorithm == :rank
352
- ranks = scores.sort{|a,b| compare(a[1],b[1]) }.collect{|p| p[0]}
341
+ ranks = scores.sort {|a,b| compare(a[1],b[1]) }.collect {|p| p[0]}
353
342
  terms = enrichment_rank(annot, ranks, dict_options)
354
343
  else
355
344
  terms = enrichment_hypergeometric(annot, relevant, dict_options)
@@ -240,119 +240,35 @@ module MARQ
240
240
  end
241
241
 
242
242
  module RankQuery
243
- def self.complete_positions(positions, matched, genes)
244
- matched = matched.collect{|gene| gene.strip.downcase}
245
- genes = genes.collect{|gene| gene.strip.downcase}
246
-
247
- pos = Hash[*matched.zip(positions).flatten]
248
-
249
- complete = genes.collect{|gene|
250
- if matched.include? gene
251
- pos[gene] || "MISSING"
252
- else
253
- "NOT IN PLATFORM"
254
- end
255
- }
256
- complete
257
- end
258
-
259
-
260
- def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
261
- scores = []
262
-
263
- positions_up.keys.each do |experiment|
264
- score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
265
- score[:total_entries] = platform_entries
266
- score[:positions_up] = complete_positions(positions_up[experiment] || [], matched_up, up) if up.any?
267
- score[:positions_down] = complete_positions(positions_down[experiment] || [], matched_down, down) if down.any?
268
- scores << score
269
- end
270
-
271
- pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
272
-
273
- results = {}
274
- positions_up.keys.each_with_index{|experiment,i|
275
- results[experiment] = scores[i].merge(:pvalue => pvalues[i])
276
- }
277
-
278
- results
279
- end
243
+ NULL_SIZE = 10000
280
244
 
281
245
  def self.dataset_scores(dataset, up, down)
282
- positions_up, matched_up, platform_entries = MADB.dataset_positions(dataset, up)
283
- missing_up = positions_up.length - matched_up.length
284
-
285
- positions_down, matched_down = MADB.dataset_positions(dataset, down)
286
- missing_down = positions_down.length - matched_down.length
287
-
288
- position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
246
+ Score.scores_up_down(dataset, up, down)
289
247
  end
290
248
 
291
249
  def self.platform_scores(platform, up, down)
292
- positions_up, matched_up, platform_entries = MADB.platform_positions(platform, up)
293
- missing_up = up.length - matched_up.length
294
-
295
-
296
- positions_down, matched_down = MADB.platform_positions(platform, down)
297
- missing_down = down.length - matched_down.length
250
+ scores = {}
251
+ MARQ::Platform.datasets(platform).each do |dataset|
252
+ dataset = MARQ::Name.cross_platform dataset if MARQ::Name.is_cross_platform?(platform)
253
+ scores.merge!(dataset_scores(dataset, up, down))
254
+ end
298
255
 
299
- position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
256
+ scores
300
257
  end
301
258
 
302
259
  def self.organism_scores(organism, up, down)
303
- platforms = MARQ::Platform.organism_platforms(organism).
304
- select {|p| MARQ::Platform.has_cross_platform? p }.
305
- collect {|p| MARQ::Name.cross_platform p }
306
-
307
- total_scores = {}
308
- platforms.each do |platform|
309
- scores = platform_scores(platform, up, down)
310
- total_scores.merge!(scores)
260
+ scores = {}
261
+ MARQ::Platform.organism_platforms(organism).each do |platform|
262
+ scores.merge!(platform_scores(MARQ::Name.cross_platform(platform), up, down))
311
263
  end
312
264
 
313
- total_scores
265
+ scores
314
266
  end
315
267
 
316
- end
317
- end
318
-
319
- if __FILE__ == $0
320
- p MARQ::Dataset.platform 'GDS2791_cross_platform'
321
- p MARQ::Platform.organism 'GPL96'
322
- exit
323
- #puts MARQ::organism_platforms('human')
324
- #puts MARQ.platform_organism("HaploidData")
325
- #puts MARQ::platform_scores_up_down("HaploidData",%w( YMR261c YDL140c YIL122w YPL093w YHR211w YDL142c YHR106w YOR103c YDR233c YLR181c),%w()).keys
326
-
327
- up = %w(
328
-
329
- 51228_at 215046_at 205009_at 204915_s_at 202707_at
330
- 208265_at 210618_at 201185_at 206650_at 200719_at
331
- 215661_at 202071_at 214408_s_at 215092_s_at 206168_at
332
- 212686_at 214162_at 221008_s_at 217709_at 210957_s_at
333
-
334
- )
335
-
336
-
337
- require 'MARQ/ID'
338
- require 'pp'
339
- genes = ID.translate('human',up).compact
340
-
341
- #pp up.zip(genes)
342
- #genes = Open.read("/home/miki/git/MARQ/test/GDS1375_malignant_vs_normal_down.genes").collect{|l| l.chomp.strip}
343
- positions = MARQ::GEORQ.dataset_positions('GDS1231_cross_platform',genes)
344
- pp positions
345
-
346
-
347
- #MARQ::GEORQ.platform_scores_up_down('GPL96_cross_platform',genes,[]).each{|ex, r|
348
- # puts ex
349
- # puts r[:pvalue]
350
- #}
351
-
352
- #Score.draw_hits(positions["disease.state: malignant melanoma <=> normal"], MADB::GEORQ.experiment_entries('GPL96','GDS1375: disease.state: malignant melanoma <=> normal') , '/tmp/foo.png',:size => 1000)
353
-
354
-
268
+ def self.add_pvalues(scores, up_size, down_size)
269
+ null_scores = Score.null_scores(up_size, down_size, NULL_SIZE)
270
+ Score.add_pvalues(scores, null_scores)
271
+ end
355
272
 
273
+ end
356
274
  end
357
-
358
-
@@ -2,135 +2,10 @@ require 'png'
2
2
  require 'inline'
3
3
 
4
4
  module Score
5
- def self.combine(up, down)
6
- return down if up == 0
7
- return up if down == 0
8
-
9
- return up - down
10
- if (up > 0) == (down > 0)
11
- return 0
12
- else
13
- up - down
14
- end
15
- end
16
-
17
- def self.average(list)
18
- clean = list.compact
19
- clean.inject(0){|acc, e| acc += e}.to_f / clean.length
20
- end
21
-
22
- def self.score_area(positions, platform_entries, missing = 0)
23
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
24
-
25
- clean_positions = positions.compact.sort
26
-
27
- total_tags = positions.length + missing
28
- extra = total_tags - clean_positions.length
29
-
30
- top = 0
31
- bottom = 0
32
-
33
- clean_positions.each_with_index{|p,i|
34
- rel_qt = (i + 1).to_f / total_tags
35
- rel_qb = ( i + extra ).to_f / total_tags
36
- rel_p = p.to_f / platform_entries
37
-
38
-
39
- top += rel_qt - rel_p if rel_qt > rel_p
40
- bottom += rel_p - rel_qb if rel_p > rel_qb
41
- }
42
-
43
-
44
- {
45
- :top => top,
46
- :bottom => bottom,
47
- :score => top > bottom ? top.to_f / total_tags : - bottom.to_f / total_tags,
48
- }
49
- end
50
-
51
- def self.score_max_norm(positions, platform_entries, missing = 0)
52
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
53
-
54
- clean_positions = positions.compact.sort
55
-
56
- extra = missing + (positions.length - clean_positions.length)
57
- total_tags = extra + clean_positions.length
58
-
59
- mean = platform_entries / 2
60
-
61
- values_top = [0]
62
- values_bottom = [0]
63
-
64
- clean_positions.each_with_index{|p,i|
65
- rel_qt = (i + 1).to_f / total_tags
66
- rel_qb = ( i + extra ).to_f / total_tags
67
- rel_p = p.to_f / platform_entries
68
-
69
-
70
- values_top << (rel_qt - rel_p) * ((p - mean).abs.to_f / mean)**2
71
- values_bottom << (rel_p - rel_qb) * ((p - mean).abs.to_f / mean)**2
72
- }
73
5
 
74
- top = values_top.max
75
- bottom = values_bottom.max
76
-
77
-
78
- {
79
- :score => top > bottom ? top : -bottom,
80
- }
81
-
82
-
83
- end
84
-
85
- def self.scale_score1(positions, platform_entries)
86
-
87
- mean = platform_entries/2
88
- max_top = 0
89
- max_bottom = 0
90
-
91
- top_list = []
92
- bottom_list = []
93
-
94
- weights = positions.sort.collect{|position|
95
- rel_pos = ((position - mean).abs.to_f / mean);
96
- 0.3 * rel_pos + 0.7 * Math::exp(30*rel_pos)/Math::exp(30)
97
- }
98
- weights.unshift(0)
99
- total_weights = weights.inject(0){|v,acc| acc += v}
100
- weights.collect!{|v| v / total_weights}
101
-
102
- rel_qt = 0
103
- rel_qb = 0
104
- positions.sort.each_with_index{|position, idx|
105
-
106
- rel_qt += weights[idx + 1]
107
- rel_qb += weights[idx]
108
- rel_p = position.to_f / platform_entries
109
-
110
- top = (rel_qt - rel_p);
111
- bottom = (rel_p - rel_qb);
112
-
113
- top_list << top
114
- bottom_list << bottom
115
-
116
- if (top > max_top)
117
- max_top = top;
118
- end
119
- if (bottom > max_bottom)
120
- max_bottom = bottom;
121
- end
122
- }
123
-
124
- p [top_list, bottom_list]
125
- if (max_top > max_bottom)
126
- return max_top;
127
- else
128
- return -max_bottom;
129
- end
130
- end
131
-
132
6
  class << self
133
7
  inline do |builder|
8
+
134
9
  builder.c_raw <<-'EOC'
135
10
  double weight(int position, int mean){
136
11
  double rel_pos = (double) abs(position - mean) / mean;
@@ -138,171 +13,140 @@ module Score
138
13
  return(weight);
139
14
  }
140
15
  EOC
16
+
141
17
  builder.c <<-'EOC'
142
- double fast_score_scale( VALUE positions, int platform_entries, double missing){
18
+ double fast_score_scale(VALUE positions, int total, int missing){
143
19
  int idx;
144
20
 
145
- int mean = platform_entries / 2;
21
+ int mean = total / 2;
146
22
 
147
23
  VALUE rel_q = rb_ary_new();
148
24
  VALUE rel_l = rb_ary_new();
149
25
 
150
26
  rb_ary_push(rel_q,rb_float_new(0));
151
27
 
28
+ // Rescale positions and accumulate weights
152
29
  double total_weights = 0;
153
30
  for (idx = 0; idx < RARRAY(positions)->len; idx++){
154
31
  int position = FIX2INT(rb_ary_entry(positions, idx));
155
32
 
156
- rb_ary_push(rel_l, rb_float_new((double) position / platform_entries));
33
+ rb_ary_push(rel_l, rb_float_new((double) position / total));
157
34
 
158
- total_weights = total_weights + weight(position, mean);
159
- rb_ary_push(rel_q,rb_float_new(total_weights));
35
+ total_weights += weight(position, mean);
36
+ rb_ary_push(rel_q, rb_float_new(total_weights));
160
37
  }
161
38
 
162
39
  // Add penalty for missing genes
163
-
164
- double penalty = missing * weight( mean * 0.8,mean);
165
- total_weights = total_weights + penalty;
40
+ double penalty = missing * weight(mean * 0.8, mean);
41
+ total_weights = total_weights + penalty;
166
42
 
43
+ // Traverse list and get extreme values
167
44
  double max_top, max_bottom;
168
45
  max_top = max_bottom = 0;
169
46
  for (idx = 0; idx < RARRAY(positions)->len; idx++){
170
- double top = RFLOAT(rb_ary_entry(rel_q,idx + 1))->value / total_weights -
171
- RFLOAT(rb_ary_entry(rel_l,idx))->value;
172
- double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q,idx))->value) / total_weights +
173
- RFLOAT(rb_ary_entry(rel_l,idx))->value;
47
+ double top = RFLOAT(rb_ary_entry(rel_q, idx + 1))->value / total_weights -
48
+ RFLOAT(rb_ary_entry(rel_l, idx))->value;
49
+ double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q, idx))->value) / total_weights +
50
+ RFLOAT(rb_ary_entry(rel_l, idx))->value;
174
51
 
175
- if (top > max_top) max_top = top;
52
+ if (top > max_top) max_top = top;
176
53
  if (bottom > max_bottom) max_bottom = bottom;
177
54
  }
178
55
 
179
56
  if (max_top > max_bottom) return max_top;
180
57
  else return -max_bottom;
181
58
  }
182
-
183
59
  EOC
184
60
 
185
-
186
-
187
- builder.c <<-'EOC'
188
- double fast_norm_score( VALUE positions, int total, int extra, int platform_entries){
189
- int idx;
190
-
191
- double mean = (double) platform_entries / 2;
192
- double max_top, max_bottom;
193
- max_top = max_bottom = 0;
194
-
195
- for (idx = 0; idx < RARRAY(positions)->len; idx++){
196
- double position = (double) FIX2INT(rb_ary_entry(positions, (long) idx));
197
-
198
-
199
- double rel_qt = (double) (idx + 1) / total;
200
- double rel_qb = (double) (idx + extra) / total;
201
- double rel_p = position / platform_entries;
202
-
203
- double scale = (abs(position - mean) / mean);
204
- scale = scale * scale;
205
-
206
- double top = (rel_qt - rel_p) * scale;
207
- double bottom = (rel_p - rel_qb) * scale;
208
-
209
-
210
- if (top > max_top) max_top = top;
211
- if (bottom > max_bottom) max_bottom = bottom;
212
- }
213
-
214
- if (max_top > max_bottom) return max_top;
215
- else return -max_bottom;
216
- }
217
-
218
- EOC
219
61
  end
220
- end
221
- def self.score_scale_fast(positions, platform_entries, missing=0)
222
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
223
62
 
224
- clean_positions = positions.compact.sort
225
- missing = missing + positions.length - clean_positions.length
226
-
227
- {
228
- :score => fast_score_scale(clean_positions, platform_entries, missing)
229
- }
230
63
  end
231
64
 
232
65
 
233
- def self.score_norm_fast(positions, platform_entries, missing = 0)
234
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
235
-
236
- clean_positions = positions.compact.sort
237
-
238
- extra = missing + (positions.length - clean_positions.length)
239
- total_tags = extra + clean_positions.length
240
-
241
- {
242
- :score => fast_norm_score(clean_positions, total_tags, extra, platform_entries)
243
- }
66
+ def self.score(*args)
67
+ self.fast_score_scale(*args)
244
68
  end
245
69
 
246
- def self.score_max(positions, platform_entries, missing = 0)
247
- return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
248
-
249
- clean_positions = positions.compact.sort
250
-
251
- extra = missing + (positions.length - clean_positions.length)
252
- total_tags = extra + clean_positions.length
253
-
70
+ def self.scores(dataset, genes)
71
+ positions = MADB.load_positions(dataset, genes)
72
+ values = MADB.num_values(dataset)
73
+
74
+ experiments = positions.keys
75
+
76
+ scores = {}
77
+ experiments.each do |experiment|
78
+ hits = positions[experiment].compact
79
+ total = values[experiment]
80
+ if hits.nil? || hits.empty?
81
+ score = 0
82
+ else
83
+ missing = genes.length - hits.length
84
+ score = self.fast_score_scale(hits.sort, total, missing)
85
+ end
86
+ scores[experiment] = {
87
+ :positions => positions[experiment],
88
+ :score => score,
89
+ :total => total,
90
+ }
91
+ end
254
92
 
255
- values = [0]
93
+ scores
94
+ end
256
95
 
257
- clean_positions.each_with_index{|p,i|
258
-
259
- up = (i + 1).to_f / total_tags
260
- down = p.to_f / platform_entries
96
+ def self.combine(up, down)
97
+ return down if up == 0
98
+ return up if down == 0
261
99
 
262
- values << up - down
263
- }
100
+ return up - down
101
+ if (up > 0) == (down > 0)
102
+ return 0
103
+ else
104
+ up - down
105
+ end
106
+ end
264
107
 
265
- top = values.max
266
- bottom = values.min + ((extra - 1).to_f/ total_tags)
108
+ def self.scores_up_down(dataset, up, down)
109
+ scores_up = scores(dataset, up)
110
+ scores_down = scores(dataset, down)
267
111
 
112
+ scores = {}
113
+ scores_up.keys.each do |experiment|
114
+ scores[experiment] = {}
115
+ scores[experiment][:up] = scores_up[experiment]
116
+ scores[experiment][:down] = scores_down[experiment]
117
+ scores[experiment][:score] = combine(scores_up[experiment][:score], scores_down[experiment][:score])
118
+ end
268
119
 
269
- {
270
- :score => top.abs > bottom.abs ? top : bottom,
271
- }
120
+ scores
272
121
  end
273
122
 
274
- class << self
275
- alias_method :score, :score_scale_fast
123
+ def self.permutations(size, times)
124
+ total = 10000
125
+ if size == 0
126
+ [0] * times
127
+ else
128
+ (1..times).collect do
129
+ fast_score_scale(Array.new(size){ (rand * total).to_i }.sort, total, 0)
130
+ end
131
+ end
276
132
  end
277
133
 
134
+ def self.null_scores(up_size, down_size, times = 10000)
135
+ up_perm = permutations(up_size, times)
136
+ down_perm = permutations(down_size, times)
278
137
 
279
- def self.score_up_down(up, down, total, missing_up = 0, missing_down = 0)
280
- up = score(up, total, missing_up)
281
- down = score(down, total, missing_down)
282
-
283
- {:up => up[:score], :down => down[:score], :score => combine(up[:score], down[:score])}
138
+ up_perm.zip(down_perm).collect{|p| up, down = p; combine(up, down).abs}
284
139
  end
285
140
 
286
- def self.permutations(genes, total, times = 10000)
287
- scores = []
288
- times.times{
289
- positions = Array.new(genes){ (rand * total).to_i }
290
- scores << score(positions, total)[:score]
291
- }
292
- scores
293
- end
294
-
295
- def self.pvalues(scores, up, down, total, options = {})
296
- times = options[:times]|| 1000
141
+ def self.add_pvalues(scores, null_scores)
142
+ null_scores = null_scores.sort
143
+ times = null_scores.length
297
144
 
298
- permutations_up = permutations(up, total, times)
299
- permutations_down = permutations(down, total, times )
300
- permutations = permutations_up.zip(permutations_down).collect{|p| combine(*p).abs }.sort
145
+ scores.each do |experiment, info|
146
+ info[:pvalue] = (times - null_scores.count_smaller(info[:score].abs)).to_f / times
147
+ end
301
148
 
302
- scores.collect{|score|
303
- num = permutations.count_smaller(score.abs)
304
- (times - num).to_f / times
305
- }
149
+ scores
306
150
  end
307
151
 
308
152
  COLORS = {
@@ -352,44 +196,3 @@ module Score
352
196
  end
353
197
  end
354
198
  end
355
-
356
- if __FILE__ == $0
357
- size = 1000
358
- positions=%w(10 30 200).collect{|v| v.to_i}
359
- np = positions.collect{|p| size - p}
360
- p Score.score(positions, size )
361
- p Score.score(np, size )
362
-
363
-
364
- p Score.scale_score1(positions, size )
365
- p Score.scale_score1(np, size )
366
-
367
- require 'benchmark'
368
-
369
-
370
- p = (0..100).collect{ (rand * 1000).to_i}
371
- puts Benchmark.measure{
372
- 1000.times{|i|
373
- Score.score_max_norm(p, 1000);
374
- }
375
- }
376
- puts Benchmark.measure{
377
- 1000.times{|i|
378
- Score.score_max_norm_fast(p, 1000);
379
- }
380
- }
381
-
382
-
383
- per_list = []
384
- 1000.times{
385
- per_list << Array.new(200){(rand * 1000).to_i}
386
- }
387
-
388
- require 'benchmark'
389
- puts Benchmark.measure{
390
- per_list.each{|p|
391
- Score.score_max_norm(p, 1000);
392
- }
393
- }
394
-
395
- end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-marq
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-04 00:00:00 +01:00
12
+ date: 2010-03-09 00:00:00 +01:00
13
13
  default_executable: marq_config
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency