rbbt-marq 2.1.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/MARQ/MADB.rb +25 -74
- data/lib/MARQ/annotations.rb +16 -27
- data/lib/MARQ/main.rb +17 -101
- data/lib/MARQ/score.rb +83 -280
- metadata +2 -2
data/lib/MARQ/MADB.rb
CHANGED
@@ -65,91 +65,42 @@ module MADB
|
|
65
65
|
end
|
66
66
|
|
67
67
|
# {{{ Loading Positions
|
68
|
+
|
69
|
+
def self.num_values(dataset)
|
70
|
+
experiments =
|
71
|
+
DBcache.load(dataset + '_experiments').
|
72
|
+
sort_by {|p| p[0].to_i }.
|
73
|
+
collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
|
68
74
|
|
69
|
-
|
70
|
-
|
71
|
-
DBcache.num_rows(platform + '_codes')
|
72
|
-
end
|
73
|
-
|
74
|
-
# Return the positions of the genes in the signatures derived from the
|
75
|
-
# dataset. Returns a 3 value array: hash of arrays of positions (keys are
|
76
|
-
# signatures), array of the gene ids in the same order as the positions,
|
77
|
-
# and total number of probes in the platform.
|
78
|
-
#
|
79
|
-
def self.load_positions(dataset, genes, platform_entries)
|
80
|
-
gene_positions = DBcache.load(dataset, genes)
|
81
|
-
|
82
|
-
matched = gene_positions.keys.sort
|
83
|
-
|
84
|
-
# Get signature names
|
85
|
-
experiments = DBcache.load(dataset + '_experiments').sort{|a,b|
|
86
|
-
a[0].to_i <=> b[0].to_i
|
87
|
-
}.collect{|p|
|
88
|
-
MARQ::Name.clean(dataset) + ": " + p[1].first
|
89
|
-
}
|
90
|
-
|
91
|
-
# Get scale factors (to account for genes missing in the dataset)
|
92
|
-
scale = (0..experiments.length - 1).collect{|i|
|
93
|
-
rows = DBcache.num_rows(dataset, "C#{i}");
|
94
|
-
if rows > 0
|
95
|
-
platform_entries.to_f / rows
|
96
|
-
else
|
97
|
-
nil
|
98
|
-
end
|
99
|
-
}
|
100
|
-
|
101
|
-
data = {}
|
102
|
-
# Get experiment positions and scale them
|
103
|
-
experiment_x_gene = gene_positions.values_at(*matched).transpose
|
104
|
-
experiments.each_with_index{|experiment, i|
|
105
|
-
next if scale[i].nil? || experiment_x_gene[i].nil?
|
106
|
-
values = experiment_x_gene[i].collect{|v| v.nil? ? nil : (v.to_f * scale[i]).to_i}
|
107
|
-
data[experiment] = values
|
108
|
-
}
|
75
|
+
values = {}
|
76
|
+
experiments.each_with_index do |exp, i| values[exp] = DBcache.num_rows(dataset, "C#{i}") end
|
109
77
|
|
110
|
-
|
78
|
+
values
|
111
79
|
end
|
112
|
-
|
113
|
-
# Load positions of genes in signatures from the given datasets. Returns a
|
114
|
-
# three value array just like load_positions
|
115
|
-
def self.dataset_positions(dataset, genes)
|
116
|
-
return [{},[],0] if genes.empty?
|
117
80
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
load_positions(dataset, genes, platform_entries)
|
81
|
+
def self.num_codes(dataset)
|
82
|
+
DBcache.num_rows(dataset + '_codes')
|
122
83
|
end
|
123
84
|
|
85
|
+
def self.load_positions(dataset, genes)
|
86
|
+
positions = DBcache.load(dataset, genes)
|
87
|
+
experiments =
|
88
|
+
DBcache.load(dataset + '_experiments').
|
89
|
+
sort_by {|p| p[0].to_i }.
|
90
|
+
collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
|
124
91
|
|
125
|
-
# Loads the data from all signatures for datasets of the platform. The return
|
126
|
-
# value is the same as in dataset_positions and load_positions, except that
|
127
|
-
# the matched gene names need not be in the same order as the actual positions
|
128
|
-
# of the signatures, it is just the superset of all genes matched on the
|
129
|
-
# signatures
|
130
|
-
def self.platform_positions(platform, genes)
|
131
|
-
return [{},[],0] if genes.empty?
|
132
|
-
|
133
|
-
genes = genes.collect {|gene| gene.downcase.strip }
|
134
|
-
platform_entries = platform_entries(platform)
|
135
|
-
|
136
|
-
cross_platform = MARQ::Platform.is_cross_platform? platform
|
137
|
-
datasets = MARQ::Platform.datasets(platform).sort
|
138
92
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
total_matched += matched
|
93
|
+
|
94
|
+
result = {}; experiments.each {|exp| result[exp] = [] }
|
95
|
+
positions.values_at(*genes).each do |values|
|
96
|
+
experiments.zip(values || []).each do |p|
|
97
|
+
experiment, value = p
|
98
|
+
result[experiment] << (value.nil? ? nil : value.to_i)
|
99
|
+
end
|
147
100
|
end
|
148
|
-
total_matched.uniq!
|
149
101
|
|
150
|
-
|
102
|
+
result
|
151
103
|
end
|
152
|
-
|
153
104
|
end
|
154
105
|
|
155
106
|
if __FILE__ == $0
|
data/lib/MARQ/annotations.rb
CHANGED
@@ -165,16 +165,13 @@ double hypergeometric(double total, double support, double list, double found)
|
|
165
165
|
positions[term] << rank
|
166
166
|
}
|
167
167
|
}
|
168
|
-
|
169
|
-
scores = []
|
170
|
-
|
171
168
|
|
172
169
|
sizes = {}
|
173
170
|
RANK_SIZE_BINS.each{|size| sizes[size.to_i] = []}
|
174
171
|
|
175
|
-
|
176
172
|
# For each term compute the rank score. Also, place it in the closest size
|
177
173
|
# bin for the permutations.
|
174
|
+
scores = []
|
178
175
|
best.each_with_index{|term, pos|
|
179
176
|
if positions[term]
|
180
177
|
list = positions[term]
|
@@ -190,40 +187,32 @@ double hypergeometric(double total, double support, double list, double found)
|
|
190
187
|
}
|
191
188
|
sizes[sizes.keys.sort.last] << pos if !found
|
192
189
|
|
193
|
-
scores << Score
|
190
|
+
scores << Score.score(list, ranks.length, 0)
|
194
191
|
else # it has no score
|
195
192
|
scores << nil
|
196
193
|
end
|
197
194
|
}
|
198
195
|
|
199
196
|
info = {}
|
200
|
-
|
201
|
-
# Go through all the size bins, run the permutations and assign the pvalues
|
202
|
-
# to all terms in the bin.
|
203
|
-
sizes.keys.each{|size|
|
197
|
+
sizes.each do |size, pos_list|
|
204
198
|
next if size == 1
|
205
|
-
next if
|
199
|
+
next if pos_list.empty?
|
206
200
|
|
207
|
-
|
208
|
-
|
201
|
+
size_info = {}
|
202
|
+
pos_list.each do |pos|
|
203
|
+
score = scores[pos]
|
204
|
+
term = best[pos]
|
205
|
+
hits = positions[term].nil? ? 0 : positions[term].length
|
209
206
|
|
210
|
-
|
211
|
-
|
212
|
-
pvalues = Score::pvalues(sub_list_scores, size, 0, ranks.length)
|
207
|
+
size_info[term] = {:score => score, :hits => hits}
|
208
|
+
end
|
213
209
|
|
214
|
-
|
215
|
-
|
216
|
-
pos = p[0]
|
217
|
-
pvalue = p[1]
|
218
|
-
score = scores[pos]
|
219
|
-
next if score < 0
|
210
|
+
null_scores = Score.null_scores(size, 0)
|
211
|
+
size_info = Score.add_pvalues(size_info, null_scores)
|
220
212
|
|
221
|
-
|
222
|
-
|
213
|
+
info.merge! size_info
|
214
|
+
end
|
223
215
|
|
224
|
-
info[term] = {:score => score, :hits => hits, :pvalue => pvalue}
|
225
|
-
}
|
226
|
-
}
|
227
216
|
|
228
217
|
info
|
229
218
|
end
|
@@ -349,7 +338,7 @@ double hypergeometric(double total, double support, double list, double found)
|
|
349
338
|
end
|
350
339
|
|
351
340
|
if algorithm == :rank
|
352
|
-
ranks = scores.sort{|a,b| compare(a[1],b[1]) }.collect{|p| p[0]}
|
341
|
+
ranks = scores.sort {|a,b| compare(a[1],b[1]) }.collect {|p| p[0]}
|
353
342
|
terms = enrichment_rank(annot, ranks, dict_options)
|
354
343
|
else
|
355
344
|
terms = enrichment_hypergeometric(annot, relevant, dict_options)
|
data/lib/MARQ/main.rb
CHANGED
@@ -240,119 +240,35 @@ module MARQ
|
|
240
240
|
end
|
241
241
|
|
242
242
|
module RankQuery
|
243
|
-
|
244
|
-
matched = matched.collect{|gene| gene.strip.downcase}
|
245
|
-
genes = genes.collect{|gene| gene.strip.downcase}
|
246
|
-
|
247
|
-
pos = Hash[*matched.zip(positions).flatten]
|
248
|
-
|
249
|
-
complete = genes.collect{|gene|
|
250
|
-
if matched.include? gene
|
251
|
-
pos[gene] || "MISSING"
|
252
|
-
else
|
253
|
-
"NOT IN PLATFORM"
|
254
|
-
end
|
255
|
-
}
|
256
|
-
complete
|
257
|
-
end
|
258
|
-
|
259
|
-
|
260
|
-
def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
261
|
-
scores = []
|
262
|
-
|
263
|
-
positions_up.keys.each do |experiment|
|
264
|
-
score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
|
265
|
-
score[:total_entries] = platform_entries
|
266
|
-
score[:positions_up] = complete_positions(positions_up[experiment] || [], matched_up, up) if up.any?
|
267
|
-
score[:positions_down] = complete_positions(positions_down[experiment] || [], matched_down, down) if down.any?
|
268
|
-
scores << score
|
269
|
-
end
|
270
|
-
|
271
|
-
pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
|
272
|
-
|
273
|
-
results = {}
|
274
|
-
positions_up.keys.each_with_index{|experiment,i|
|
275
|
-
results[experiment] = scores[i].merge(:pvalue => pvalues[i])
|
276
|
-
}
|
277
|
-
|
278
|
-
results
|
279
|
-
end
|
243
|
+
NULL_SIZE = 10000
|
280
244
|
|
281
245
|
def self.dataset_scores(dataset, up, down)
|
282
|
-
|
283
|
-
missing_up = positions_up.length - matched_up.length
|
284
|
-
|
285
|
-
positions_down, matched_down = MADB.dataset_positions(dataset, down)
|
286
|
-
missing_down = positions_down.length - matched_down.length
|
287
|
-
|
288
|
-
position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
246
|
+
Score.scores_up_down(dataset, up, down)
|
289
247
|
end
|
290
248
|
|
291
249
|
def self.platform_scores(platform, up, down)
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
missing_down = down.length - matched_down.length
|
250
|
+
scores = {}
|
251
|
+
MARQ::Platform.datasets(platform).each do |dataset|
|
252
|
+
dataset = MARQ::Name.cross_platform dataset if MARQ::Name.is_cross_platform?(platform)
|
253
|
+
scores.merge!(dataset_scores(dataset, up, down))
|
254
|
+
end
|
298
255
|
|
299
|
-
|
256
|
+
scores
|
300
257
|
end
|
301
258
|
|
302
259
|
def self.organism_scores(organism, up, down)
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
total_scores = {}
|
308
|
-
platforms.each do |platform|
|
309
|
-
scores = platform_scores(platform, up, down)
|
310
|
-
total_scores.merge!(scores)
|
260
|
+
scores = {}
|
261
|
+
MARQ::Platform.organism_platforms(organism).each do |platform|
|
262
|
+
scores.merge!(platform_scores(MARQ::Name.cross_platform(platform), up, down))
|
311
263
|
end
|
312
264
|
|
313
|
-
|
265
|
+
scores
|
314
266
|
end
|
315
267
|
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
p MARQ::Dataset.platform 'GDS2791_cross_platform'
|
321
|
-
p MARQ::Platform.organism 'GPL96'
|
322
|
-
exit
|
323
|
-
#puts MARQ::organism_platforms('human')
|
324
|
-
#puts MARQ.platform_organism("HaploidData")
|
325
|
-
#puts MARQ::platform_scores_up_down("HaploidData",%w( YMR261c YDL140c YIL122w YPL093w YHR211w YDL142c YHR106w YOR103c YDR233c YLR181c),%w()).keys
|
326
|
-
|
327
|
-
up = %w(
|
328
|
-
|
329
|
-
51228_at 215046_at 205009_at 204915_s_at 202707_at
|
330
|
-
208265_at 210618_at 201185_at 206650_at 200719_at
|
331
|
-
215661_at 202071_at 214408_s_at 215092_s_at 206168_at
|
332
|
-
212686_at 214162_at 221008_s_at 217709_at 210957_s_at
|
333
|
-
|
334
|
-
)
|
335
|
-
|
336
|
-
|
337
|
-
require 'MARQ/ID'
|
338
|
-
require 'pp'
|
339
|
-
genes = ID.translate('human',up).compact
|
340
|
-
|
341
|
-
#pp up.zip(genes)
|
342
|
-
#genes = Open.read("/home/miki/git/MARQ/test/GDS1375_malignant_vs_normal_down.genes").collect{|l| l.chomp.strip}
|
343
|
-
positions = MARQ::GEORQ.dataset_positions('GDS1231_cross_platform',genes)
|
344
|
-
pp positions
|
345
|
-
|
346
|
-
|
347
|
-
#MARQ::GEORQ.platform_scores_up_down('GPL96_cross_platform',genes,[]).each{|ex, r|
|
348
|
-
# puts ex
|
349
|
-
# puts r[:pvalue]
|
350
|
-
#}
|
351
|
-
|
352
|
-
#Score.draw_hits(positions["disease.state: malignant melanoma <=> normal"], MADB::GEORQ.experiment_entries('GPL96','GDS1375: disease.state: malignant melanoma <=> normal') , '/tmp/foo.png',:size => 1000)
|
353
|
-
|
354
|
-
|
268
|
+
def self.add_pvalues(scores, up_size, down_size)
|
269
|
+
null_scores = Score.null_scores(up_size, down_size, NULL_SIZE)
|
270
|
+
Score.add_pvalues(scores, null_scores)
|
271
|
+
end
|
355
272
|
|
273
|
+
end
|
356
274
|
end
|
357
|
-
|
358
|
-
|
data/lib/MARQ/score.rb
CHANGED
@@ -2,135 +2,10 @@ require 'png'
|
|
2
2
|
require 'inline'
|
3
3
|
|
4
4
|
module Score
|
5
|
-
def self.combine(up, down)
|
6
|
-
return down if up == 0
|
7
|
-
return up if down == 0
|
8
|
-
|
9
|
-
return up - down
|
10
|
-
if (up > 0) == (down > 0)
|
11
|
-
return 0
|
12
|
-
else
|
13
|
-
up - down
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.average(list)
|
18
|
-
clean = list.compact
|
19
|
-
clean.inject(0){|acc, e| acc += e}.to_f / clean.length
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.score_area(positions, platform_entries, missing = 0)
|
23
|
-
return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
|
24
|
-
|
25
|
-
clean_positions = positions.compact.sort
|
26
|
-
|
27
|
-
total_tags = positions.length + missing
|
28
|
-
extra = total_tags - clean_positions.length
|
29
|
-
|
30
|
-
top = 0
|
31
|
-
bottom = 0
|
32
|
-
|
33
|
-
clean_positions.each_with_index{|p,i|
|
34
|
-
rel_qt = (i + 1).to_f / total_tags
|
35
|
-
rel_qb = ( i + extra ).to_f / total_tags
|
36
|
-
rel_p = p.to_f / platform_entries
|
37
|
-
|
38
|
-
|
39
|
-
top += rel_qt - rel_p if rel_qt > rel_p
|
40
|
-
bottom += rel_p - rel_qb if rel_p > rel_qb
|
41
|
-
}
|
42
|
-
|
43
|
-
|
44
|
-
{
|
45
|
-
:top => top,
|
46
|
-
:bottom => bottom,
|
47
|
-
:score => top > bottom ? top.to_f / total_tags : - bottom.to_f / total_tags,
|
48
|
-
}
|
49
|
-
end
|
50
|
-
|
51
|
-
def self.score_max_norm(positions, platform_entries, missing = 0)
|
52
|
-
return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
|
53
|
-
|
54
|
-
clean_positions = positions.compact.sort
|
55
|
-
|
56
|
-
extra = missing + (positions.length - clean_positions.length)
|
57
|
-
total_tags = extra + clean_positions.length
|
58
|
-
|
59
|
-
mean = platform_entries / 2
|
60
|
-
|
61
|
-
values_top = [0]
|
62
|
-
values_bottom = [0]
|
63
|
-
|
64
|
-
clean_positions.each_with_index{|p,i|
|
65
|
-
rel_qt = (i + 1).to_f / total_tags
|
66
|
-
rel_qb = ( i + extra ).to_f / total_tags
|
67
|
-
rel_p = p.to_f / platform_entries
|
68
|
-
|
69
|
-
|
70
|
-
values_top << (rel_qt - rel_p) * ((p - mean).abs.to_f / mean)**2
|
71
|
-
values_bottom << (rel_p - rel_qb) * ((p - mean).abs.to_f / mean)**2
|
72
|
-
}
|
73
5
|
|
74
|
-
top = values_top.max
|
75
|
-
bottom = values_bottom.max
|
76
|
-
|
77
|
-
|
78
|
-
{
|
79
|
-
:score => top > bottom ? top : -bottom,
|
80
|
-
}
|
81
|
-
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.scale_score1(positions, platform_entries)
|
86
|
-
|
87
|
-
mean = platform_entries/2
|
88
|
-
max_top = 0
|
89
|
-
max_bottom = 0
|
90
|
-
|
91
|
-
top_list = []
|
92
|
-
bottom_list = []
|
93
|
-
|
94
|
-
weights = positions.sort.collect{|position|
|
95
|
-
rel_pos = ((position - mean).abs.to_f / mean);
|
96
|
-
0.3 * rel_pos + 0.7 * Math::exp(30*rel_pos)/Math::exp(30)
|
97
|
-
}
|
98
|
-
weights.unshift(0)
|
99
|
-
total_weights = weights.inject(0){|v,acc| acc += v}
|
100
|
-
weights.collect!{|v| v / total_weights}
|
101
|
-
|
102
|
-
rel_qt = 0
|
103
|
-
rel_qb = 0
|
104
|
-
positions.sort.each_with_index{|position, idx|
|
105
|
-
|
106
|
-
rel_qt += weights[idx + 1]
|
107
|
-
rel_qb += weights[idx]
|
108
|
-
rel_p = position.to_f / platform_entries
|
109
|
-
|
110
|
-
top = (rel_qt - rel_p);
|
111
|
-
bottom = (rel_p - rel_qb);
|
112
|
-
|
113
|
-
top_list << top
|
114
|
-
bottom_list << bottom
|
115
|
-
|
116
|
-
if (top > max_top)
|
117
|
-
max_top = top;
|
118
|
-
end
|
119
|
-
if (bottom > max_bottom)
|
120
|
-
max_bottom = bottom;
|
121
|
-
end
|
122
|
-
}
|
123
|
-
|
124
|
-
p [top_list, bottom_list]
|
125
|
-
if (max_top > max_bottom)
|
126
|
-
return max_top;
|
127
|
-
else
|
128
|
-
return -max_bottom;
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
6
|
class << self
|
133
7
|
inline do |builder|
|
8
|
+
|
134
9
|
builder.c_raw <<-'EOC'
|
135
10
|
double weight(int position, int mean){
|
136
11
|
double rel_pos = (double) abs(position - mean) / mean;
|
@@ -138,171 +13,140 @@ module Score
|
|
138
13
|
return(weight);
|
139
14
|
}
|
140
15
|
EOC
|
16
|
+
|
141
17
|
builder.c <<-'EOC'
|
142
|
-
double fast_score_scale(
|
18
|
+
double fast_score_scale(VALUE positions, int total, int missing){
|
143
19
|
int idx;
|
144
20
|
|
145
|
-
int mean =
|
21
|
+
int mean = total / 2;
|
146
22
|
|
147
23
|
VALUE rel_q = rb_ary_new();
|
148
24
|
VALUE rel_l = rb_ary_new();
|
149
25
|
|
150
26
|
rb_ary_push(rel_q,rb_float_new(0));
|
151
27
|
|
28
|
+
// Rescale positions and accumulate weights
|
152
29
|
double total_weights = 0;
|
153
30
|
for (idx = 0; idx < RARRAY(positions)->len; idx++){
|
154
31
|
int position = FIX2INT(rb_ary_entry(positions, idx));
|
155
32
|
|
156
|
-
rb_ary_push(rel_l, rb_float_new((double) position /
|
33
|
+
rb_ary_push(rel_l, rb_float_new((double) position / total));
|
157
34
|
|
158
|
-
total_weights
|
159
|
-
rb_ary_push(rel_q,rb_float_new(total_weights));
|
35
|
+
total_weights += weight(position, mean);
|
36
|
+
rb_ary_push(rel_q, rb_float_new(total_weights));
|
160
37
|
}
|
161
38
|
|
162
39
|
// Add penalty for missing genes
|
163
|
-
|
164
|
-
|
165
|
-
total_weights = total_weights + penalty;
|
40
|
+
double penalty = missing * weight(mean * 0.8, mean);
|
41
|
+
total_weights = total_weights + penalty;
|
166
42
|
|
43
|
+
// Traverse list and get extreme values
|
167
44
|
double max_top, max_bottom;
|
168
45
|
max_top = max_bottom = 0;
|
169
46
|
for (idx = 0; idx < RARRAY(positions)->len; idx++){
|
170
|
-
double top = RFLOAT(rb_ary_entry(rel_q,idx + 1))->value / total_weights -
|
171
|
-
RFLOAT(rb_ary_entry(rel_l,idx))->value;
|
172
|
-
double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q,idx))->value) / total_weights +
|
173
|
-
RFLOAT(rb_ary_entry(rel_l,idx))->value;
|
47
|
+
double top = RFLOAT(rb_ary_entry(rel_q, idx + 1))->value / total_weights -
|
48
|
+
RFLOAT(rb_ary_entry(rel_l, idx))->value;
|
49
|
+
double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q, idx))->value) / total_weights +
|
50
|
+
RFLOAT(rb_ary_entry(rel_l, idx))->value;
|
174
51
|
|
175
|
-
if (top > max_top)
|
52
|
+
if (top > max_top) max_top = top;
|
176
53
|
if (bottom > max_bottom) max_bottom = bottom;
|
177
54
|
}
|
178
55
|
|
179
56
|
if (max_top > max_bottom) return max_top;
|
180
57
|
else return -max_bottom;
|
181
58
|
}
|
182
|
-
|
183
59
|
EOC
|
184
60
|
|
185
|
-
|
186
|
-
|
187
|
-
builder.c <<-'EOC'
|
188
|
-
double fast_norm_score( VALUE positions, int total, int extra, int platform_entries){
|
189
|
-
int idx;
|
190
|
-
|
191
|
-
double mean = (double) platform_entries / 2;
|
192
|
-
double max_top, max_bottom;
|
193
|
-
max_top = max_bottom = 0;
|
194
|
-
|
195
|
-
for (idx = 0; idx < RARRAY(positions)->len; idx++){
|
196
|
-
double position = (double) FIX2INT(rb_ary_entry(positions, (long) idx));
|
197
|
-
|
198
|
-
|
199
|
-
double rel_qt = (double) (idx + 1) / total;
|
200
|
-
double rel_qb = (double) (idx + extra) / total;
|
201
|
-
double rel_p = position / platform_entries;
|
202
|
-
|
203
|
-
double scale = (abs(position - mean) / mean);
|
204
|
-
scale = scale * scale;
|
205
|
-
|
206
|
-
double top = (rel_qt - rel_p) * scale;
|
207
|
-
double bottom = (rel_p - rel_qb) * scale;
|
208
|
-
|
209
|
-
|
210
|
-
if (top > max_top) max_top = top;
|
211
|
-
if (bottom > max_bottom) max_bottom = bottom;
|
212
|
-
}
|
213
|
-
|
214
|
-
if (max_top > max_bottom) return max_top;
|
215
|
-
else return -max_bottom;
|
216
|
-
}
|
217
|
-
|
218
|
-
EOC
|
219
61
|
end
|
220
|
-
end
|
221
|
-
def self.score_scale_fast(positions, platform_entries, missing=0)
|
222
|
-
return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
|
223
62
|
|
224
|
-
clean_positions = positions.compact.sort
|
225
|
-
missing = missing + positions.length - clean_positions.length
|
226
|
-
|
227
|
-
{
|
228
|
-
:score => fast_score_scale(clean_positions, platform_entries, missing)
|
229
|
-
}
|
230
63
|
end
|
231
64
|
|
232
65
|
|
233
|
-
def self.
|
234
|
-
|
235
|
-
|
236
|
-
clean_positions = positions.compact.sort
|
237
|
-
|
238
|
-
extra = missing + (positions.length - clean_positions.length)
|
239
|
-
total_tags = extra + clean_positions.length
|
240
|
-
|
241
|
-
{
|
242
|
-
:score => fast_norm_score(clean_positions, total_tags, extra, platform_entries)
|
243
|
-
}
|
66
|
+
def self.score(*args)
|
67
|
+
self.fast_score_scale(*args)
|
244
68
|
end
|
245
69
|
|
246
|
-
def self.
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
70
|
+
def self.scores(dataset, genes)
|
71
|
+
positions = MADB.load_positions(dataset, genes)
|
72
|
+
values = MADB.num_values(dataset)
|
73
|
+
|
74
|
+
experiments = positions.keys
|
75
|
+
|
76
|
+
scores = {}
|
77
|
+
experiments.each do |experiment|
|
78
|
+
hits = positions[experiment].compact
|
79
|
+
total = values[experiment]
|
80
|
+
if hits.nil? || hits.empty?
|
81
|
+
score = 0
|
82
|
+
else
|
83
|
+
missing = genes.length - hits.length
|
84
|
+
score = self.fast_score_scale(hits.sort, total, missing)
|
85
|
+
end
|
86
|
+
scores[experiment] = {
|
87
|
+
:positions => positions[experiment],
|
88
|
+
:score => score,
|
89
|
+
:total => total,
|
90
|
+
}
|
91
|
+
end
|
254
92
|
|
255
|
-
|
93
|
+
scores
|
94
|
+
end
|
256
95
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
down = p.to_f / platform_entries
|
96
|
+
def self.combine(up, down)
|
97
|
+
return down if up == 0
|
98
|
+
return up if down == 0
|
261
99
|
|
262
|
-
|
263
|
-
|
100
|
+
return up - down
|
101
|
+
if (up > 0) == (down > 0)
|
102
|
+
return 0
|
103
|
+
else
|
104
|
+
up - down
|
105
|
+
end
|
106
|
+
end
|
264
107
|
|
265
|
-
|
266
|
-
|
108
|
+
def self.scores_up_down(dataset, up, down)
|
109
|
+
scores_up = scores(dataset, up)
|
110
|
+
scores_down = scores(dataset, down)
|
267
111
|
|
112
|
+
scores = {}
|
113
|
+
scores_up.keys.each do |experiment|
|
114
|
+
scores[experiment] = {}
|
115
|
+
scores[experiment][:up] = scores_up[experiment]
|
116
|
+
scores[experiment][:down] = scores_down[experiment]
|
117
|
+
scores[experiment][:score] = combine(scores_up[experiment][:score], scores_down[experiment][:score])
|
118
|
+
end
|
268
119
|
|
269
|
-
|
270
|
-
:score => top.abs > bottom.abs ? top : bottom,
|
271
|
-
}
|
120
|
+
scores
|
272
121
|
end
|
273
122
|
|
274
|
-
|
275
|
-
|
123
|
+
def self.permutations(size, times)
|
124
|
+
total = 10000
|
125
|
+
if size == 0
|
126
|
+
[0] * times
|
127
|
+
else
|
128
|
+
(1..times).collect do
|
129
|
+
fast_score_scale(Array.new(size){ (rand * total).to_i }.sort, total, 0)
|
130
|
+
end
|
131
|
+
end
|
276
132
|
end
|
277
133
|
|
134
|
+
def self.null_scores(up_size, down_size, times = 10000)
|
135
|
+
up_perm = permutations(up_size, times)
|
136
|
+
down_perm = permutations(down_size, times)
|
278
137
|
|
279
|
-
|
280
|
-
up = score(up, total, missing_up)
|
281
|
-
down = score(down, total, missing_down)
|
282
|
-
|
283
|
-
{:up => up[:score], :down => down[:score], :score => combine(up[:score], down[:score])}
|
138
|
+
up_perm.zip(down_perm).collect{|p| up, down = p; combine(up, down).abs}
|
284
139
|
end
|
285
140
|
|
286
|
-
def self.
|
287
|
-
|
288
|
-
times.
|
289
|
-
positions = Array.new(genes){ (rand * total).to_i }
|
290
|
-
scores << score(positions, total)[:score]
|
291
|
-
}
|
292
|
-
scores
|
293
|
-
end
|
294
|
-
|
295
|
-
def self.pvalues(scores, up, down, total, options = {})
|
296
|
-
times = options[:times]|| 1000
|
141
|
+
def self.add_pvalues(scores, null_scores)
|
142
|
+
null_scores = null_scores.sort
|
143
|
+
times = null_scores.length
|
297
144
|
|
298
|
-
|
299
|
-
|
300
|
-
|
145
|
+
scores.each do |experiment, info|
|
146
|
+
info[:pvalue] = (times - null_scores.count_smaller(info[:score].abs)).to_f / times
|
147
|
+
end
|
301
148
|
|
302
|
-
scores
|
303
|
-
num = permutations.count_smaller(score.abs)
|
304
|
-
(times - num).to_f / times
|
305
|
-
}
|
149
|
+
scores
|
306
150
|
end
|
307
151
|
|
308
152
|
COLORS = {
|
@@ -352,44 +196,3 @@ module Score
|
|
352
196
|
end
|
353
197
|
end
|
354
198
|
end
|
355
|
-
|
356
|
-
if __FILE__ == $0
|
357
|
-
size = 1000
|
358
|
-
positions=%w(10 30 200).collect{|v| v.to_i}
|
359
|
-
np = positions.collect{|p| size - p}
|
360
|
-
p Score.score(positions, size )
|
361
|
-
p Score.score(np, size )
|
362
|
-
|
363
|
-
|
364
|
-
p Score.scale_score1(positions, size )
|
365
|
-
p Score.scale_score1(np, size )
|
366
|
-
|
367
|
-
require 'benchmark'
|
368
|
-
|
369
|
-
|
370
|
-
p = (0..100).collect{ (rand * 1000).to_i}
|
371
|
-
puts Benchmark.measure{
|
372
|
-
1000.times{|i|
|
373
|
-
Score.score_max_norm(p, 1000);
|
374
|
-
}
|
375
|
-
}
|
376
|
-
puts Benchmark.measure{
|
377
|
-
1000.times{|i|
|
378
|
-
Score.score_max_norm_fast(p, 1000);
|
379
|
-
}
|
380
|
-
}
|
381
|
-
|
382
|
-
|
383
|
-
per_list = []
|
384
|
-
1000.times{
|
385
|
-
per_list << Array.new(200){(rand * 1000).to_i}
|
386
|
-
}
|
387
|
-
|
388
|
-
require 'benchmark'
|
389
|
-
puts Benchmark.measure{
|
390
|
-
per_list.each{|p|
|
391
|
-
Score.score_max_norm(p, 1000);
|
392
|
-
}
|
393
|
-
}
|
394
|
-
|
395
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-marq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-03-
|
12
|
+
date: 2010-03-09 00:00:00 +01:00
|
13
13
|
default_executable: marq_config
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|