rbbt-marq 2.1.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/MARQ/MADB.rb +25 -74
- data/lib/MARQ/annotations.rb +16 -27
- data/lib/MARQ/main.rb +17 -101
- data/lib/MARQ/score.rb +83 -280
- metadata +2 -2
data/lib/MARQ/MADB.rb
CHANGED
@@ -65,91 +65,42 @@ module MADB
|
|
65
65
|
end
|
66
66
|
|
67
67
|
# {{{ Loading Positions
|
68
|
+
|
69
|
+
def self.num_values(dataset)
|
70
|
+
experiments =
|
71
|
+
DBcache.load(dataset + '_experiments').
|
72
|
+
sort_by {|p| p[0].to_i }.
|
73
|
+
collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
|
68
74
|
|
69
|
-
|
70
|
-
|
71
|
-
DBcache.num_rows(platform + '_codes')
|
72
|
-
end
|
73
|
-
|
74
|
-
# Return the positions of the genes in the signatures derived from the
|
75
|
-
# dataset. Returns a 3 value array: hash of arrays of positions (keys are
|
76
|
-
# signatures), array of the gene ids in the same order as the positions,
|
77
|
-
# and total number of probes in the platform.
|
78
|
-
#
|
79
|
-
def self.load_positions(dataset, genes, platform_entries)
|
80
|
-
gene_positions = DBcache.load(dataset, genes)
|
81
|
-
|
82
|
-
matched = gene_positions.keys.sort
|
83
|
-
|
84
|
-
# Get signature names
|
85
|
-
experiments = DBcache.load(dataset + '_experiments').sort{|a,b|
|
86
|
-
a[0].to_i <=> b[0].to_i
|
87
|
-
}.collect{|p|
|
88
|
-
MARQ::Name.clean(dataset) + ": " + p[1].first
|
89
|
-
}
|
90
|
-
|
91
|
-
# Get scale factors (to account for genes missing in the dataset)
|
92
|
-
scale = (0..experiments.length - 1).collect{|i|
|
93
|
-
rows = DBcache.num_rows(dataset, "C#{i}");
|
94
|
-
if rows > 0
|
95
|
-
platform_entries.to_f / rows
|
96
|
-
else
|
97
|
-
nil
|
98
|
-
end
|
99
|
-
}
|
100
|
-
|
101
|
-
data = {}
|
102
|
-
# Get experiment positions and scale them
|
103
|
-
experiment_x_gene = gene_positions.values_at(*matched).transpose
|
104
|
-
experiments.each_with_index{|experiment, i|
|
105
|
-
next if scale[i].nil? || experiment_x_gene[i].nil?
|
106
|
-
values = experiment_x_gene[i].collect{|v| v.nil? ? nil : (v.to_f * scale[i]).to_i}
|
107
|
-
data[experiment] = values
|
108
|
-
}
|
75
|
+
values = {}
|
76
|
+
experiments.each_with_index do |exp, i| values[exp] = DBcache.num_rows(dataset, "C#{i}") end
|
109
77
|
|
110
|
-
|
78
|
+
values
|
111
79
|
end
|
112
|
-
|
113
|
-
# Load positions of genes in signatures from the given datasets. Returns a
|
114
|
-
# tree value array just like load_positions
|
115
|
-
def self.dataset_positions(dataset, genes)
|
116
|
-
return [{},[],0] if genes.empty?
|
117
80
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
load_positions(dataset, genes, platform_entries)
|
81
|
+
def self.num_codes(dataset)
|
82
|
+
DBcache.num_rows(dataset + '_codes')
|
122
83
|
end
|
123
84
|
|
85
|
+
def self.load_positions(dataset, genes)
|
86
|
+
positions = DBcache.load(dataset, genes)
|
87
|
+
experiments =
|
88
|
+
DBcache.load(dataset + '_experiments').
|
89
|
+
sort_by {|p| p[0].to_i }.
|
90
|
+
collect {|p| MARQ::Name.clean(dataset) + ": " + p[1].first }
|
124
91
|
|
125
|
-
# Loads the data from all signatures for datasets of the platform. The return
|
126
|
-
# value is the same as in dataset_positions and load_positins, except that
|
127
|
-
# the matched gene names need not be in the same order as the actual positions
|
128
|
-
# of the signatures, it just the super set of all genes matched on the
|
129
|
-
# signatures
|
130
|
-
def self.platform_positions(platform, genes)
|
131
|
-
return [{},[],0] if genes.empty?
|
132
|
-
|
133
|
-
genes = genes.collect {|gene| gene.downcase.strip }
|
134
|
-
platform_entries = platform_entries(platform)
|
135
|
-
|
136
|
-
cross_platform = MARQ::Platform.is_cross_platform? platform
|
137
|
-
datasets = MARQ::Platform.datasets(platform).sort
|
138
92
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
total_matched += matched
|
93
|
+
|
94
|
+
result = {}; experiments.each {|exp| result[exp] = [] }
|
95
|
+
positions.values_at(*genes).each do |values|
|
96
|
+
experiments.zip(values || []).each do |p|
|
97
|
+
experiment, value = p
|
98
|
+
result[experiment] << (value.nil? ? nil : value.to_i)
|
99
|
+
end
|
147
100
|
end
|
148
|
-
total_matched.uniq!
|
149
101
|
|
150
|
-
|
102
|
+
result
|
151
103
|
end
|
152
|
-
|
153
104
|
end
|
154
105
|
|
155
106
|
if __FILE__ == $0
|
data/lib/MARQ/annotations.rb
CHANGED
@@ -165,16 +165,13 @@ double hypergeometric(double total, double support, double list, double found)
|
|
165
165
|
positions[term] << rank
|
166
166
|
}
|
167
167
|
}
|
168
|
-
|
169
|
-
scores = []
|
170
|
-
|
171
168
|
|
172
169
|
sizes = {}
|
173
170
|
RANK_SIZE_BINS.each{|size| sizes[size.to_i] = []}
|
174
171
|
|
175
|
-
|
176
172
|
# For each term compute the rank score. Also, place it in the closest size
|
177
173
|
# bin for the permutations.
|
174
|
+
scores = []
|
178
175
|
best.each_with_index{|term, pos|
|
179
176
|
if positions[term]
|
180
177
|
list = positions[term]
|
@@ -190,40 +187,32 @@ double hypergeometric(double total, double support, double list, double found)
|
|
190
187
|
}
|
191
188
|
sizes[sizes.keys.sort.last] << pos if !found
|
192
189
|
|
193
|
-
scores << Score
|
190
|
+
scores << Score.score(list, ranks.length, 0)
|
194
191
|
else # it has no score
|
195
192
|
scores << nil
|
196
193
|
end
|
197
194
|
}
|
198
195
|
|
199
196
|
info = {}
|
200
|
-
|
201
|
-
# Go through all the size bins, run the permutations and assign the pvalues
|
202
|
-
# to all terms in the bin.
|
203
|
-
sizes.keys.each{|size|
|
197
|
+
sizes.each do |size, pos_list|
|
204
198
|
next if size == 1
|
205
|
-
next if
|
199
|
+
next if pos_list.empty?
|
206
200
|
|
207
|
-
|
208
|
-
|
201
|
+
size_info = {}
|
202
|
+
pos_list.each do |pos|
|
203
|
+
score = scores[pos]
|
204
|
+
term = best[pos]
|
205
|
+
hits = positions[term].nil? ? 0 : positions[term].length
|
209
206
|
|
210
|
-
|
211
|
-
|
212
|
-
pvalues = Score::pvalues(sub_list_scores, size, 0, ranks.length)
|
207
|
+
size_info[term] = {:score => score, :hits => hits}
|
208
|
+
end
|
213
209
|
|
214
|
-
|
215
|
-
|
216
|
-
pos = p[0]
|
217
|
-
pvalue = p[1]
|
218
|
-
score = scores[pos]
|
219
|
-
next if score < 0
|
210
|
+
null_scores = Score.null_scores(size, 0)
|
211
|
+
size_info = Score.add_pvalues(size_info, null_scores)
|
220
212
|
|
221
|
-
|
222
|
-
|
213
|
+
info.merge! size_info
|
214
|
+
end
|
223
215
|
|
224
|
-
info[term] = {:score => score, :hits => hits, :pvalue => pvalue}
|
225
|
-
}
|
226
|
-
}
|
227
216
|
|
228
217
|
info
|
229
218
|
end
|
@@ -349,7 +338,7 @@ double hypergeometric(double total, double support, double list, double found)
|
|
349
338
|
end
|
350
339
|
|
351
340
|
if algorithm == :rank
|
352
|
-
ranks = scores.sort{|a,b| compare(a[1],b[1]) }.collect{|p| p[0]}
|
341
|
+
ranks = scores.sort {|a,b| compare(a[1],b[1]) }.collect {|p| p[0]}
|
353
342
|
terms = enrichment_rank(annot, ranks, dict_options)
|
354
343
|
else
|
355
344
|
terms = enrichment_hypergeometric(annot, relevant, dict_options)
|
data/lib/MARQ/main.rb
CHANGED
@@ -240,119 +240,35 @@ module MARQ
|
|
240
240
|
end
|
241
241
|
|
242
242
|
module RankQuery
|
243
|
-
|
244
|
-
matched = matched.collect{|gene| gene.strip.downcase}
|
245
|
-
genes = genes.collect{|gene| gene.strip.downcase}
|
246
|
-
|
247
|
-
pos = Hash[*matched.zip(positions).flatten]
|
248
|
-
|
249
|
-
complete = genes.collect{|gene|
|
250
|
-
if matched.include? gene
|
251
|
-
pos[gene] || "MISSING"
|
252
|
-
else
|
253
|
-
"NOT IN PLATFORM"
|
254
|
-
end
|
255
|
-
}
|
256
|
-
complete
|
257
|
-
end
|
258
|
-
|
259
|
-
|
260
|
-
def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
261
|
-
scores = []
|
262
|
-
|
263
|
-
positions_up.keys.each do |experiment|
|
264
|
-
score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
|
265
|
-
score[:total_entries] = platform_entries
|
266
|
-
score[:positions_up] = complete_positions(positions_up[experiment] || [], matched_up, up) if up.any?
|
267
|
-
score[:positions_down] = complete_positions(positions_down[experiment] || [], matched_down, down) if down.any?
|
268
|
-
scores << score
|
269
|
-
end
|
270
|
-
|
271
|
-
pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
|
272
|
-
|
273
|
-
results = {}
|
274
|
-
positions_up.keys.each_with_index{|experiment,i|
|
275
|
-
results[experiment] = scores[i].merge(:pvalue => pvalues[i])
|
276
|
-
}
|
277
|
-
|
278
|
-
results
|
279
|
-
end
|
243
|
+
NULL_SIZE = 10000
|
280
244
|
|
281
245
|
def self.dataset_scores(dataset, up, down)
|
282
|
-
|
283
|
-
missing_up = positions_up.length - matched_up.length
|
284
|
-
|
285
|
-
positions_down, matched_down = MADB.dataset_positions(dataset, down)
|
286
|
-
missing_down = positions_down.length - matched_down.length
|
287
|
-
|
288
|
-
position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
246
|
+
Score.scores_up_down(dataset, up, down)
|
289
247
|
end
|
290
248
|
|
291
249
|
def self.platform_scores(platform, up, down)
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
missing_down = down.length - matched_down.length
|
250
|
+
scores = {}
|
251
|
+
MARQ::Platform.datasets(platform).each do |dataset|
|
252
|
+
dataset = MARQ::Name.cross_platform dataset if MARQ::Name.is_cross_platform?(platform)
|
253
|
+
scores.merge!(dataset_scores(dataset, up, down))
|
254
|
+
end
|
298
255
|
|
299
|
-
|
256
|
+
scores
|
300
257
|
end
|
301
258
|
|
302
259
|
def self.organism_scores(organism, up, down)
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
total_scores = {}
|
308
|
-
platforms.each do |platform|
|
309
|
-
scores = platform_scores(platform, up, down)
|
310
|
-
total_scores.merge!(scores)
|
260
|
+
scores = {}
|
261
|
+
MARQ::Platform.organism_platforms(organism).each do |platform|
|
262
|
+
scores.merge!(platform_scores(MARQ::Name.cross_platform(platform), up, down))
|
311
263
|
end
|
312
264
|
|
313
|
-
|
265
|
+
scores
|
314
266
|
end
|
315
267
|
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
p MARQ::Dataset.platform 'GDS2791_cross_platform'
|
321
|
-
p MARQ::Platform.organism 'GPL96'
|
322
|
-
exit
|
323
|
-
#puts MARQ::organism_platforms('human')
|
324
|
-
#puts MARQ.platform_organism("HaploidData")
|
325
|
-
#puts MARQ::platform_scores_up_down("HaploidData",%w( YMR261c YDL140c YIL122w YPL093w YHR211w YDL142c YHR106w YOR103c YDR233c YLR181c),%w()).keys
|
326
|
-
|
327
|
-
up = %w(
|
328
|
-
|
329
|
-
51228_at 215046_at 205009_at 204915_s_at 202707_at
|
330
|
-
208265_at 210618_at 201185_at 206650_at 200719_at
|
331
|
-
215661_at 202071_at 214408_s_at 215092_s_at 206168_at
|
332
|
-
212686_at 214162_at 221008_s_at 217709_at 210957_s_at
|
333
|
-
|
334
|
-
)
|
335
|
-
|
336
|
-
|
337
|
-
require 'MARQ/ID'
|
338
|
-
require 'pp'
|
339
|
-
genes = ID.translate('human',up).compact
|
340
|
-
|
341
|
-
#pp up.zip(genes)
|
342
|
-
#genes = Open.read("/home/miki/git/MARQ/test/GDS1375_malignant_vs_normal_down.genes").collect{|l| l.chomp.strip}
|
343
|
-
positions = MARQ::GEORQ.dataset_positions('GDS1231_cross_platform',genes)
|
344
|
-
pp positions
|
345
|
-
|
346
|
-
|
347
|
-
#MARQ::GEORQ.platform_scores_up_down('GPL96_cross_platform',genes,[]).each{|ex, r|
|
348
|
-
# puts ex
|
349
|
-
# puts r[:pvalue]
|
350
|
-
#}
|
351
|
-
|
352
|
-
#Score.draw_hits(positions["disease.state: malignant melanoma <=> normal"], MADB::GEORQ.experiment_entries('GPL96','GDS1375: disease.state: malignant melanoma <=> normal') , '/tmp/foo.png',:size => 1000)
|
353
|
-
|
354
|
-
|
268
|
+
def self.add_pvalues(scores, up_size, down_size)
|
269
|
+
null_scores = Score.null_scores(up_size, down_size, NULL_SIZE)
|
270
|
+
Score.add_pvalues(scores, null_scores)
|
271
|
+
end
|
355
272
|
|
273
|
+
end
|
356
274
|
end
|
357
|
-
|
358
|
-
|
data/lib/MARQ/score.rb
CHANGED
@@ -2,135 +2,10 @@ require 'png'
|
|
2
2
|
require 'inline'
|
3
3
|
|
4
4
|
module Score
|
5
|
-
def self.combine(up, down)
|
6
|
-
return down if up == 0
|
7
|
-
return up if down == 0
|
8
|
-
|
9
|
-
return up - down
|
10
|
-
if (up > 0) == (down > 0)
|
11
|
-
return 0
|
12
|
-
else
|
13
|
-
up - down
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.average(list)
|
18
|
-
clean = list.compact
|
19
|
-
clean.inject(0){|acc, e| acc += e}.to_f / clean.length
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.score_area(positions, platform_entries, missing = 0)
|
23
|
-
return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
|
24
|
-
|
25
|
-
clean_positions = positions.compact.sort
|
26
|
-
|
27
|
-
total_tags = positions.length + missing
|
28
|
-
extra = total_tags - clean_positions.length
|
29
|
-
|
30
|
-
top = 0
|
31
|
-
bottom = 0
|
32
|
-
|
33
|
-
clean_positions.each_with_index{|p,i|
|
34
|
-
rel_qt = (i + 1).to_f / total_tags
|
35
|
-
rel_qb = ( i + extra ).to_f / total_tags
|
36
|
-
rel_p = p.to_f / platform_entries
|
37
|
-
|
38
|
-
|
39
|
-
top += rel_qt - rel_p if rel_qt > rel_p
|
40
|
-
bottom += rel_p - rel_qb if rel_p > rel_qb
|
41
|
-
}
|
42
|
-
|
43
|
-
|
44
|
-
{
|
45
|
-
:top => top,
|
46
|
-
:bottom => bottom,
|
47
|
-
:score => top > bottom ? top.to_f / total_tags : - bottom.to_f / total_tags,
|
48
|
-
}
|
49
|
-
end
|
50
|
-
|
51
|
-
def self.score_max_norm(positions, platform_entries, missing = 0)
|
52
|
-
return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
|
53
|
-
|
54
|
-
clean_positions = positions.compact.sort
|
55
|
-
|
56
|
-
extra = missing + (positions.length - clean_positions.length)
|
57
|
-
total_tags = extra + clean_positions.length
|
58
|
-
|
59
|
-
mean = platform_entries / 2
|
60
|
-
|
61
|
-
values_top = [0]
|
62
|
-
values_bottom = [0]
|
63
|
-
|
64
|
-
clean_positions.each_with_index{|p,i|
|
65
|
-
rel_qt = (i + 1).to_f / total_tags
|
66
|
-
rel_qb = ( i + extra ).to_f / total_tags
|
67
|
-
rel_p = p.to_f / platform_entries
|
68
|
-
|
69
|
-
|
70
|
-
values_top << (rel_qt - rel_p) * ((p - mean).abs.to_f / mean)**2
|
71
|
-
values_bottom << (rel_p - rel_qb) * ((p - mean).abs.to_f / mean)**2
|
72
|
-
}
|
73
5
|
|
74
|
-
top = values_top.max
|
75
|
-
bottom = values_bottom.max
|
76
|
-
|
77
|
-
|
78
|
-
{
|
79
|
-
:score => top > bottom ? top : -bottom,
|
80
|
-
}
|
81
|
-
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.scale_score1(positions, platform_entries)
|
86
|
-
|
87
|
-
mean = platform_entries/2
|
88
|
-
max_top = 0
|
89
|
-
max_bottom = 0
|
90
|
-
|
91
|
-
top_list = []
|
92
|
-
bottom_list = []
|
93
|
-
|
94
|
-
weights = positions.sort.collect{|position|
|
95
|
-
rel_pos = ((position - mean).abs.to_f / mean);
|
96
|
-
0.3 * rel_pos + 0.7 * Math::exp(30*rel_pos)/Math::exp(30)
|
97
|
-
}
|
98
|
-
weights.unshift(0)
|
99
|
-
total_weights = weights.inject(0){|v,acc| acc += v}
|
100
|
-
weights.collect!{|v| v / total_weights}
|
101
|
-
|
102
|
-
rel_qt = 0
|
103
|
-
rel_qb = 0
|
104
|
-
positions.sort.each_with_index{|position, idx|
|
105
|
-
|
106
|
-
rel_qt += weights[idx + 1]
|
107
|
-
rel_qb += weights[idx]
|
108
|
-
rel_p = position.to_f / platform_entries
|
109
|
-
|
110
|
-
top = (rel_qt - rel_p);
|
111
|
-
bottom = (rel_p - rel_qb);
|
112
|
-
|
113
|
-
top_list << top
|
114
|
-
bottom_list << bottom
|
115
|
-
|
116
|
-
if (top > max_top)
|
117
|
-
max_top = top;
|
118
|
-
end
|
119
|
-
if (bottom > max_bottom)
|
120
|
-
max_bottom = bottom;
|
121
|
-
end
|
122
|
-
}
|
123
|
-
|
124
|
-
p [top_list, bottom_list]
|
125
|
-
if (max_top > max_bottom)
|
126
|
-
return max_top;
|
127
|
-
else
|
128
|
-
return -max_bottom;
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
6
|
class << self
|
133
7
|
inline do |builder|
|
8
|
+
|
134
9
|
builder.c_raw <<-'EOC'
|
135
10
|
double weight(int position, int mean){
|
136
11
|
double rel_pos = (double) abs(position - mean) / mean;
|
@@ -138,171 +13,140 @@ module Score
|
|
138
13
|
return(weight);
|
139
14
|
}
|
140
15
|
EOC
|
16
|
+
|
141
17
|
builder.c <<-'EOC'
|
142
|
-
double fast_score_scale(
|
18
|
+
double fast_score_scale(VALUE positions, int total, int missing){
|
143
19
|
int idx;
|
144
20
|
|
145
|
-
int mean =
|
21
|
+
int mean = total / 2;
|
146
22
|
|
147
23
|
VALUE rel_q = rb_ary_new();
|
148
24
|
VALUE rel_l = rb_ary_new();
|
149
25
|
|
150
26
|
rb_ary_push(rel_q,rb_float_new(0));
|
151
27
|
|
28
|
+
// Rescale positions and accumulate weights
|
152
29
|
double total_weights = 0;
|
153
30
|
for (idx = 0; idx < RARRAY(positions)->len; idx++){
|
154
31
|
int position = FIX2INT(rb_ary_entry(positions, idx));
|
155
32
|
|
156
|
-
rb_ary_push(rel_l, rb_float_new((double) position /
|
33
|
+
rb_ary_push(rel_l, rb_float_new((double) position / total));
|
157
34
|
|
158
|
-
total_weights
|
159
|
-
rb_ary_push(rel_q,rb_float_new(total_weights));
|
35
|
+
total_weights += weight(position, mean);
|
36
|
+
rb_ary_push(rel_q, rb_float_new(total_weights));
|
160
37
|
}
|
161
38
|
|
162
39
|
// Add penalty for missing genes
|
163
|
-
|
164
|
-
|
165
|
-
total_weights = total_weights + penalty;
|
40
|
+
double penalty = missing * weight(mean * 0.8, mean);
|
41
|
+
total_weights = total_weights + penalty;
|
166
42
|
|
43
|
+
// Traverse list and get extreme values
|
167
44
|
double max_top, max_bottom;
|
168
45
|
max_top = max_bottom = 0;
|
169
46
|
for (idx = 0; idx < RARRAY(positions)->len; idx++){
|
170
|
-
double top = RFLOAT(rb_ary_entry(rel_q,idx + 1))->value / total_weights -
|
171
|
-
RFLOAT(rb_ary_entry(rel_l,idx))->value;
|
172
|
-
double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q,idx))->value) / total_weights +
|
173
|
-
RFLOAT(rb_ary_entry(rel_l,idx))->value;
|
47
|
+
double top = RFLOAT(rb_ary_entry(rel_q, idx + 1))->value / total_weights -
|
48
|
+
RFLOAT(rb_ary_entry(rel_l, idx))->value;
|
49
|
+
double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q, idx))->value) / total_weights +
|
50
|
+
RFLOAT(rb_ary_entry(rel_l, idx))->value;
|
174
51
|
|
175
|
-
if (top > max_top)
|
52
|
+
if (top > max_top) max_top = top;
|
176
53
|
if (bottom > max_bottom) max_bottom = bottom;
|
177
54
|
}
|
178
55
|
|
179
56
|
if (max_top > max_bottom) return max_top;
|
180
57
|
else return -max_bottom;
|
181
58
|
}
|
182
|
-
|
183
59
|
EOC
|
184
60
|
|
185
|
-
|
186
|
-
|
187
|
-
builder.c <<-'EOC'
|
188
|
-
double fast_norm_score( VALUE positions, int total, int extra, int platform_entries){
|
189
|
-
int idx;
|
190
|
-
|
191
|
-
double mean = (double) platform_entries / 2;
|
192
|
-
double max_top, max_bottom;
|
193
|
-
max_top = max_bottom = 0;
|
194
|
-
|
195
|
-
for (idx = 0; idx < RARRAY(positions)->len; idx++){
|
196
|
-
double position = (double) FIX2INT(rb_ary_entry(positions, (long) idx));
|
197
|
-
|
198
|
-
|
199
|
-
double rel_qt = (double) (idx + 1) / total;
|
200
|
-
double rel_qb = (double) (idx + extra) / total;
|
201
|
-
double rel_p = position / platform_entries;
|
202
|
-
|
203
|
-
double scale = (abs(position - mean) / mean);
|
204
|
-
scale = scale * scale;
|
205
|
-
|
206
|
-
double top = (rel_qt - rel_p) * scale;
|
207
|
-
double bottom = (rel_p - rel_qb) * scale;
|
208
|
-
|
209
|
-
|
210
|
-
if (top > max_top) max_top = top;
|
211
|
-
if (bottom > max_bottom) max_bottom = bottom;
|
212
|
-
}
|
213
|
-
|
214
|
-
if (max_top > max_bottom) return max_top;
|
215
|
-
else return -max_bottom;
|
216
|
-
}
|
217
|
-
|
218
|
-
EOC
|
219
61
|
end
|
220
|
-
end
|
221
|
-
def self.score_scale_fast(positions, platform_entries, missing=0)
|
222
|
-
return {:score => 0, :top => 0, :bottom => 0} if positions.nil? || positions.empty? || positions.compact.empty?
|
223
62
|
|
224
|
-
clean_positions = positions.compact.sort
|
225
|
-
missing = missing + positions.length - clean_positions.length
|
226
|
-
|
227
|
-
{
|
228
|
-
:score => fast_score_scale(clean_positions, platform_entries, missing)
|
229
|
-
}
|
230
63
|
end
|
231
64
|
|
232
65
|
|
233
|
-
def self.
|
234
|
-
|
235
|
-
|
236
|
-
clean_positions = positions.compact.sort
|
237
|
-
|
238
|
-
extra = missing + (positions.length - clean_positions.length)
|
239
|
-
total_tags = extra + clean_positions.length
|
240
|
-
|
241
|
-
{
|
242
|
-
:score => fast_norm_score(clean_positions, total_tags, extra, platform_entries)
|
243
|
-
}
|
66
|
+
def self.score(*args)
|
67
|
+
self.fast_score_scale(*args)
|
244
68
|
end
|
245
69
|
|
246
|
-
def self.
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
70
|
+
def self.scores(dataset, genes)
|
71
|
+
positions = MADB.load_positions(dataset, genes)
|
72
|
+
values = MADB.num_values(dataset)
|
73
|
+
|
74
|
+
experiments = positions.keys
|
75
|
+
|
76
|
+
scores = {}
|
77
|
+
experiments.each do |experiment|
|
78
|
+
hits = positions[experiment].compact
|
79
|
+
total = values[experiment]
|
80
|
+
if hits.nil? || hits.empty?
|
81
|
+
score = 0
|
82
|
+
else
|
83
|
+
missing = genes.length - hits.length
|
84
|
+
score = self.fast_score_scale(hits.sort, total, missing)
|
85
|
+
end
|
86
|
+
scores[experiment] = {
|
87
|
+
:positions => positions[experiment],
|
88
|
+
:score => score,
|
89
|
+
:total => total,
|
90
|
+
}
|
91
|
+
end
|
254
92
|
|
255
|
-
|
93
|
+
scores
|
94
|
+
end
|
256
95
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
down = p.to_f / platform_entries
|
96
|
+
def self.combine(up, down)
|
97
|
+
return down if up == 0
|
98
|
+
return up if down == 0
|
261
99
|
|
262
|
-
|
263
|
-
|
100
|
+
return up - down
|
101
|
+
if (up > 0) == (down > 0)
|
102
|
+
return 0
|
103
|
+
else
|
104
|
+
up - down
|
105
|
+
end
|
106
|
+
end
|
264
107
|
|
265
|
-
|
266
|
-
|
108
|
+
def self.scores_up_down(dataset, up, down)
|
109
|
+
scores_up = scores(dataset, up)
|
110
|
+
scores_down = scores(dataset, down)
|
267
111
|
|
112
|
+
scores = {}
|
113
|
+
scores_up.keys.each do |experiment|
|
114
|
+
scores[experiment] = {}
|
115
|
+
scores[experiment][:up] = scores_up[experiment]
|
116
|
+
scores[experiment][:down] = scores_down[experiment]
|
117
|
+
scores[experiment][:score] = combine(scores_up[experiment][:score], scores_down[experiment][:score])
|
118
|
+
end
|
268
119
|
|
269
|
-
|
270
|
-
:score => top.abs > bottom.abs ? top : bottom,
|
271
|
-
}
|
120
|
+
scores
|
272
121
|
end
|
273
122
|
|
274
|
-
|
275
|
-
|
123
|
+
def self.permutations(size, times)
|
124
|
+
total = 10000
|
125
|
+
if size == 0
|
126
|
+
[0] * times
|
127
|
+
else
|
128
|
+
(1..times).collect do
|
129
|
+
fast_score_scale(Array.new(size){ (rand * total).to_i }.sort, total, 0)
|
130
|
+
end
|
131
|
+
end
|
276
132
|
end
|
277
133
|
|
134
|
+
def self.null_scores(up_size, down_size, times = 10000)
|
135
|
+
up_perm = permutations(up_size, times)
|
136
|
+
down_perm = permutations(down_size, times)
|
278
137
|
|
279
|
-
|
280
|
-
up = score(up, total, missing_up)
|
281
|
-
down = score(down, total, missing_down)
|
282
|
-
|
283
|
-
{:up => up[:score], :down => down[:score], :score => combine(up[:score], down[:score])}
|
138
|
+
up_perm.zip(down_perm).collect{|p| up, down = p; combine(up, down).abs}
|
284
139
|
end
|
285
140
|
|
286
|
-
def self.
|
287
|
-
|
288
|
-
times.
|
289
|
-
positions = Array.new(genes){ (rand * total).to_i }
|
290
|
-
scores << score(positions, total)[:score]
|
291
|
-
}
|
292
|
-
scores
|
293
|
-
end
|
294
|
-
|
295
|
-
def self.pvalues(scores, up, down, total, options = {})
|
296
|
-
times = options[:times]|| 1000
|
141
|
+
def self.add_pvalues(scores, null_scores)
|
142
|
+
null_scores = null_scores.sort
|
143
|
+
times = null_scores.length
|
297
144
|
|
298
|
-
|
299
|
-
|
300
|
-
|
145
|
+
scores.each do |experiment, info|
|
146
|
+
info[:pvalue] = (times - null_scores.count_smaller(info[:score].abs)).to_f / times
|
147
|
+
end
|
301
148
|
|
302
|
-
scores
|
303
|
-
num = permutations.count_smaller(score.abs)
|
304
|
-
(times - num).to_f / times
|
305
|
-
}
|
149
|
+
scores
|
306
150
|
end
|
307
151
|
|
308
152
|
COLORS = {
|
@@ -352,44 +196,3 @@ module Score
|
|
352
196
|
end
|
353
197
|
end
|
354
198
|
end
|
355
|
-
|
356
|
-
if __FILE__ == $0
|
357
|
-
size = 1000
|
358
|
-
positions=%w(10 30 200).collect{|v| v.to_i}
|
359
|
-
np = positions.collect{|p| size - p}
|
360
|
-
p Score.score(positions, size )
|
361
|
-
p Score.score(np, size )
|
362
|
-
|
363
|
-
|
364
|
-
p Score.scale_score1(positions, size )
|
365
|
-
p Score.scale_score1(np, size )
|
366
|
-
|
367
|
-
require 'benchmark'
|
368
|
-
|
369
|
-
|
370
|
-
p = (0..100).collect{ (rand * 1000).to_i}
|
371
|
-
puts Benchmark.measure{
|
372
|
-
1000.times{|i|
|
373
|
-
Score.score_max_norm(p, 1000);
|
374
|
-
}
|
375
|
-
}
|
376
|
-
puts Benchmark.measure{
|
377
|
-
1000.times{|i|
|
378
|
-
Score.score_max_norm_fast(p, 1000);
|
379
|
-
}
|
380
|
-
}
|
381
|
-
|
382
|
-
|
383
|
-
per_list = []
|
384
|
-
1000.times{
|
385
|
-
per_list << Array.new(200){(rand * 1000).to_i}
|
386
|
-
}
|
387
|
-
|
388
|
-
require 'benchmark'
|
389
|
-
puts Benchmark.measure{
|
390
|
-
per_list.each{|p|
|
391
|
-
Score.score_max_norm(p, 1000);
|
392
|
-
}
|
393
|
-
}
|
394
|
-
|
395
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-marq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-03-
|
12
|
+
date: 2010-03-09 00:00:00 +01:00
|
13
13
|
default_executable: marq_config
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|