rbbt-marq 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/R/GEO.R +6 -4
- data/R/MA.R +1 -0
- data/bin/marq_config +4 -3
- data/install_scripts/CustomDS/Rakefile +27 -215
- data/install_scripts/GEO/Rakefile +34 -275
- data/install_scripts/rake_includes.rb +236 -0
- data/lib/MARQ/CustomDS.rb +63 -32
- data/lib/MARQ/GEO.rb +99 -63
- data/lib/MARQ/MADB.rb +107 -202
- data/lib/MARQ/annotations.rb +124 -38
- data/lib/MARQ/main.rb +152 -160
- data/lib/MARQ/rankproduct.rb +20 -34
- data/tasks/install.rake +7 -2
- metadata +3 -2
data/lib/MARQ/main.rb
CHANGED
@@ -3,86 +3,156 @@ require 'MARQ/MADB'
|
|
3
3
|
require 'MARQ/score'
|
4
4
|
|
5
5
|
module MARQ
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
else
|
10
|
-
return :CustomDS
|
6
|
+
module Platform
|
7
|
+
def self.is_GEO?(platform)
|
8
|
+
! platform.match(/^GPL/).nil?
|
11
9
|
end
|
12
|
-
end
|
13
10
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
def self.is_cross_platform?(platform)
|
12
|
+
! platform.match(/_cross_platform$/).nil?
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.clean(name)
|
16
|
+
name.sub(/_cross_platform/,'') unless name.nil?
|
19
17
|
end
|
20
|
-
end
|
21
18
|
|
22
|
-
|
23
|
-
|
24
|
-
if platform
|
25
|
-
GEO
|
19
|
+
def self.path(platform)
|
20
|
+
platform = clean(platform)
|
21
|
+
if is_GEO? platform
|
22
|
+
GEO.platform_path(platform)
|
26
23
|
else
|
27
|
-
|
24
|
+
CustomDS.platform_path(platform)
|
28
25
|
end
|
29
|
-
else
|
30
|
-
CustomDS::organism(platform)
|
31
26
|
end
|
32
|
-
end
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
|
28
|
+
def self.has_cross_platform?(platform)
|
29
|
+
File.exists? File.join(path(platform), 'cross_platform')
|
30
|
+
end
|
37
31
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
32
|
+
def self.datasets(platform)
|
33
|
+
if is_GEO? platform
|
34
|
+
GEO.platform_datasets(platform)
|
35
|
+
else
|
36
|
+
CustomDS.platform_datasets(platform)
|
37
|
+
end
|
43
38
|
end
|
44
|
-
end
|
45
39
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
else
|
50
|
-
CustomDS::is_cross_platform?(platform)
|
40
|
+
def self.codes(platform)
|
41
|
+
platform = clean(platform)
|
42
|
+
Open.read(File.join(path(platform), 'codes')).scan(/[^\s]+/)
|
51
43
|
end
|
52
|
-
end
|
53
44
|
|
54
|
-
|
45
|
+
def self.cross_platform(platform)
|
46
|
+
platform = clean(platform)
|
47
|
+
Open.read(File.join(path(platform), 'cross_platform')).scan(/[^\s]+/)
|
48
|
+
end
|
55
49
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
pos[gene] || "MISSING"
|
50
|
+
def self.organism(platform)
|
51
|
+
platform = clean(platform)
|
52
|
+
if is_GEO? platform
|
53
|
+
GEO.platform_organism platform
|
61
54
|
else
|
62
|
-
|
55
|
+
CustomDS.platform_organism platform
|
63
56
|
end
|
64
|
-
|
57
|
+
end
|
65
58
|
|
59
|
+
def self.process(platform)
|
60
|
+
platform = clean(platform)
|
61
|
+
if is_GEO? platform
|
62
|
+
GEO.process_platform(platform)
|
63
|
+
else
|
64
|
+
CustomDS.process_platform(platform)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def self.organism_platforms(organism)
|
69
|
+
GEO.platforms.select {|platform|
|
70
|
+
GEO::SOFT.GPL(platform)[:organism] == organism && MARQ::Platform.datasets(platform).any?
|
71
|
+
} +
|
72
|
+
CustomDS.organism_platforms(organism)
|
73
|
+
end
|
66
74
|
end
|
67
75
|
|
76
|
+
|
68
77
|
module Dataset
|
78
|
+
def self.is_GEO?(dataset)
|
79
|
+
! dataset.match(/^(?:GDS|GSE)/).nil?
|
80
|
+
end
|
81
|
+
|
82
|
+
def self.clean(name)
|
83
|
+
name.sub(/_cross_platform/,'') if name
|
84
|
+
end
|
85
|
+
|
86
|
+
def self.path(platform)
|
87
|
+
if is_GEO? platform
|
88
|
+
GEO.dataset_path(platform)
|
89
|
+
else
|
90
|
+
CustomDS.dataset_path(platform)
|
91
|
+
end
|
92
|
+
end
|
69
93
|
|
70
94
|
def self.exists?(dataset)
|
71
|
-
|
95
|
+
! path(dataset).nil?
|
96
|
+
end
|
97
|
+
|
98
|
+
def self.is_cross_platform?(dataset)
|
99
|
+
! dataset.match(/_cross_platform$/).nil?
|
100
|
+
end
|
101
|
+
|
102
|
+
def self.has_cross_platform?(dataset)
|
103
|
+
File.exists?(path(dataset) + '_cross_platform.orders')
|
104
|
+
end
|
105
|
+
|
106
|
+
def self.exists?(dataset)
|
107
|
+
path(dataset) != nil
|
108
|
+
end
|
109
|
+
|
110
|
+
def self.info(name)
|
111
|
+
begin
|
112
|
+
title, description = Open.read(path(name) + '.description').split(/\n--\n/).values_at(0,1)
|
113
|
+
{:title => title.strip, :description => description.strip}
|
114
|
+
rescue Exception
|
115
|
+
puts $!.message
|
116
|
+
{:title => "" , :description => "" }
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def self.platform(dataset)
|
121
|
+
if is_GEO? dataset
|
122
|
+
GEO.dataset_platform(dataset)
|
123
|
+
else
|
124
|
+
CustomDS.dataset_platform(dataset)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def self.organism(dataset)
|
129
|
+
MARQ::Platform.organism(platform(dataset))
|
130
|
+
end
|
131
|
+
|
132
|
+
def self.process(dataset, platform = nil)
|
133
|
+
if is_GEO? dataset
|
134
|
+
GEO.process_dataset(dataset, platform)
|
135
|
+
else
|
136
|
+
CustomDS.process_dataset(dataset, platform)
|
137
|
+
end
|
72
138
|
end
|
73
139
|
|
74
140
|
def self.read_file(dataset, ext)
|
75
|
-
Open.read(
|
141
|
+
Open.read(path(dataset) + '.' + ext)
|
76
142
|
end
|
77
143
|
|
78
|
-
def self.read_values(dataset, file)
|
144
|
+
def self.read_values(dataset, file, integer = false)
|
79
145
|
result = {}
|
80
146
|
|
81
147
|
experiments = experiments(dataset)
|
82
148
|
experiments.each{|experiment| result[experiment] = [] }
|
83
149
|
read_file(dataset, file).split(/\n/).each do |line|
|
84
150
|
values = line.chomp.split(/\t/)
|
85
|
-
|
151
|
+
if integer
|
152
|
+
values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_i)}
|
153
|
+
else
|
154
|
+
values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_f)}
|
155
|
+
end
|
86
156
|
end
|
87
157
|
|
88
158
|
result
|
@@ -104,16 +174,6 @@ module MARQ
|
|
104
174
|
end
|
105
175
|
|
106
176
|
|
107
|
-
def self.platform_codes(platform)
|
108
|
-
if MARQ::is_cross_platform? platform
|
109
|
-
file = 'cross_platform'
|
110
|
-
else
|
111
|
-
file = 'codes'
|
112
|
-
end
|
113
|
-
|
114
|
-
Open.read(File.join(MARQ::platform_path(platform), file))
|
115
|
-
end
|
116
|
-
|
117
177
|
def self.experiments(dataset)
|
118
178
|
read_file(dataset, 'experiments').split(/\n/)
|
119
179
|
end
|
@@ -123,13 +183,13 @@ module MARQ
|
|
123
183
|
end
|
124
184
|
|
125
185
|
def self.orders(dataset)
|
126
|
-
read_values(dataset, 'orders')
|
186
|
+
read_values(dataset, 'orders', true)
|
127
187
|
end
|
128
188
|
|
129
189
|
def self.logratios(dataset)
|
130
190
|
read_values(dataset, 'logratios')
|
131
191
|
end
|
132
|
-
|
192
|
+
|
133
193
|
def self.pvalues(dataset)
|
134
194
|
read_values_t(dataset, 'pvalues')
|
135
195
|
end
|
@@ -138,136 +198,68 @@ module MARQ
|
|
138
198
|
read_values_t(dataset, 't')
|
139
199
|
end
|
140
200
|
|
141
|
-
|
142
|
-
|
143
|
-
end
|
144
|
-
|
145
|
-
def self.platform_scores_up_down(platform, up, down)
|
146
|
-
if platform_type(platform) == :GEO
|
147
|
-
GEORQ.platform_scores_up_down(platform, up, down)
|
148
|
-
else
|
149
|
-
CustomDSRQ.scores_up_down(platform, up, down)
|
150
|
-
end
|
151
201
|
end
|
152
202
|
|
153
|
-
module
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
positions_down, matched_down = *MADB::CustomDS.positions(platform, down)
|
164
|
-
missing_down = down.length - matched_down.length
|
165
|
-
|
166
|
-
experiments = positions_up.keys
|
167
|
-
platform_entries = MADB::CustomDS.platform_entries(platform)
|
168
|
-
|
169
|
-
scores = experiments.collect{|experiment|
|
170
|
-
score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
|
171
|
-
score[:total_entries] = platform_entries
|
172
|
-
score[:positions_up] = MARQ.complete_positions(positions_up[experiment], matched_up, up) if positions_up.any?
|
173
|
-
score[:positions_down] = MARQ.complete_positions(positions_down[experiment], matched_down, down) if positions_down.any?
|
174
|
-
score
|
175
|
-
}
|
176
|
-
|
177
|
-
pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
|
178
|
-
|
179
|
-
results = {}
|
180
|
-
experiments.each_with_index{|experiment,i|
|
181
|
-
results[experiment] = scores[i].merge(:pvalue => pvalues[i])
|
203
|
+
module RankQuery
|
204
|
+
def self.complete_positions(positions, matched, genes)
|
205
|
+
pos = Hash[*matched.zip(positions).flatten]
|
206
|
+
complete = genes.collect{|gene|
|
207
|
+
gene = gene.downcase.strip
|
208
|
+
if matched.include? gene
|
209
|
+
pos[gene] || "MISSING"
|
210
|
+
else
|
211
|
+
"NOT IN PLATFORM"
|
212
|
+
end
|
182
213
|
}
|
183
|
-
|
184
|
-
|
185
|
-
results
|
186
214
|
end
|
187
|
-
end
|
188
|
-
|
189
|
-
module GEORQ
|
190
|
-
|
191
|
-
def self.platform_scores_up_down(platform, up, down)
|
192
|
-
platform_entries = MADB::GEO.platform_entries(platform)
|
193
215
|
|
194
|
-
positions_up, matched_up = *MADB::GEO.positions(platform, up)
|
195
|
-
missing_up = up.length - matched_up.length
|
196
|
-
|
197
|
-
positions_down, matched_down = *MADB::GEO.positions(platform, down)
|
198
|
-
missing_down = down.length - matched_down.length
|
199
216
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
else
|
204
|
-
experiments = positions_down.keys
|
205
|
-
end
|
206
|
-
|
207
|
-
scores = experiments.collect{|experiment|
|
217
|
+
def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
218
|
+
scores = []
|
219
|
+
positions_up.keys.each do |experiment|
|
208
220
|
score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
|
209
221
|
score[:total_entries] = platform_entries
|
210
|
-
score[:positions_up] =
|
211
|
-
score[:positions_down] =
|
212
|
-
score
|
213
|
-
|
222
|
+
score[:positions_up] = complete_positions(positions_up[experiment], matched_up, up) if up.any?
|
223
|
+
score[:positions_down] = complete_positions(positions_down[experiment], matched_down, down) if down.any?
|
224
|
+
scores << score
|
225
|
+
end
|
214
226
|
|
215
227
|
pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
|
216
228
|
|
217
229
|
results = {}
|
218
|
-
|
219
|
-
results[experiment] = scores[i]
|
220
|
-
results[experiment][:pvalue] = pvalues[i]
|
230
|
+
positions_up.keys.each_with_index{|experiment,i|
|
231
|
+
results[experiment] = scores[i].merge(:pvalue => pvalues[i])
|
221
232
|
}
|
222
233
|
|
223
|
-
|
224
234
|
results
|
225
235
|
end
|
226
236
|
|
237
|
+
def self.dataset_scores(dataset, up, down)
|
238
|
+
positions_up, matched_up, platform_entries = MADB.dataset_positions(dataset, up)
|
239
|
+
missing_up = positions_up.length - matched_up.length
|
227
240
|
|
228
|
-
|
229
|
-
|
241
|
+
positions_down, matched_down = MADB.dataset_positions(dataset, down)
|
242
|
+
missing_down = positions_down.length - matched_down.length
|
230
243
|
|
231
|
-
platform_entries
|
232
|
-
|
233
|
-
positions.each{|experiment, positions|
|
234
|
-
Score.draw_hits(positions.compact, platform_entries, File.join(directory, experiment.hash_code + '.png'), {:size => 1000, :bg_color => :green})
|
235
|
-
}
|
244
|
+
position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
236
245
|
end
|
237
246
|
|
247
|
+
def self.platform_scores(platform, up, down)
|
248
|
+
positions_up, matched_up, platform_entries = MADB.platform_positions(platform, up)
|
249
|
+
missing_up = up.length - matched_up.length
|
238
250
|
|
239
|
-
|
240
|
-
|
241
|
-
dataset.sub!(/_cross_platform/,'')
|
242
|
-
platform = GEO.dataset_platform(dataset) + '_cross_platform'
|
243
|
-
else
|
244
|
-
platform = GEO.dataset_platform(dataset)
|
245
|
-
end
|
246
|
-
|
247
|
-
|
248
|
-
positions = {}
|
249
|
-
MADB::GEO.positions(platform, genes).first.select{|k,v| k =~ /^#{ dataset }/}.each{|p|
|
250
|
-
positions[p[0].sub(/^#{ dataset }: /,'')] = p[1]
|
251
|
-
}
|
252
|
-
positions
|
253
|
-
end
|
254
|
-
|
255
|
-
def self.dataset_scores(dataset, genes)
|
256
|
-
total = DBcache::num_rows(platform)
|
257
|
-
positions = dataset_positions(dataset, genes)
|
251
|
+
positions_down, matched_down = MADB.platform_positions(platform, down)
|
252
|
+
missing_down = down.length - matched_down.length
|
258
253
|
|
259
|
-
|
260
|
-
positions.each{|experiment, positions|
|
261
|
-
scores[experiment] = Score.score(positions, total, genes.length - positions.length)
|
262
|
-
}
|
263
|
-
scores
|
254
|
+
position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
|
264
255
|
end
|
265
256
|
|
266
257
|
end
|
267
258
|
end
|
268
259
|
|
269
260
|
if __FILE__ == $0
|
270
|
-
p MARQ::Dataset.
|
261
|
+
p MARQ::Dataset.platform 'GDS2791_cross_platform'
|
262
|
+
p MARQ::Platform.organism 'GPL96'
|
271
263
|
exit
|
272
264
|
#puts MARQ::organism_platforms('human')
|
273
265
|
#puts MARQ.platform_organism("HaploidData")
|
data/lib/MARQ/rankproduct.rb
CHANGED
@@ -1,43 +1,29 @@
|
|
1
1
|
require 'MARQ'
|
2
|
+
require 'MARQ/main'
|
2
3
|
module RankProduct
|
3
|
-
|
4
4
|
def self.ranks(dataset, experiment , from_FC = false, invert = false)
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
nil
|
17
|
-
|
18
|
-
# To sort decreasingly we change sign by default
|
19
|
-
when invert
|
20
|
-
value.to_f
|
21
|
-
else
|
22
|
-
- value.to_f
|
23
|
-
end
|
24
|
-
}
|
25
|
-
Hash[*codes.zip(ratios).sort{|a,b| b[1] <=> a[1]}.collect{|p| p[0]}.zip((1..codes.length).to_a).flatten]
|
5
|
+
codes = MARQ::Dataset.codes(dataset)
|
6
|
+
|
7
|
+
if from_FC
|
8
|
+
ratios = MARQ::Dataset.logratios(dataset)[experiment.strip]
|
9
|
+
sorted_genes = codes.zip(ratios).
|
10
|
+
reject {|p| p[1].nil? }.
|
11
|
+
sort_by {|p| p[1] }.
|
12
|
+
collect {|p| p[0] }
|
13
|
+
sorted_genes.reverse! unless invert
|
14
|
+
ranks = Hash[*sorted_genes.zip((1..sorted_genes.length).to_a).flatten]
|
15
|
+
(codes - sorted_genes).each {|gene| ranks[gene] = nil}
|
26
16
|
else
|
27
|
-
orders =
|
28
|
-
value = line.strip.split("\t")[field]
|
29
|
-
case
|
30
|
-
when value == "NA"
|
31
|
-
nil
|
32
|
-
when invert
|
33
|
-
codes.length - line.strip.split("\t")[field].to_i + 1
|
34
|
-
else
|
35
|
-
line.strip.split("\t")[field].to_i
|
36
|
-
end
|
17
|
+
orders = MARQ::Dataset.orders(dataset)[experiment.strip]
|
37
18
|
|
38
|
-
|
39
|
-
|
19
|
+
if invert
|
20
|
+
num_genes = orders.length
|
21
|
+
orders.collect! {|pos| pos.nil? ? nil : num_genes - pos }
|
22
|
+
end
|
23
|
+
|
24
|
+
ranks = Hash[*codes.zip(orders).flatten]
|
40
25
|
end
|
26
|
+
ranks
|
41
27
|
end
|
42
28
|
|
43
29
|
def self.score(gene_ranks, signature_sizes)
|