rbbt-marq 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/MARQ/main.rb CHANGED
@@ -3,86 +3,156 @@ require 'MARQ/MADB'
3
3
  require 'MARQ/score'
4
4
 
5
5
  module MARQ
6
- def self.platform_type(platform)
7
- if platform.match(/GPL\d+|GDS\d+|GSE\d+/)
8
- return :GEO
9
- else
10
- return :CustomDS
6
+ module Platform
7
+ def self.is_GEO?(platform)
8
+ ! platform.match(/^GPL/).nil?
11
9
  end
12
- end
13
10
 
14
- def self.dataset_path(dataset)
15
- if platform_type(dataset) == :GEO
16
- GEO::dataset_path(dataset)
17
- else
18
- CustomDS::path(dataset)
11
+ def self.is_cross_platform?(platform)
12
+ ! platform.match(/_cross_platform$/).nil?
13
+ end
14
+
15
+ def self.clean(name)
16
+ name.sub(/_cross_platform/,'') unless name.nil?
19
17
  end
20
- end
21
18
 
22
- def self.platform_organism(platform)
23
- if platform_type(platform) == :GEO
24
- if platform.match(/^GPL/)
25
- GEO::SOFT::GPL(platform)[:organism]
19
+ def self.path(platform)
20
+ platform = clean(platform)
21
+ if is_GEO? platform
22
+ GEO.platform_path(platform)
26
23
  else
27
- GEO::SOFT::GPL(GEO::dataset_platform(platform))[:organism]
24
+ CustomDS.platform_path(platform)
28
25
  end
29
- else
30
- CustomDS::organism(platform)
31
26
  end
32
- end
33
27
 
34
- def self.organism_platforms(org)
35
- GEO::organism_platforms(org) + CustomDS::datasets(org)
36
- end
28
+ def self.has_cross_platform?(platform)
29
+ File.exists? File.join(path(platform), 'cross_platform')
30
+ end
37
31
 
38
- def self.has_cross_platform?(dataset, platform=nil)
39
- if platform_type(platform) == :GEO
40
- GEO::has_cross_platform?(dataset, platform)
41
- else
42
- CustomDS::has_cross_platform?(platform)
32
+ def self.datasets(platform)
33
+ if is_GEO? platform
34
+ GEO.platform_datasets(platform)
35
+ else
36
+ CustomDS.platform_datasets(platform)
37
+ end
43
38
  end
44
- end
45
39
 
46
- def self.is_cross_platform?(dataset, platform=nil)
47
- if platform_type(platform) == :GEO
48
- GEO::is_cross_platform?(dataset, platform)
49
- else
50
- CustomDS::is_cross_platform?(platform)
40
+ def self.codes(platform)
41
+ platform = clean(platform)
42
+ Open.read(File.join(path(platform), 'codes')).scan(/[^\s]+/)
51
43
  end
52
- end
53
44
 
54
- def self.complete_positions(positions, matched, genes)
45
+ def self.cross_platform(platform)
46
+ platform = clean(platform)
47
+ Open.read(File.join(path(platform), 'cross_platform')).scan(/[^\s]+/)
48
+ end
55
49
 
56
- pos = Hash[*matched.zip(positions).flatten]
57
- complete = genes.collect{|gene|
58
- gene = gene.downcase.strip
59
- if matched.include? gene
60
- pos[gene] || "MISSING"
50
+ def self.organism(platform)
51
+ platform = clean(platform)
52
+ if is_GEO? platform
53
+ GEO.platform_organism platform
61
54
  else
62
- "NOT IN PLATFORM"
55
+ CustomDS.platform_organism platform
63
56
  end
64
- }
57
+ end
65
58
 
59
+ def self.process(platform)
60
+ platform = clean(platform)
61
+ if is_GEO? platform
62
+ GEO.process_platform(platform)
63
+ else
64
+ CustomDS.process_platform(platform)
65
+ end
66
+ end
67
+
68
+ def self.organism_platforms(organism)
69
+ GEO.platforms.select {|platform|
70
+ GEO::SOFT.GPL(platform)[:organism] == organism && MARQ::Platform.datasets(platform).any?
71
+ } +
72
+ CustomDS.organism_platforms(organism)
73
+ end
66
74
  end
67
75
 
76
+
68
77
  module Dataset
78
+ def self.is_GEO?(dataset)
79
+ ! dataset.match(/^(?:GDS|GSE)/).nil?
80
+ end
81
+
82
+ def self.clean(name)
83
+ name.sub(/_cross_platform/,'') if name
84
+ end
85
+
86
+ def self.path(platform)
87
+ if is_GEO? platform
88
+ GEO.dataset_path(platform)
89
+ else
90
+ CustomDS.dataset_path(platform)
91
+ end
92
+ end
69
93
 
70
94
  def self.exists?(dataset)
71
- MARQ::dataset_path(dataset) != nil
95
+ ! path(dataset).nil?
96
+ end
97
+
98
+ def self.is_cross_platform?(dataset)
99
+ ! dataset.match(/_cross_platform$/).nil?
100
+ end
101
+
102
+ def self.has_cross_platform?(dataset)
103
+ File.exists?(path(dataset) + '_cross_platform.orders')
104
+ end
105
+
106
+ def self.exists?(dataset)
107
+ path(dataset) != nil
108
+ end
109
+
110
+ def self.info(name)
111
+ begin
112
+ title, description = Open.read(path(name) + '.description').split(/\n--\n/).values_at(0,1)
113
+ {:title => title.strip, :description => description.strip}
114
+ rescue Exception
115
+ puts $!.message
116
+ {:title => "" , :description => "" }
117
+ end
118
+ end
119
+
120
+ def self.platform(dataset)
121
+ if is_GEO? dataset
122
+ GEO.dataset_platform(dataset)
123
+ else
124
+ CustomDS.dataset_platform(dataset)
125
+ end
126
+ end
127
+
128
+ def self.organism(dataset)
129
+ MARQ::Platform.organism(platform(dataset))
130
+ end
131
+
132
+ def self.process(dataset, platform = nil)
133
+ if is_GEO? dataset
134
+ GEO.process_dataset(dataset, platform)
135
+ else
136
+ CustomDS.process_dataset(dataset, platform)
137
+ end
72
138
  end
73
139
 
74
140
  def self.read_file(dataset, ext)
75
- Open.read(MARQ::dataset_path(dataset) + '.' + ext)
141
+ Open.read(path(dataset) + '.' + ext)
76
142
  end
77
143
 
78
- def self.read_values(dataset, file)
144
+ def self.read_values(dataset, file, integer = false)
79
145
  result = {}
80
146
 
81
147
  experiments = experiments(dataset)
82
148
  experiments.each{|experiment| result[experiment] = [] }
83
149
  read_file(dataset, file).split(/\n/).each do |line|
84
150
  values = line.chomp.split(/\t/)
85
- values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_f) }
151
+ if integer
152
+ values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_i)}
153
+ else
154
+ values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_f)}
155
+ end
86
156
  end
87
157
 
88
158
  result
@@ -104,16 +174,6 @@ module MARQ
104
174
  end
105
175
 
106
176
 
107
- def self.platform_codes(platform)
108
- if MARQ::is_cross_platform? platform
109
- file = 'cross_platform'
110
- else
111
- file = 'codes'
112
- end
113
-
114
- Open.read(File.join(MARQ::platform_path(platform), file))
115
- end
116
-
117
177
  def self.experiments(dataset)
118
178
  read_file(dataset, 'experiments').split(/\n/)
119
179
  end
@@ -123,13 +183,13 @@ module MARQ
123
183
  end
124
184
 
125
185
  def self.orders(dataset)
126
- read_values(dataset, 'orders')
186
+ read_values(dataset, 'orders', true)
127
187
  end
128
188
 
129
189
  def self.logratios(dataset)
130
190
  read_values(dataset, 'logratios')
131
191
  end
132
-
192
+
133
193
  def self.pvalues(dataset)
134
194
  read_values_t(dataset, 'pvalues')
135
195
  end
@@ -138,136 +198,68 @@ module MARQ
138
198
  read_values_t(dataset, 't')
139
199
  end
140
200
 
141
-
142
-
143
- end
144
-
145
- def self.platform_scores_up_down(platform, up, down)
146
- if platform_type(platform) == :GEO
147
- GEORQ.platform_scores_up_down(platform, up, down)
148
- else
149
- CustomDSRQ.scores_up_down(platform, up, down)
150
- end
151
201
  end
152
202
 
153
- module CustomDSRQ
154
-
155
- def self.scores_up_down(platform, up, down)
156
- matched_up = DBcache.matches(platform + '_codes', up).length
157
- positions_up, matched_up = *MADB::CustomDS.positions(platform, up)
158
- missing_up = up.length - matched_up.length
159
-
160
-
161
-
162
- matched_down = DBcache.matches(platform + '_codes', down).length
163
- positions_down, matched_down = *MADB::CustomDS.positions(platform, down)
164
- missing_down = down.length - matched_down.length
165
-
166
- experiments = positions_up.keys
167
- platform_entries = MADB::CustomDS.platform_entries(platform)
168
-
169
- scores = experiments.collect{|experiment|
170
- score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
171
- score[:total_entries] = platform_entries
172
- score[:positions_up] = MARQ.complete_positions(positions_up[experiment], matched_up, up) if positions_up.any?
173
- score[:positions_down] = MARQ.complete_positions(positions_down[experiment], matched_down, down) if positions_down.any?
174
- score
175
- }
176
-
177
- pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
178
-
179
- results = {}
180
- experiments.each_with_index{|experiment,i|
181
- results[experiment] = scores[i].merge(:pvalue => pvalues[i])
203
+ module RankQuery
204
+ def self.complete_positions(positions, matched, genes)
205
+ pos = Hash[*matched.zip(positions).flatten]
206
+ complete = genes.collect{|gene|
207
+ gene = gene.downcase.strip
208
+ if matched.include? gene
209
+ pos[gene] || "MISSING"
210
+ else
211
+ "NOT IN PLATFORM"
212
+ end
182
213
  }
183
-
184
-
185
- results
186
214
  end
187
- end
188
-
189
- module GEORQ
190
-
191
- def self.platform_scores_up_down(platform, up, down)
192
- platform_entries = MADB::GEO.platform_entries(platform)
193
215
 
194
- positions_up, matched_up = *MADB::GEO.positions(platform, up)
195
- missing_up = up.length - matched_up.length
196
-
197
- positions_down, matched_down = *MADB::GEO.positions(platform, down)
198
- missing_down = down.length - matched_down.length
199
216
 
200
- return {} if positions_up.keys.empty? && positions_down.keys.empty?
201
- if positions_up.keys.any?
202
- experiments = positions_up.keys
203
- else
204
- experiments = positions_down.keys
205
- end
206
-
207
- scores = experiments.collect{|experiment|
217
+ def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
218
+ scores = []
219
+ positions_up.keys.each do |experiment|
208
220
  score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
209
221
  score[:total_entries] = platform_entries
210
- score[:positions_up] = MARQ.complete_positions(positions_up[experiment], matched_up, up) if positions_up.any?
211
- score[:positions_down] = MARQ.complete_positions(positions_down[experiment], matched_down, down) if positions_down.any?
212
- score
213
- }
222
+ score[:positions_up] = complete_positions(positions_up[experiment], matched_up, up) if up.any?
223
+ score[:positions_down] = complete_positions(positions_down[experiment], matched_down, down) if down.any?
224
+ scores << score
225
+ end
214
226
 
215
227
  pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
216
228
 
217
229
  results = {}
218
- experiments.each_with_index{|experiment,i|
219
- results[experiment] = scores[i]
220
- results[experiment][:pvalue] = pvalues[i]
230
+ positions_up.keys.each_with_index{|experiment,i|
231
+ results[experiment] = scores[i].merge(:pvalue => pvalues[i])
221
232
  }
222
233
 
223
-
224
234
  results
225
235
  end
226
236
 
237
+ def self.dataset_scores(dataset, up, down)
238
+ positions_up, matched_up, platform_entries = MADB.dataset_positions(dataset, up)
239
+ missing_up = positions_up.length - matched_up.length
227
240
 
228
- def self.draw_hits(platform, genes, directory)
229
- positions, matched = MADB::GEO.positions(platform, genes).values_at(0,1)
241
+ positions_down, matched_down = MADB.dataset_positions(dataset, down)
242
+ missing_down = positions_down.length - matched_down.length
230
243
 
231
- platform_entries = MADB::GEO.platform_entries(platform)
232
-
233
- positions.each{|experiment, positions|
234
- Score.draw_hits(positions.compact, platform_entries, File.join(directory, experiment.hash_code + '.png'), {:size => 1000, :bg_color => :green})
235
- }
244
+ position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
236
245
  end
237
246
 
247
+ def self.platform_scores(platform, up, down)
248
+ positions_up, matched_up, platform_entries = MADB.platform_positions(platform, up)
249
+ missing_up = up.length - matched_up.length
238
250
 
239
- def self.dataset_positions(dataset, genes)
240
- if dataset =~ /_cross_platform/
241
- dataset.sub!(/_cross_platform/,'')
242
- platform = GEO.dataset_platform(dataset) + '_cross_platform'
243
- else
244
- platform = GEO.dataset_platform(dataset)
245
- end
246
-
247
-
248
- positions = {}
249
- MADB::GEO.positions(platform, genes).first.select{|k,v| k =~ /^#{ dataset }/}.each{|p|
250
- positions[p[0].sub(/^#{ dataset }: /,'')] = p[1]
251
- }
252
- positions
253
- end
254
-
255
- def self.dataset_scores(dataset, genes)
256
- total = DBcache::num_rows(platform)
257
- positions = dataset_positions(dataset, genes)
251
+ positions_down, matched_down = MADB.platform_positions(platform, down)
252
+ missing_down = down.length - matched_down.length
258
253
 
259
- scores = {}
260
- positions.each{|experiment, positions|
261
- scores[experiment] = Score.score(positions, total, genes.length - positions.length)
262
- }
263
- scores
254
+ position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
264
255
  end
265
256
 
266
257
  end
267
258
  end
268
259
 
269
260
  if __FILE__ == $0
270
- p MARQ::Dataset.orders('GDS2419')
261
+ p MARQ::Dataset.platform 'GDS2791_cross_platform'
262
+ p MARQ::Platform.organism 'GPL96'
271
263
  exit
272
264
  #puts MARQ::organism_platforms('human')
273
265
  #puts MARQ.platform_organism("HaploidData")
@@ -1,43 +1,29 @@
1
1
  require 'MARQ'
2
+ require 'MARQ/main'
2
3
  module RankProduct
3
-
4
4
  def self.ranks(dataset, experiment , from_FC = false, invert = false)
5
- path = MARQ.dataset_path(dataset)
6
- codes = Open.read(path + '.codes').collect{|line| line.strip}
7
-
8
- experiments = Open.read(path + '.experiments').collect{|line| line.strip}
9
- field = experiments.index experiment.strip
10
-
11
- if from_FC
12
- ratios = Open.read(path + '.logratios').collect{|line|
13
- value = line.strip.split("\t")[field]
14
- case
15
- when value == "NA"
16
- nil
17
-
18
- # To sort decreasingly we change sign by default
19
- when invert
20
- value.to_f
21
- else
22
- - value.to_f
23
- end
24
- }
25
- Hash[*codes.zip(ratios).sort{|a,b| b[1] <=> a[1]}.collect{|p| p[0]}.zip((1..codes.length).to_a).flatten]
5
+ codes = MARQ::Dataset.codes(dataset)
6
+
7
+ if from_FC
8
+ ratios = MARQ::Dataset.logratios(dataset)[experiment.strip]
9
+ sorted_genes = codes.zip(ratios).
10
+ reject {|p| p[1].nil? }.
11
+ sort_by {|p| p[1] }.
12
+ collect {|p| p[0] }
13
+ sorted_genes.reverse! unless invert
14
+ ranks = Hash[*sorted_genes.zip((1..sorted_genes.length).to_a).flatten]
15
+ (codes - sorted_genes).each {|gene| ranks[gene] = nil}
26
16
  else
27
- orders = Open.read(path + '.orders').collect{|line|
28
- value = line.strip.split("\t")[field]
29
- case
30
- when value == "NA"
31
- nil
32
- when invert
33
- codes.length - line.strip.split("\t")[field].to_i + 1
34
- else
35
- line.strip.split("\t")[field].to_i
36
- end
17
+ orders = MARQ::Dataset.orders(dataset)[experiment.strip]
37
18
 
38
- }
39
- Hash[*codes.zip(orders).flatten]
19
+ if invert
20
+ num_genes = orders.length
21
+ orders.collect! {|pos| pos.nil? ? nil : num_genes - pos }
22
+ end
23
+
24
+ ranks = Hash[*codes.zip(orders).flatten]
40
25
  end
26
+ ranks
41
27
  end
42
28
 
43
29
  def self.score(gene_ranks, signature_sizes)