rbbt-marq 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/MARQ/main.rb CHANGED
@@ -3,86 +3,156 @@ require 'MARQ/MADB'
3
3
  require 'MARQ/score'
4
4
 
5
5
  module MARQ
6
- def self.platform_type(platform)
7
- if platform.match(/GPL\d+|GDS\d+|GSE\d+/)
8
- return :GEO
9
- else
10
- return :CustomDS
6
+ module Platform
7
+ def self.is_GEO?(platform)
8
+ ! platform.match(/^GPL/).nil?
11
9
  end
12
- end
13
10
 
14
- def self.dataset_path(dataset)
15
- if platform_type(dataset) == :GEO
16
- GEO::dataset_path(dataset)
17
- else
18
- CustomDS::path(dataset)
11
+ def self.is_cross_platform?(platform)
12
+ ! platform.match(/_cross_platform$/).nil?
13
+ end
14
+
15
+ def self.clean(name)
16
+ name.sub(/_cross_platform/,'') unless name.nil?
19
17
  end
20
- end
21
18
 
22
- def self.platform_organism(platform)
23
- if platform_type(platform) == :GEO
24
- if platform.match(/^GPL/)
25
- GEO::SOFT::GPL(platform)[:organism]
19
+ def self.path(platform)
20
+ platform = clean(platform)
21
+ if is_GEO? platform
22
+ GEO.platform_path(platform)
26
23
  else
27
- GEO::SOFT::GPL(GEO::dataset_platform(platform))[:organism]
24
+ CustomDS.platform_path(platform)
28
25
  end
29
- else
30
- CustomDS::organism(platform)
31
26
  end
32
- end
33
27
 
34
- def self.organism_platforms(org)
35
- GEO::organism_platforms(org) + CustomDS::datasets(org)
36
- end
28
+ def self.has_cross_platform?(platform)
29
+ File.exists? File.join(path(platform), 'cross_platform')
30
+ end
37
31
 
38
- def self.has_cross_platform?(dataset, platform=nil)
39
- if platform_type(platform) == :GEO
40
- GEO::has_cross_platform?(dataset, platform)
41
- else
42
- CustomDS::has_cross_platform?(platform)
32
+ def self.datasets(platform)
33
+ if is_GEO? platform
34
+ GEO.platform_datasets(platform)
35
+ else
36
+ CustomDS.platform_datasets(platform)
37
+ end
43
38
  end
44
- end
45
39
 
46
- def self.is_cross_platform?(dataset, platform=nil)
47
- if platform_type(platform) == :GEO
48
- GEO::is_cross_platform?(dataset, platform)
49
- else
50
- CustomDS::is_cross_platform?(platform)
40
+ def self.codes(platform)
41
+ platform = clean(platform)
42
+ Open.read(File.join(path(platform), 'codes')).scan(/[^\s]+/)
51
43
  end
52
- end
53
44
 
54
- def self.complete_positions(positions, matched, genes)
45
+ def self.cross_platform(platform)
46
+ platform = clean(platform)
47
+ Open.read(File.join(path(platform), 'cross_platform')).scan(/[^\s]+/)
48
+ end
55
49
 
56
- pos = Hash[*matched.zip(positions).flatten]
57
- complete = genes.collect{|gene|
58
- gene = gene.downcase.strip
59
- if matched.include? gene
60
- pos[gene] || "MISSING"
50
+ def self.organism(platform)
51
+ platform = clean(platform)
52
+ if is_GEO? platform
53
+ GEO.platform_organism platform
61
54
  else
62
- "NOT IN PLATFORM"
55
+ CustomDS.platform_organism platform
63
56
  end
64
- }
57
+ end
65
58
 
59
+ def self.process(platform)
60
+ platform = clean(platform)
61
+ if is_GEO? platform
62
+ GEO.process_platform(platform)
63
+ else
64
+ CustomDS.process_platform(platform)
65
+ end
66
+ end
67
+
68
+ def self.organism_platforms(organism)
69
+ GEO.platforms.select {|platform|
70
+ GEO::SOFT.GPL(platform)[:organism] == organism && MARQ::Platform.datasets(platform).any?
71
+ } +
72
+ CustomDS.organism_platforms(organism)
73
+ end
66
74
  end
67
75
 
76
+
68
77
  module Dataset
78
+ def self.is_GEO?(dataset)
79
+ ! dataset.match(/^(?:GDS|GSE)/).nil?
80
+ end
81
+
82
+ def self.clean(name)
83
+ name.sub(/_cross_platform/,'') if name
84
+ end
85
+
86
+ def self.path(platform)
87
+ if is_GEO? platform
88
+ GEO.dataset_path(platform)
89
+ else
90
+ CustomDS.dataset_path(platform)
91
+ end
92
+ end
69
93
 
70
94
  def self.exists?(dataset)
71
- MARQ::dataset_path(dataset) != nil
95
+ ! path(dataset).nil?
96
+ end
97
+
98
+ def self.is_cross_platform?(dataset)
99
+ ! dataset.match(/_cross_platform$/).nil?
100
+ end
101
+
102
+ def self.has_cross_platform?(dataset)
103
+ File.exists?(path(dataset) + '_cross_platform.orders')
104
+ end
105
+
106
+ def self.exists?(dataset)
107
+ path(dataset) != nil
108
+ end
109
+
110
+ def self.info(name)
111
+ begin
112
+ title, description = Open.read(path(name) + '.description').split(/\n--\n/).values_at(0,1)
113
+ {:title => title.strip, :description => description.strip}
114
+ rescue Exception
115
+ puts $!.message
116
+ {:title => "" , :description => "" }
117
+ end
118
+ end
119
+
120
+ def self.platform(dataset)
121
+ if is_GEO? dataset
122
+ GEO.dataset_platform(dataset)
123
+ else
124
+ CustomDS.dataset_platform(dataset)
125
+ end
126
+ end
127
+
128
+ def self.organism(dataset)
129
+ MARQ::Platform.organism(platform(dataset))
130
+ end
131
+
132
+ def self.process(dataset, platform = nil)
133
+ if is_GEO? dataset
134
+ GEO.process_dataset(dataset, platform)
135
+ else
136
+ CustomDS.process_dataset(dataset, platform)
137
+ end
72
138
  end
73
139
 
74
140
  def self.read_file(dataset, ext)
75
- Open.read(MARQ::dataset_path(dataset) + '.' + ext)
141
+ Open.read(path(dataset) + '.' + ext)
76
142
  end
77
143
 
78
- def self.read_values(dataset, file)
144
+ def self.read_values(dataset, file, integer = false)
79
145
  result = {}
80
146
 
81
147
  experiments = experiments(dataset)
82
148
  experiments.each{|experiment| result[experiment] = [] }
83
149
  read_file(dataset, file).split(/\n/).each do |line|
84
150
  values = line.chomp.split(/\t/)
85
- values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_f) }
151
+ if integer
152
+ values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_i)}
153
+ else
154
+ values.each_with_index{|value, i| result[experiments[i]] << (value == 'NA' ? nil : value.to_f)}
155
+ end
86
156
  end
87
157
 
88
158
  result
@@ -104,16 +174,6 @@ module MARQ
104
174
  end
105
175
 
106
176
 
107
- def self.platform_codes(platform)
108
- if MARQ::is_cross_platform? platform
109
- file = 'cross_platform'
110
- else
111
- file = 'codes'
112
- end
113
-
114
- Open.read(File.join(MARQ::platform_path(platform), file))
115
- end
116
-
117
177
  def self.experiments(dataset)
118
178
  read_file(dataset, 'experiments').split(/\n/)
119
179
  end
@@ -123,13 +183,13 @@ module MARQ
123
183
  end
124
184
 
125
185
  def self.orders(dataset)
126
- read_values(dataset, 'orders')
186
+ read_values(dataset, 'orders', true)
127
187
  end
128
188
 
129
189
  def self.logratios(dataset)
130
190
  read_values(dataset, 'logratios')
131
191
  end
132
-
192
+
133
193
  def self.pvalues(dataset)
134
194
  read_values_t(dataset, 'pvalues')
135
195
  end
@@ -138,136 +198,68 @@ module MARQ
138
198
  read_values_t(dataset, 't')
139
199
  end
140
200
 
141
-
142
-
143
- end
144
-
145
- def self.platform_scores_up_down(platform, up, down)
146
- if platform_type(platform) == :GEO
147
- GEORQ.platform_scores_up_down(platform, up, down)
148
- else
149
- CustomDSRQ.scores_up_down(platform, up, down)
150
- end
151
201
  end
152
202
 
153
- module CustomDSRQ
154
-
155
- def self.scores_up_down(platform, up, down)
156
- matched_up = DBcache.matches(platform + '_codes', up).length
157
- positions_up, matched_up = *MADB::CustomDS.positions(platform, up)
158
- missing_up = up.length - matched_up.length
159
-
160
-
161
-
162
- matched_down = DBcache.matches(platform + '_codes', down).length
163
- positions_down, matched_down = *MADB::CustomDS.positions(platform, down)
164
- missing_down = down.length - matched_down.length
165
-
166
- experiments = positions_up.keys
167
- platform_entries = MADB::CustomDS.platform_entries(platform)
168
-
169
- scores = experiments.collect{|experiment|
170
- score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
171
- score[:total_entries] = platform_entries
172
- score[:positions_up] = MARQ.complete_positions(positions_up[experiment], matched_up, up) if positions_up.any?
173
- score[:positions_down] = MARQ.complete_positions(positions_down[experiment], matched_down, down) if positions_down.any?
174
- score
175
- }
176
-
177
- pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
178
-
179
- results = {}
180
- experiments.each_with_index{|experiment,i|
181
- results[experiment] = scores[i].merge(:pvalue => pvalues[i])
203
+ module RankQuery
204
+ def self.complete_positions(positions, matched, genes)
205
+ pos = Hash[*matched.zip(positions).flatten]
206
+ complete = genes.collect{|gene|
207
+ gene = gene.downcase.strip
208
+ if matched.include? gene
209
+ pos[gene] || "MISSING"
210
+ else
211
+ "NOT IN PLATFORM"
212
+ end
182
213
  }
183
-
184
-
185
- results
186
214
  end
187
- end
188
-
189
- module GEORQ
190
-
191
- def self.platform_scores_up_down(platform, up, down)
192
- platform_entries = MADB::GEO.platform_entries(platform)
193
215
 
194
- positions_up, matched_up = *MADB::GEO.positions(platform, up)
195
- missing_up = up.length - matched_up.length
196
-
197
- positions_down, matched_down = *MADB::GEO.positions(platform, down)
198
- missing_down = down.length - matched_down.length
199
216
 
200
- return {} if positions_up.keys.empty? && positions_down.keys.empty?
201
- if positions_up.keys.any?
202
- experiments = positions_up.keys
203
- else
204
- experiments = positions_down.keys
205
- end
206
-
207
- scores = experiments.collect{|experiment|
217
+ def self.position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
218
+ scores = []
219
+ positions_up.keys.each do |experiment|
208
220
  score = Score.score_up_down(positions_up[experiment], positions_down[experiment], platform_entries, missing_up, missing_down)
209
221
  score[:total_entries] = platform_entries
210
- score[:positions_up] = MARQ.complete_positions(positions_up[experiment], matched_up, up) if positions_up.any?
211
- score[:positions_down] = MARQ.complete_positions(positions_down[experiment], matched_down, down) if positions_down.any?
212
- score
213
- }
222
+ score[:positions_up] = complete_positions(positions_up[experiment], matched_up, up) if up.any?
223
+ score[:positions_down] = complete_positions(positions_down[experiment], matched_down, down) if down.any?
224
+ scores << score
225
+ end
214
226
 
215
227
  pvalues = Score.pvalues(scores.collect{|s| s[:score]}, up.length, down.length, platform_entries)
216
228
 
217
229
  results = {}
218
- experiments.each_with_index{|experiment,i|
219
- results[experiment] = scores[i]
220
- results[experiment][:pvalue] = pvalues[i]
230
+ positions_up.keys.each_with_index{|experiment,i|
231
+ results[experiment] = scores[i].merge(:pvalue => pvalues[i])
221
232
  }
222
233
 
223
-
224
234
  results
225
235
  end
226
236
 
237
+ def self.dataset_scores(dataset, up, down)
238
+ positions_up, matched_up, platform_entries = MADB.dataset_positions(dataset, up)
239
+ missing_up = positions_up.length - matched_up.length
227
240
 
228
- def self.draw_hits(platform, genes, directory)
229
- positions, matched = MADB::GEO.positions(platform, genes).values_at(0,1)
241
+ positions_down, matched_down = MADB.dataset_positions(dataset, down)
242
+ missing_down = positions_down.length - matched_down.length
230
243
 
231
- platform_entries = MADB::GEO.platform_entries(platform)
232
-
233
- positions.each{|experiment, positions|
234
- Score.draw_hits(positions.compact, platform_entries, File.join(directory, experiment.hash_code + '.png'), {:size => 1000, :bg_color => :green})
235
- }
244
+ position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
236
245
  end
237
246
 
247
+ def self.platform_scores(platform, up, down)
248
+ positions_up, matched_up, platform_entries = MADB.platform_positions(platform, up)
249
+ missing_up = up.length - matched_up.length
238
250
 
239
- def self.dataset_positions(dataset, genes)
240
- if dataset =~ /_cross_platform/
241
- dataset.sub!(/_cross_platform/,'')
242
- platform = GEO.dataset_platform(dataset) + '_cross_platform'
243
- else
244
- platform = GEO.dataset_platform(dataset)
245
- end
246
-
247
-
248
- positions = {}
249
- MADB::GEO.positions(platform, genes).first.select{|k,v| k =~ /^#{ dataset }/}.each{|p|
250
- positions[p[0].sub(/^#{ dataset }: /,'')] = p[1]
251
- }
252
- positions
253
- end
254
-
255
- def self.dataset_scores(dataset, genes)
256
- total = DBcache::num_rows(platform)
257
- positions = dataset_positions(dataset, genes)
251
+ positions_down, matched_down = MADB.platform_positions(platform, down)
252
+ missing_down = down.length - matched_down.length
258
253
 
259
- scores = {}
260
- positions.each{|experiment, positions|
261
- scores[experiment] = Score.score(positions, total, genes.length - positions.length)
262
- }
263
- scores
254
+ position_scores(up, down, positions_up, positions_down, platform_entries, matched_up, matched_down, missing_up, missing_down)
264
255
  end
265
256
 
266
257
  end
267
258
  end
268
259
 
269
260
  if __FILE__ == $0
270
- p MARQ::Dataset.orders('GDS2419')
261
+ p MARQ::Dataset.platform 'GDS2791_cross_platform'
262
+ p MARQ::Platform.organism 'GPL96'
271
263
  exit
272
264
  #puts MARQ::organism_platforms('human')
273
265
  #puts MARQ.platform_organism("HaploidData")
@@ -1,43 +1,29 @@
1
1
  require 'MARQ'
2
+ require 'MARQ/main'
2
3
  module RankProduct
3
-
4
4
  def self.ranks(dataset, experiment , from_FC = false, invert = false)
5
- path = MARQ.dataset_path(dataset)
6
- codes = Open.read(path + '.codes').collect{|line| line.strip}
7
-
8
- experiments = Open.read(path + '.experiments').collect{|line| line.strip}
9
- field = experiments.index experiment.strip
10
-
11
- if from_FC
12
- ratios = Open.read(path + '.logratios').collect{|line|
13
- value = line.strip.split("\t")[field]
14
- case
15
- when value == "NA"
16
- nil
17
-
18
- # To sort decreasingly we change sign by default
19
- when invert
20
- value.to_f
21
- else
22
- - value.to_f
23
- end
24
- }
25
- Hash[*codes.zip(ratios).sort{|a,b| b[1] <=> a[1]}.collect{|p| p[0]}.zip((1..codes.length).to_a).flatten]
5
+ codes = MARQ::Dataset.codes(dataset)
6
+
7
+ if from_FC
8
+ ratios = MARQ::Dataset.logratios(dataset)[experiment.strip]
9
+ sorted_genes = codes.zip(ratios).
10
+ reject {|p| p[1].nil? }.
11
+ sort_by {|p| p[1] }.
12
+ collect {|p| p[0] }
13
+ sorted_genes.reverse! unless invert
14
+ ranks = Hash[*sorted_genes.zip((1..sorted_genes.length).to_a).flatten]
15
+ (codes - sorted_genes).each {|gene| ranks[gene] = nil}
26
16
  else
27
- orders = Open.read(path + '.orders').collect{|line|
28
- value = line.strip.split("\t")[field]
29
- case
30
- when value == "NA"
31
- nil
32
- when invert
33
- codes.length - line.strip.split("\t")[field].to_i + 1
34
- else
35
- line.strip.split("\t")[field].to_i
36
- end
17
+ orders = MARQ::Dataset.orders(dataset)[experiment.strip]
37
18
 
38
- }
39
- Hash[*codes.zip(orders).flatten]
19
+ if invert
20
+ num_genes = orders.length
21
+ orders.collect! {|pos| pos.nil? ? nil : num_genes - pos }
22
+ end
23
+
24
+ ranks = Hash[*codes.zip(orders).flatten]
40
25
  end
26
+ ranks
41
27
  end
42
28
 
43
29
  def self.score(gene_ranks, signature_sizes)