rbbt-marq 1.1.0 → 2.0.0
- data/R/GEO.R +6 -4
- data/R/MA.R +1 -0
- data/bin/marq_config +4 -3
- data/install_scripts/CustomDS/Rakefile +27 -215
- data/install_scripts/GEO/Rakefile +34 -275
- data/install_scripts/rake_includes.rb +236 -0
- data/lib/MARQ/CustomDS.rb +63 -32
- data/lib/MARQ/GEO.rb +99 -63
- data/lib/MARQ/MADB.rb +107 -202
- data/lib/MARQ/annotations.rb +124 -38
- data/lib/MARQ/main.rb +152 -160
- data/lib/MARQ/rankproduct.rb +20 -34
- data/tasks/install.rake +7 -2
- metadata +3 -2
data/install_scripts/rake_includes.rb ADDED
@@ -0,0 +1,236 @@
+$expr_threshold ||= (ENV['threshold'] || 0.05).to_f
+$folds ||= (ENV['folds'] || 2.5).to_f
+$nth_genes ||= (ENV['nth_genes'] || 100).to_i
+
+$force = [$force, ENV['force'], false].compact.first.to_s == 'true'
+$tranlations = [$tranlations, ENV['translations'], false].compact.first.to_s == 'true'
+$series = [$series, ENV['series'], true].compact.first.to_s == 'true'
+$update_db = [$update_db, ENV['update_db'], false].compact.first.to_s == 'true'
+$skip_db = [$skip_db, ENV['skip_db'], false].compact.first.to_s == 'true'
+$fdr = [$fdr, ENV['fdr'], true].compact.first.to_s == 'true'
+$do_folds = [$do_folds, ENV['do_folds'], true].compact.first.to_s == 'true'
+
+
+$changes = false
+module GEO::Process::R
+  class << self
+    alias_method :GDS_old, :GDS
+    def GDS(*args)
+      $changes = true
+      GDS_old(*args)
+    end
+
+    alias_method :GSE_old, :GSE
+    def GSE(*args)
+      $changes = true
+      GSE_old(*args)
+    end
+  end
+end
+
+desc "Analyze datasets"
+task 'data' do
+
+  platforms_to_save = []
+
+  platforms = process_list
+  platforms.each{|platform, datasets|
+
+    begin
+      # Prepare the platform
+      MARQ::Platform.process(platform)
+    rescue
+      puts "Error processing platform #{platform}"
+      puts $!.message
+      puts $!.backtrace.join("\n")
+      next
+    end
+
+    next if $tranlations
+
+    $changes = false
+    # Process all datasets
+    datasets.each{|dataset|
+      begin
+        next unless $force || MARQ::Dataset.path(dataset).nil?
+        MARQ::Dataset.process(dataset, platform)
+      rescue
+        puts "Error processing dataset #{ dataset }"
+        puts $!.message
+        puts $!.backtrace.join("\n")
+      end
+    }
+
+    # Mark the platform for saving in DB
+    platforms_to_save << platform if $changes || $update_db
+  }
+
+  platforms_to_save.each{|platform|
+    begin
+      puts "Saving #{platform}"
+      MADB.save_platform(platform)
+    rescue
+      puts "Error saving platform #{ platform }"
+      puts $!.message
+      puts $!.backtrace.join("\n")
+    end
+  }
+end
+
+def annotations(name, cross_platform = false, &block)
+  platforms = process_list
+
+  platforms.each do |platform, datasets|
+    datasets.each do |dataset|
+      begin
+        next if File.exist?(File.join("annotations", name, dataset)) && ! $force
+        next if MARQ::Dataset.path(dataset).nil?
+
+        FileUtils.mkdir_p File.join("annotations", name)
+        filename = File.join("annotations", name, dataset)
+        dataset += '_cross_platform' if cross_platform && MARQ::Platform::has_cross_platform?(platform)
+        next if ! MARQ::Dataset.exists?(dataset)
+        terms = block.call(dataset)
+        Open.write(filename, terms.to_yaml)
+      rescue
+        puts "Error processing dataset #{ dataset }"
+        puts $!.message
+        puts $!.backtrace.join("\n")
+      end
+    end
+  end
+end
+
+
+task 'annotate_Words' do
+  require 'MARQ/annotations'
+  require 'rbbt/bow/bow'
+  annotations('Words') do |dataset|
+    terms = {}
+    description = Open.read(MARQ::Dataset.path(dataset) + '.description')
+    terms[:dataset] = [dataset] + description.words.uniq
+    Open.read(MARQ::Dataset.path(dataset) + '.experiments').collect{|name|
+      name = name.strip
+      terms[name] = name.sub(/.*?: /,'').sub(/\[ratio\]/,'').words.uniq
+    }
+    terms
+  end
+end
+
+
+task 'annotate_UMLS' do
+  require 'MARQ/annotations'
+  require 'rbbt/util/misc'
+  annotations('UMLS') do |dataset|
+    terms = {}
+    description = Open.read(MARQ::Dataset.path(dataset) + '.description')
+    terms[:dataset] = Annotations::UMLS::OBA(description).uniq
+    Open.read(MARQ::Dataset.path(dataset) + '.experiments').collect{|name|
+      name = name.strip
+      terms[name] = Annotations::UMLS::OBA(name.sub(/.*?: /,'').sub(/\[ratio\]/,'')).uniq
+    }
+    terms
+  end
+end
+
+
+task 'annotate_Polysearch' do
+  require 'MARQ/annotations'
+  require 'rbbt/util/misc'
+  require 'rbbt/sources/polysearch'
+  annotations('Polysearch') do |dataset|
+    terms = {}
+    description = Open.read(MARQ::Dataset.path(dataset) + '.description')
+    terms[:dataset] = Polysearch::match(description).values.flatten.sort.collect{|n| n.gsub(/\s+/,' ').downcase}.uniq
+    Open.read(MARQ::Dataset.path(dataset) + '.experiments').collect{|name|
+      name = name.strip
+      terms[name] = Polysearch::match(name.sub(/.*?: /,'').sub(/\[ratio\]/,'')).values.flatten.sort.collect{|n| n.gsub(/\s+/,' ').downcase}.uniq
+    }
+    terms
+  end
+
+end
+
+def goterms(org, list, slim, threshold)
+  return [] if list.empty?
+  results = Annotations::Genes::Genecodis::Local.analysis(org, list, slim)
+  return [] if results.nil?
+  results.
+    select{|info| info[:s].to_i > 2 }.
+    select{|info| info[:hyp_c].to_f < threshold }.
+    collect{|info| info[:items]}.collect{|id| GO::id2name(id)}
+end
+
+task 'annotate_GO' do
+  require 'MARQ/annotations'
+  require 'rbbt/sources/go'
+  options = { :cut_off => $expr_threshold, :fdr => $fdr, :folds => $folds, :do_folds => $do_folds, :nth_genes => $nth_genes}
+  annotations('GO_up', true) do |dataset|
+    org = MARQ::Dataset.organism(dataset)
+    genes = Annotations::Genes.get_genes(dataset, options)
+
+    up = {}
+    genes[:up] ||= []
+    genes[:up].collect{|experiment,list|
+      up[experiment] = goterms(org, list, false, $expr_threshold)
+    }
+    up
+  end
+
+  annotations('GO_down', true) do |dataset|
+    org = MARQ::Dataset.organism(dataset)
+    genes = Annotations::Genes.get_genes(dataset, options)
+
+    down = {}
+    genes[:down] ||= []
+    genes[:down].collect{|experiment,list|
+      down[experiment] = goterms(org, list, false, $expr_threshold)
+    }
+    down
+  end
+
+  annotations('GOSlim_up', true) do |dataset|
+    org = MARQ::Dataset.organism(dataset)
+    genes = Annotations::Genes.get_genes(dataset, options)
+
+    up = {}
+    genes[:up] ||= []
+    genes[:up].collect{|experiment,list|
+      up[experiment] = goterms(org, list, true, $expr_threshold)
+    }
+    up
+  end
+
+  annotations('GOSlim_down', true) do |dataset|
+    org = MARQ::Dataset.organism(dataset)
+    genes = Annotations::Genes.get_genes(dataset, options)
+
+    down = {}
+    genes[:down] ||= []
+    genes[:down].collect{|experiment,list|
+      down[experiment] = goterms(org, list, true, $expr_threshold)
+    }
+    down
+  end
+end
+
+task 'annotate_SENT' do
+  require 'MARQ/annotations'
+  options = { :cut_off => $expr_threshold, :fdr => $fdr, :folds => $folds, :do_folds => $do_folds, :nth_genes => $nth_genes}
+  annotations('SENT') do |dataset|
+    org = MARQ::Dataset.organism(dataset)
+    genes = Annotations::Genes.get_genes(dataset, options)
+    terms = Annotations::Genes::SENT.terms(org, genes)
+    terms
+  end
+
+
+end
+
+task 'default' do
+  Rake::Task['data'].invoke
+  Rake::Task['annotate_Words'].invoke
+  Rake::Task['annotate_UMLS'].invoke
+  Rake::Task['annotate_Polysearch'].invoke
+  Rake::Task['annotate_GO'].invoke
+end
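Note on the configuration block at the top of this new file: each setting can be pre-set as a global by the Rakefile that includes it, overridden from the rake command line through an ENV variable, and otherwise falls back to a default. A minimal sketch of that precedence, reusing the $force / ENV['force'] names from the file above (the surrounding snippet is illustrative only):

    # Precedence: global pre-set by the including Rakefile > ENV variable > built-in default.
    # `compact` drops the nil candidates, `first` keeps the highest-priority one,
    # and the string comparison normalizes the result to a boolean.
    $force = nil                  # not pre-set by the including Rakefile
    ENV['force'] = 'true'         # what `rake data force=true` would set
    $force = [$force, ENV['force'], false].compact.first.to_s == 'true'
    puts $force                   # => true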
data/lib/MARQ/CustomDS.rb
CHANGED
@@ -5,9 +5,8 @@ require 'MARQ/ID'
 module CustomDS
   @@r = nil
 
-
-
-end
+  DATA_DIR = File.join(MARQ.datadir,'CustomDS')
+
 
   def self.r
     require 'rsruby'
@@ -35,33 +34,12 @@ module CustomDS
     end
   end
 
-  def self.path(dataset)
-    files = Dir.glob(customdir + "/*/#{ dataset }.orders")
-    if files.length == 1
-      files.first.sub(/.orders/,'')
-    else
-      Dir.glob(customdir + "/*/#{ dataset }").first
-    end
-  end
-
   def self.organism(dataset)
-    path(dataset).match(/#{
-  end
-
-  def self.is_cross_platform?(dataset)
-    dataset.match(/_cross_platform/)
-  end
-
-  def self.clean(dataset)
-    dataset.sub(/_cross_platform/,'')
-  end
-
-  def self.has_cross_platform?(dataset)
-    Dir.glob(path(clean(dataset)) + '_cross_platform.orders').any?
+    path(dataset).match(/#{ DATA_DIR }\/(.*?)\//)[1]
   end
 
   def self.datasets(org)
-    Dir.glob(File.join(
+    Dir.glob(File.join(DATA_DIR, org) + '/*.orders').collect{|f| clean(File.basename(f.sub(/.orders/,'')))}.uniq
   end
 
   def self.process_matrix(prefix, org)
@@ -82,9 +60,62 @@ module CustomDS
   end
 
   def self.process(name)
-
-
-
+  end
+
+  def self.organisms
+    Dir.glob(File.join(DATA_DIR, '*')).
+      select {|path| File.directory? path}.
+      collect {|path| File.basename path}
+  end
+
+  def self.dataset_path(dataset)
+    organisms.each do |organism|
+      case
+      when File.exists?(File.join(DATA_DIR, organism, dataset + '.orders'))
+        return File.join(DATA_DIR, organism, dataset)
+      when File.exists?(File.join(DATA_DIR, organism, dataset + '.skip'))
+        return nil
+      end
+    end
+    return nil
+  end
+
+  def self.platform_path(platform)
+    dataset_path(platform)
+  end
+
+  def self.platform_datasets(platform)
+    MARQ::Dataset.clean(platform)
+  end
+
+  def self.platform_organism(platform)
+    path = platform_path(platform)
+    return nil if path.nil?
+    path.match(/#{DATA_DIR}\/(.*)\/#{ platform }$/)
+    return $1
+  end
+
+  def self.dataset_organism(dataset)
+    platform_organism(dataset)
+  end
+
+  def self.dataset_platform(dataset)
+    dataset
+  end
+
+  def self.organism_platforms(organism)
+    Dir.glob(File.join(DATA_DIR,organism,'*.orders')).
+      collect {|path| File.basename(path).sub(/\.orders$/,'').sub(/_cross_platform/,'')}.
+      uniq
+  end
+
+  def self.process_platform(platform)
+  end
+
+  def self.process_dataset(dataset, platform = nil)
+    puts "Processing #{ dataset }"
+    org = dataset_organism(dataset)
+    prefix = File.join(DATA_DIR, org, dataset)
 
     CustomDS::process_matrix(prefix, org)
   end
@@ -94,13 +125,13 @@ end
 
 if __FILE__ == $0
   p CustomDS::datasets('sgd')
-  p CustomDS::
-  p CustomDS::
+  p CustomDS::dataset_path('HaploidData')
+  p CustomDS::dataset_path('HaploidData_cross_platform')
 
   exit
 
   org = 'sgd'
-  process = Dir.glob(File.join(CustomDS::
+  process = Dir.glob(File.join(CustomDS::DATA_DIR, org) + '/*').select{|f| File.directory? f}.collect{|f| File.basename(f)} - CustomDS.datasets('sgd')
   p process
   process.each{|d| CustomDS::process(d)}
 
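The new CustomDS::dataset_path above resolves a dataset by scanning one subdirectory per organism under the CustomDS data directory, treating `<dataset>.orders` as the marker of a processed dataset and `<dataset>.skip` as a marker to ignore it. A hedged usage sketch; the require path follows the gem's lib layout, the dataset name comes from the file's own `__FILE__ == $0` block, and the returned values depend on the local MARQ data directory:

    require 'MARQ/CustomDS'

    CustomDS.organisms                        # organism subdirectories of CustomDS::DATA_DIR, e.g. ["sgd", ...]
    CustomDS.dataset_path('HaploidData')      # ".../CustomDS/<org>/HaploidData" when an .orders file exists
    CustomDS.dataset_path('NoSuchDataset')    # nil: hypothetical name with no .orders or .skip file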
data/lib/MARQ/GEO.rb
CHANGED
@@ -1,45 +1,56 @@
 require 'MARQ'
+require 'MARQ/main'
 require 'rbbt/sources/organism'
 
 # Work with GEO datasets
 module GEO
 
+  CACHE_DIR = File.join(MARQ.cachedir,'GEO')
+  FileUtils.mkdir_p CACHE_DIR unless File.exists? CACHE_DIR
+
+  DATA_DIR = File.join(MARQ.datadir, 'GEO')
+
   # Get information from Entrez
   module Remote
 
+    @@nice = 1
     def self.organism_platforms(org)
       name = Organism.name(org)
-      Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gds&term=\"#{name}\"[Organism:exp]+AND+%22gpl%22[Filter]&retmax=10000").
+      Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gds&term=\"#{name}\"[Organism:exp]+AND+%22gpl%22[Filter]&retmax=10000", :nice => @@nice).
        scan(/<Id>(\d+?)<\/Id>/).collect{|id| id.first}.collect{|id| "GPL#{id.sub(/^100*/,'')}"}
     end
 
     def self.platform_datasets(platform)
-      Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gds&term=#{platform}[Accession]&retmax=2000").
+      Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gds&term=#{platform}[Accession]&retmax=2000", :nice => @@nice).
        scan(/<Id>(\d+?)<\/Id>/).collect{|id| id.first}.select{|id| !id.match(/^(1|2)000/) }.collect{|id| "GDS#{id}"}
     end
 
     def self.dataset_platform(dataset)
       if dataset =~ /GSE/
-        Open.read("http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=#{dataset}").scan(/GPL\d+/).uniq.sort.join("_")
+        Open.read("http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=#{dataset}", :nice => @@nice).scan(/GPL\d+/).uniq.sort.join("_")
       else
-        Open.read("http://www.ncbi.nlm.nih.gov/sites/GDSbrowser?acc=#{dataset}").scan(/GPL\d+/).uniq.sort.join("_")
+        Open.read("http://www.ncbi.nlm.nih.gov/sites/GDSbrowser?acc=#{dataset}", :nice => @@nice).scan(/GPL\d+/).uniq.sort.join("_")
       end
     end
 
     def self.series_dataset?(gse)
-      Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=geo&term=#{gse}[Accession]&retmax=2000").
+      Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=geo&term=#{gse}[Accession]&retmax=2000", :nice => @@nice).
        match(/<Id>(\d+?)<\/Id>/) != nil
     end
 
+    def self.platform_organism(platform)
+      Open.read("http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=#{platform}", :nice => @@nice).
+       match(%r#<td><a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi\?mode=Info&id=\d+" onmouseout="onLinkOut\('HelpMessage' , geo_empty_help\)" onmouseover="onLinkOver\('HelpMessage' , geoaxema_organismus\)">(.*)</a></td>#)[1]
+    end
+
   end
 
-  CACHE_DIR = File.join(MARQ.cachedir,'GEO')
-  FileUtils.mkdir_p CACHE_DIR unless File.exists? CACHE_DIR
 
 
   # Parse information in .soft files
   module SOFT
 
+    @@nice = 1
     GEO_SOFT="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&view=full&form=text&acc="
 
     # Download a soft file. Uses cache
@@ -49,7 +60,7 @@ module GEO
       if File.exist?( cache_file )
        File.open(cache_file).read
       else
-        content = Open.read(GEO_SOFT + item, :nocache => true)
+        content = Open.read(GEO_SOFT + item, :nocache => true, :nice => @@nice)
        raise "SOFT file error" if content !~ /!/
        fout = File.open(cache_file,'w')
        fout.write content
@@ -105,7 +116,7 @@ module GEO
       soft = get_soft(series)
 
       if match = soft.scan(/!Series_platform_id\s*=?\s*(.*)/)
-        platform = match.flatten.collect{|p| p.strip}
+        platform = match.flatten.collect{|p| p.strip}.join("_")
       else
        raise "No Platform information"
       end
@@ -131,7 +142,7 @@ module GEO
       end
 
       {
-        :platform => platform
+        :platform => platform,
        :description =>description.strip,
        :title => title.strip,
        :samples => samples,
@@ -162,8 +173,8 @@ module GEO
     end
 
     def self.GPL(platform)
-      if !File.exist?(File.join(
-
+      if !File.exist?(File.join(DATA_DIR, 'platforms',"#{platform}.yaml")) &&
+         !File.exist?(File.join(DATA_DIR, 'platforms',"#{platform}.skip"))
        begin
          if platform =~ /_/
            organism = GPL(platform.match(/(.*?)_/)[1])[:organism]
@@ -230,17 +241,17 @@ module GEO
          info[:other_ID_field] = [other_pos + 1, other_name] if other_pos > 0
 
 
-          Open.write(File.join(
+          Open.write(File.join(DATA_DIR, 'platforms',"#{platform}.yaml"), info.to_yaml)
        rescue Exception
          puts $!.message
          puts $!.backtrace
-          Open.write(File.join(
+          Open.write(File.join(DATA_DIR, 'platforms',"#{platform}.skip"), $!.message)
        end
       end
 
       raise "Platform info for #{ platform } is not available and could not be automatically produced." if File.exist?(File.join(MARQ.datadir, 'GEO', 'platforms',"#{platform}.skip"))
 
-      YAML::load(File.open(File.join(
+      YAML::load(File.open(File.join(DATA_DIR, 'platforms',"#{platform}.yaml")))
     end
 
 
@@ -365,6 +376,8 @@ module GEO
       if File.exist?(File.join(platform,'cross_platform'))
        puts "-- Translated to cross_platform format"
        R.GDS(dataset, prefix + '_cross_platform', field, File.join(platform_path, 'translations'))
+      else
+        puts "No cross_platform probe ids for platform"
       end
     end
 
@@ -388,10 +401,11 @@ module GEO
       do_log = !info[:log2] if info[:log2]
       fields = info[:fields]
 
-      puts "Processing GSE #{ series }. Platform #{ platform }"
-
       platform_path = GEO::platform_path(platform)
+      return if platform_path.nil?
       prefix = File.join(platform_path, 'GSE', series.to_s)
+
+      puts "Processing GSE #{ series }. Platform #{ platform }"
       puts "-- Original"
       R.GSE(gsms, conditions, do_log, prefix, nil, fields, info[:title], info[:description])
 
@@ -433,6 +447,8 @@ module GEO
        fix_GSE_ids(File.join(platform_path, 'cross_platform'),prefix + '_cross_platform');
        FileUtils.cp(File.join(platform_path, 'cross_platform'),prefix + '_cross_platform.codes')
        FileUtils.rm(prefix + '.translations') if File.exist?(prefix + '.translations')
+      else
+        puts "No cross_platform probe ids for platform"
       end
       FileUtils.rm(prefix + '.swap') if File.exist?(prefix + '.swap')
     end
@@ -441,7 +457,7 @@ module GEO
     # system (called biomart for clarity)
     def self.GPL(platform)
       path = GEO::platform_path(platform)
-      return if File.exist?
+      return if path.nil? || File.exist?(path)
 
       if platform =~ /_/
        FileUtils.mkdir(path)
@@ -522,81 +538,101 @@ module GEO
 
   end
 
+  def self.platforms
+    Dir.glob(File.join(DATA_DIR, "GPL*")).collect {|path| File.basename(path) }
+  end
 
 
-
-
-
-
+  def self.dataset_type(dataset)
+    case
+    when dataset =~ /^GDS/
+      :GDS
+    when dataset =~ /^GSE/
+      :GSE
+    end
   end
 
-
   def self.platform_path(platform)
-    File.join(
+    path = File.join(DATA_DIR, platform)
+    path = nil unless File.exists? path
+    path
   end
 
+  def self.dataset_path(dataset, platform = nil)
+    if platform
+      platforms = [platform]
+    else
+      platforms = self.platforms
+    end
 
-
-
-
+    platforms.each do |platform|
+      platform_path = platform_path(platform)
+      next if platform_path.nil?
 
-
-
-
-
-
-
-
-    Dir.glob(File.join(platform_path(platform), '*', '*_cross_platform.orders')).any?
+      prefix = File.join(platform_path, dataset_type(dataset).to_s, dataset)
+      case
+      when File.exists?(prefix + '.orders')
+        return File.join(platform_path, dataset_type(dataset).to_s, dataset)
+      when File.exists?(prefix + '.skip')
+        return nil
+      end
    end
+
+    return nil
  end
 
-  def self.
-
-
+  def self.platform_datasets(platform)
+    cross_platform = MARQ::Platform.is_cross_platform? platform
+
+    path = platform_path(MARQ::Platform.clean(platform))
+    return [] if path.nil?
+
+    datasets = Dir.glob(File.join(path, '*', '*.orders')).
+      collect {|path| File.basename(path).sub(/\.orders$/,'')}
+
+    if cross_platform
+      datasets.select {|dataset| MARQ::Dataset.is_cross_platform? dataset }.
+        collect {|dataset| MARQ::Dataset.clean(dataset) }
    else
-
+      datasets.select {|dataset| ! MARQ::Dataset.is_cross_platform? dataset }
    end
-    return nil if files.empty?
-    return files.first.match(/(.*)\./)[1]
  end
 
-  def self.
-
-
-
-
-    platform_datasets(platform).any?
-  }
+  def self.dataset_platform(dataset)
+    path = dataset_path(dataset)
+    return nil if path.nil?
+    path.match(/(GPL\d+)/)
+    return $1
  end
 
+  def self.platform_organism(platform)
+    GEO::SOFT.GPL(platform)[:organism]
+  end
 
-
-
-    Dir.glob(File.join(platform_path(platform),"*/*.orders")).collect{|f| File.basename(f).sub(/.orders$/,'')}.select{|d| !is_cross_platform?(d)}
+  def self.dataset_organism(dataset)
+    platform_organism(dataset_platform(dataset))
  end
 
-  def self.
-
-    $1
+  def self.process_platform(platform)
+    GEO::Process.GPL(platform)
  end
 
-  def self.
-
-
-
-
-
-    {:
+  def self.process_dataset(dataset, platform)
+    case dataset_type(dataset)
+    when :GDS
+      GEO::Process.GDS(dataset, platform)
+    when :GSE
+      info = YAML::load(File.open("series/#{ dataset }.yaml"))
+      FileUtils.rm("platforms/#{ info[:platform] }.skip") if File.exist? "platforms/#{ info[:platform] }.skip"
+      GEO::Process.GSE(dataset, info)
    end
-
  end
 
-
 end
 
 
 if __FILE__ == $0
+  p GEO.dataset_path 'GDS2791_cross_platform', 'GPL96'
 
 end
 
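With these changes GEO exposes the same backend surface as CustomDS (path lookup plus process_platform / process_dataset entry points), which appears to be what the reworked MARQ/main.rb drives through MARQ::Platform and MARQ::Dataset in the Rakefile above. A hedged sketch of the lookup helpers; the accessions are the ones in the file's own `__FILE__ == $0` block, the require path is assumed from the gem layout, and results depend on which platforms have been processed locally:

    require 'MARQ/GEO'

    GEO.platforms                                         # GPL* directories under GEO::DATA_DIR
    GEO.dataset_path('GDS2791_cross_platform', 'GPL96')   # path prefix, or nil if absent or marked with a .skip file
    GEO.dataset_platform('GDS2791_cross_platform')        # scans the platforms and extracts the GPL accession from the path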