rbbt-marq 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,7 +65,7 @@ end
 
 
  $USAGE =<<EOT
- #{__FILE__} <action> [<subaction>] [--force (true|false)] [--update_db (true|false)] [--platform <gpl>] [--series <gse>] [--organism <org>] [--port <number>] [--host <name>]
+ #{__FILE__} <action> [<subaction>] [--force (true|false)] [--update_db (true|false)] [--platform <gpl>] [--dataset <gds>] [--series (true|false)] [--organism <org>] [--port <number>] [--host <name>]
  actions:
  * config: Set paths for data, cache, and tmp directories
 
@@ -88,7 +88,7 @@ $USAGE =<<EOT
  EOT
 
  class Controller < SimpleConsole::Controller
- params :string => {:t => :target, :p => :platform, :s => :series, :o => :organism}, :boolean => {:d => :update_db, :f => :force}, :integer => {:p => :port}
+ params :string => {:d => :dataset, :t => :target, :p => :platform, :s => :series, :o => :organism, :db => :update_db, :f => :force}, :integer => {:p => :port}
 
 
  def prepare
@@ -99,9 +99,10 @@ class Controller < SimpleConsole::Controller
  def install
  $platform = params[:platform] unless params[:platform].nil?
  $series = params[:series] unless params[:series].nil?
+ $dataset = params[:dataset] unless params[:dataset].nil?
  $organism = params[:organism] unless params[:organism].nil?
- $update_db = params[:update_db] && params[:update_db] =~ /true|yes|y/i
- $force = params[:force] && params[:force] =~ /true|yes|y/i
+ $update_db = params[:update_db].match(/true|yes|y/i) != nil unless params[:update_db].nil?
+ $force = params[:force].match(/true|yes|y/i) != nil unless params[:force].nil?
  @actions = params[:id] || %w(GEO)
  @rake_action = params[:target] || 'default'
  end
@@ -186,25 +187,28 @@ class View < SimpleConsole::View
 
  @actions = [@actions] if @actions === String
  @actions.each{|action|
- puts "Installing #{ action }"
+ puts "Prepare #{ action }"
  Rake::Task[action].invoke
  }
  end
  end
 
  def install
-
  require 'rake'
+
  @actions = [@actions] if @actions === String
 
- @actions.each{|action|
+ @actions.each do |action|
+
  Thread.new{
- puts "Updating #{action}. Target: #{@rake_action}"
- FileUtils.cd File.join(MARQ.datadir, action)
+ puts "Install #{action}. Target: #{@rake_action}"
  load File.join(MARQ.datadir, action, 'Rakefile')
+
+ FileUtils.cd File.join(MARQ.datadir, action)
  Rake::Task[@rake_action].invoke
  }.join
- }
+
+ end
  end
 
 
@@ -25,7 +25,7 @@ def process_list
  organisms.each do |organism|
  platforms = CustomDS.organism_platforms(organism)
  platforms.each do |platform|
- list[platform] = platform
+ list[platform] = [platform]
  end
  end
  end
@@ -12,6 +12,7 @@ require File.join(File.dirname(File.dirname(__FILE__)), 'rake_includes')
  $platform ||= ENV['platform']
  $organism ||= [$organism, ENV['organism'], nil].compact.first
  $dataset ||= ENV['dataset']
+ $series = [$series, ENV['series'], true].compact.first.to_s == 'true'
 
  # More global variables in rake_includes file
 
@@ -51,7 +52,7 @@ def process_list
  else
  organism = GEO::Remote::platform_organism(platform)
  end
- if organism.split(',').select{|org| organisms.include? Organism.name2org(org.strip)}.any?
+ if organism.split(',').select{|org| organisms.include?(Organism.name2org(org.strip)) || organisms.include?(org) }.any?
  list[platform] ||= []
  list[platform] << serie
  end
@@ -0,0 +1,44 @@
+ ---
+ :title: Transcriptional effects of the TOR2-controlled signaling function
+ :description: |-
+ We analysed the transcriptional effects of the TOR2-controlled signaling function using a genome-wide microarray approach in yeast. In S. cerevisiae, TOR2 has two essential signaling functions. One, shared with TOR1, is required for translation initiation, transcription, and cell growth in response to the presence of nutrients. The second is unique to TOR2, and functions in cell-cycle-dependent actin polarization and possibly in transcription. A previous genetic screen for mutants defective in the TOR-shared and the TOR2-unique functions identified several TOR2 temperature-sensitive alleles. In this study, we compared total transcription profiles for strain SH121, which is specifically defective in the TOR2-unique function, and its isogenic wild type counterpart SH100.
+ Keywords = TOR
+ Keywords: time-course
+ :arrays:
+ GSM31661:
+ time: 2H
+ treatment: SH121
+ GSM31662:
+ time: 2H
+ treatment: SH121
+ GSM31663:
+ time: 6H
+ treatment: SH121
+ GSM31664:
+ time: 6H
+ treatment: SH121
+ GSM31653:
+ time: 0H
+ treatment: SH100
+ GSM31654:
+ time: 0H
+ treatment: SH100
+ GSM31655:
+ time: 2H
+ treatment: SH100
+ GSM31656:
+ time: 2H
+ treatment: SH100
+ GSM31657:
+ time: 6H
+ treatment: SH100
+ GSM31658:
+ time: 6H
+ treatment: SH100
+ GSM31659:
+ time: 0H
+ treatment: SH121
+ GSM31660:
+ time: 0H
+ treatment: SH121
+ :platform: GPL90
@@ -0,0 +1,44 @@
+ ---
+ :title: snf/swi mutants of S. cerevisiae.
+ :description: |-
+ The Saccharomyces cerevisiae Snf/Swi complex has been previously demonstrated to control transcription and chromatin structure of particular genes in vivo and to remodel nucleosomes in vitro. We have performed whole-genome expression analysis, using DNA microarrays, to study mutants deleted for a gene encoding one conserved (Snf2) or one unconserved (Swi1) Snf/Swi component. This analysis was performed on cells grown in both rich and minimal media. The microarray results, combined with Northern blot, computational, and genetic analyses, show that snf2Delta and swi1Delta mutations cause similar effects on mRNA levels, that Snf/Swi controls some genes differently in rich and minimal media, and that Snf/Swi control is exerted at the level of individual genes rather than over larger chromosomal domains. In addition, this work shows that Snf/Swi controls mRNA levels of MATalpha-specific genes, likely via controlling transcription of the regulators MATalpha1 and MCM1. Finally, we provide evidence that Snf/Swi acts both as an activator and as a repressor of transcription, and that neither mode of control is an indirect effect of the other.
+ This study is described in more detail in Sudarsanam P et al.(2000) Proc Natl Acad Sci U S A 97:3364-9
+ Keywords: other
+ :arrays:
+ GSM1011:
+ mutante: snf2
+ medio: rich
+ GSM1012:
+ mutante: swi1
+ medio: rich
+ GSM1013:
+ mutante: snf2
+ medio: rich
+ GSM1014:
+ mutante: snf2
+ medio: rich
+ GSM1015:
+ mutante: swi1
+ medio: rich
+ GSM1004:
+ mutante: swi1
+ medio: minimal
+ GSM1005:
+ mutante: snf2
+ medio: minimal
+ GSM1006:
+ mutante: swi1
+ medio: minimal
+ GSM1007:
+ mutante: snf2
+ medio: minimal
+ GSM1008:
+ mutante: swi1
+ medio: minimal
+ GSM1010:
+ mutante: swi1
+ medio: rich
+ GSM1009:
+ mutante: snf2
+ medio: minimal
+ :platform: GPL57
@@ -0,0 +1,22 @@
+ ---
+ :title: Sporulation in yeast
+ :description: |-
+ Diploid cells of budding yeast produce haploid cells through the developmental program of sporulation, which consists of meiosis and spore morphogenesis. DNA microarrays containing nearly every yeast gene were used to assay changes in gene expression during sporulation. At least seven distinct temporal patterns of induction were observed. The transcription factor Ndt80 appeared to be important for induction of a large group of genes at the end of meiotic prophase. Consensus sequences known or proposed to be responsible for temporal regulation could be identified solely from analysis of sequences of coordinately expressed genes. The temporal expression pattern provided clues to potential functions of hundreds of previously uncharacterized genes, some of which have vertebrate homologs that may function during gametogenesis.
+ This study is described in more detail in Chu S, et al. 1998. Science 282:699-705
+ Keywords: time-course
+ :arrays:
+ GSM1000:
+ time: 0.5h
+ GSM995:
+ time: 17h
+ GSM996:
+ time: 19h
+ GSM998:
+ time: 11h
+ GSM992:
+ time: 0h
+ GSM993:
+ time: 12h
+ GSM994:
+ time: 15h
+ :platform: GPL67
@@ -0,0 +1,19 @@
+ ---
+ :arrays:
+ GSM125326:
+ condition: BY
+ GSM125327:
+ condition: BY
+ GSM125328:
+ condition: BY
+ GSM125330:
+ condition: dbr1
+ GSM125329:
+ condition: dbr1
+ GSM125331:
+ condition: dbr1
+ :description: |-
+ Introns in pre-mRNAs must be spliced out prior to their translation. During splicing, introns are removed in the form of a lariat, in which the 5' end is linked to the 2' hydroxyl of an internal adenosine. Lariat degradation is initiated by an 2'-5' phosphodiester-specific RNA endonuclease which debranches these lariat RNAs to linear form. Deletion of the debranching enzyme is yeast results in the accumulation of lariat introns. We used this accumulation to identify spliced lariat introns on a genome-wide scale in S. cerevisiae using tiling microarrays.
+ Keywords: two sample comparison, 3 biological replicates
+ :title: Expression data from BY4743 and dbr1 yeast
+ :platform: GPL4065
@@ -1,10 +1,11 @@
+ require 'progress-monitor'
+
  $expr_threshold ||= (ENV['threshold'] || 0.05).to_f
  $folds ||= (ENV['folds'] || 2.5).to_f
  $nth_genes ||= (ENV['nth_genes'] || 100).to_i
 
  $force = [$force, ENV['force'], false].compact.first.to_s == 'true'
  $tranlations = [$tranlations, ENV['translations'], false].compact.first.to_s == 'true'
- $series = [$series, ENV['series'], true].compact.first.to_s == 'true'
  $update_db = [$update_db, ENV['update_db'], false].compact.first.to_s == 'true'
  $skip_db = [$skip_db, ENV['skip_db'], false].compact.first.to_s == 'true'
  $fdr = [$fdr, ENV['fdr'], true].compact.first.to_s == 'true'
@@ -28,12 +29,23 @@ module GEO::Process::R
  end
  end
 
+ module CustomDS
+ class << self
+ alias_method :process_dataset_old, :process_dataset
+ def process_dataset(*args)
+ $changes = true
+ process_dataset_old(*args)
+ end
+ end
+ end
+
  desc "Analyze datasets"
  task 'data' do
-
  platforms_to_save = []
 
  platforms = process_list
+
+ Progress.monitor("Processing #{platforms.keys.length} platforms") if platforms.keys.length > 1
  platforms.each{|platform, datasets|
 
  begin
@@ -50,10 +62,15 @@ task 'data' do
 
  $changes = false
  # Process all datasets
+
+ Progress.monitor("Processing #{datasets.length} datasets") if datasets.length > 1
  datasets.each{|dataset|
  begin
- next unless $force || MARQ::Dataset.path(dataset).nil?
+ already_processed = MARQ::Dataset.exists?(dataset) || MARQ::Dataset.broken?(dataset)
+ next if already_processed && ! $force
+
  MARQ::Dataset.process(dataset, platform)
+ MARQ::Dataset.process(MARQ::Name.cross_platform(dataset), platform) if MARQ::Platform.has_cross_platform?(platform)
  rescue
  puts "Error processing dataset #{ dataset }"
  puts $!.message
@@ -65,9 +82,9 @@ task 'data' do
  platforms_to_save << platform if $changes || $update_db
  }
 
+ Progress.monitor("Saving #{platforms_to_save.length} platforms in DB") if platforms_to_save.length > 1
  platforms_to_save.each{|platform|
  begin
- puts "Saving #{platform}"
  MADB.save_platform(platform)
  rescue
  puts "Error saving platform #{ platform }"
@@ -88,7 +105,7 @@ def annotations(name, cross_platform = false, &block)
 
  FileUtils.mkdir_p File.join("annotations", name)
  filename = File.join("annotations", name, dataset)
- dataset += '_cross_platform' if cross_platform && MARQ::Platform::has_cross_platform?(platform)
+ dataset += MARQ::Name.cross_platform(dataset) if cross_platform && MARQ::Platform::has_cross_platform?(platform)
  next if ! MARQ::Dataset.exists?(dataset)
  terms = block.call(dataset)
  Open.write(filename, terms.to_yaml)
@@ -42,25 +42,23 @@ module CustomDS
  Dir.glob(File.join(DATA_DIR, org) + '/*.orders').collect{|f| clean(File.basename(f.sub(/.orders/,'')))}.uniq
  end
 
- def self.process_matrix(prefix, org)
- conditions = Dir.glob(prefix + '/*').collect{|f| File.basename(f)} - %w(values codes info description cross_platform)
- description = Open.read(File.join(prefix, 'description'))
-
- info = YAML.load(File.open(File.join(prefix, 'info')))
- r.CustomDS_process(prefix, false, conditions, description, info["two_channel"], !info["log2"])
-
-
- codes = Open.read(File.join(prefix,'codes')).collect{|l| l.chomp}
- cross_platform = ID.translate(org, codes)
-
- if cross_platform.length > codes.length / 4
- Open.write(File.join(prefix,'cross_platform'),cross_platform.collect{|c| c || "NO MATCH"}.join("\n"))
+ def self.process_dataset(dataset, platform)
+ org = platform_organism(platform)
+ platform_path = platform_path(platform)
+ prefix = File.join(DATA_DIR, org, MARQ::Name.clean(dataset))
+
+ conditions = Dir.glob(File.join(platform_path, '*')).collect{|f| File.basename(f)} - %w(values codes info description cross_platform)
+ description = Open.read(File.join(platform_path, 'description'))
+ info = YAML.load(File.open(File.join(platform_path, 'info')))
+
+ if MARQ::Dataset.is_cross_platform?(dataset)
  r.CustomDS_process(prefix, true, conditions, description, info["two_channel"], !info["log2"])
+ else
+ r.CustomDS_process(prefix, false, conditions, description, info["two_channel"], !info["log2"])
  end
- end
 
- def self.process(name)
- end
+
+ end
 
  def self.organisms
  Dir.glob(File.join(DATA_DIR, '*')).
@@ -69,23 +67,22 @@ module CustomDS
  end
 
  def self.dataset_path(dataset)
+
  organisms.each do |organism|
- case
- when File.exists?(File.join(DATA_DIR, organism, dataset + '.orders'))
+ if File.exists?(File.join(DATA_DIR, organism, dataset + '.orders')) || File.exists?(File.join(DATA_DIR, organism, dataset + '.skip'))
  return File.join(DATA_DIR, organism, dataset)
- when File.exists?(File.join(DATA_DIR, organism, dataset + '.skip'))
- return nil
  end
  end
+
  return nil
  end
 
  def self.platform_path(platform)
- dataset_path(platform)
+ Dir.glob(File.join(DATA_DIR, '*', platform)).first
  end
 
  def self.platform_datasets(platform)
- MARQ::Dataset.clean(platform)
+ MARQ::Name.clean(platform)
  end
 
  def self.platform_organism(platform)
@@ -104,22 +101,21 @@ module CustomDS
  end
 
  def self.organism_platforms(organism)
- Dir.glob(File.join(DATA_DIR,organism,'*.orders')).
- collect {|path| File.basename(path).sub(/\.orders$/,'').sub(/_cross_platform/,'')}.
- uniq
+ Dir.glob(File.join(DATA_DIR, organism, '*', 'codes')).
+ collect {|path| File.basename(File.dirname(path))}.uniq
  end
 
  def self.process_platform(platform)
- end
+ prefix = platform_path(platform)
+ org = platform_organism(platform)
 
- def self.process_dataset(dataset, platform = nil)
- puts "Processing #{ dataset }"
- org = dataset_organism(dataset)
- prefix = File.join(DATA_DIR, org, dataset)
+ codes = Open.read(File.join(prefix,'codes')).collect{|l| l.chomp}
+ cross_platform = ID.translate(org, codes)
 
- CustomDS::process_matrix(prefix, org)
+ if cross_platform.length > codes.length / 4
+ Open.write(File.join(prefix,'cross_platform'),cross_platform.collect{|c| c || "NO MATCH"}.join("\n"))
+ end
  end
-
  end
 
 
@@ -40,13 +40,11 @@ module GEO
 
  def self.platform_organism(platform)
  Open.read("http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=#{platform}", :nice => @@nice).
- match(%r#<td><a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi\?mode=Info&amp;id=\d+" onmouseout="onLinkOut\('HelpMessage' , geo_empty_help\)" onmouseover="onLinkOver\('HelpMessage' , geoaxema_organismus\)">(.*)</a></td>#)[1]
+ scan(%r#<a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi\?mode=Info&amp;id=\d+" onmouseout="onLinkOut\('HelpMessage' , geo_empty_help\)" onmouseover="onLinkOver\('HelpMessage' , geoaxema_organismus\)">(.*?)</a>#).collect{|p| p.first}.join(', ')
  end
 
  end
 
-
-
  # Parse information in .soft files
  module SOFT
 
@@ -111,22 +109,26 @@ module GEO
  end
 
 
+ #{{{ Parse soft files for several GEO entities
 
  def self.GSE(series)
  soft = get_soft(series)
 
+ # Find platform
  if match = soft.scan(/!Series_platform_id\s*=?\s*(.*)/)
  platform = match.flatten.collect{|p| p.strip}.join("_")
  else
  raise "No Platform information"
  end
 
+ # Find title
  if soft.match(/!Series_title \s*=?\s*(.*)/)
  title = $1
  else
  raise "No Title information"
  end
 
+ # Find summary
  if soft.match(/!Series_summary \s*=?\s*(.*)/)
  matches = soft.scan(/!Series_summary \s*=?\s*(.*)/).to_a
  description = matches.collect{|m| m.to_s.strip.sub(/!Series_summary \s*=?\s*/,'')}.join("\n")
@@ -134,6 +136,7 @@ module GEO
  raise "No Summary information"
  end
 
+ # Find samples
  if soft.match(/!Series_sample_id \s*=?\s*(.*)/)
  matches = soft.scan(/!Series_sample_id \s*=?\s*(.*)/).to_a
  samples = matches.collect{|m| m.to_s.strip.sub(/!Series_sample_id \s*=?\s*/,'')}
@@ -152,6 +155,7 @@ module GEO
  def self.GSM(array)
  soft = get_soft(array)
 
+ # Find title
  if soft.match(/!Sample_title\s*=?\s*(.*)/)
  title = $1
  else
@@ -159,6 +163,7 @@ module GEO
  end
 
 
+ # Find description
  if soft.match(/!Sample_description \s*=?\s*(.*)/)
  description = $1
  else
@@ -173,6 +178,7 @@ module GEO
  end
 
  def self.GPL(platform)
+
  if !File.exist?(File.join(DATA_DIR, 'platforms',"#{platform}.yaml")) &&
  !File.exist?(File.join(DATA_DIR, 'platforms',"#{platform}.skip"))
  begin
@@ -263,6 +269,9 @@ module GEO
  # Use R to load and process the datasets
  module Process
 
+ class PlatformNotProcessedError < StandardError; end
+ class AdhocPlatformCollisionError < StandardError; end
+
  # R library wrapper
  module R
  @@r = nil
@@ -350,109 +359,87 @@ module GEO
  rearange(platform_positions, prefix + '.' + ext)
  }
 
+ FileUtils.cp(platform_codes_file, prefix + '.codes')
  Open.write(prefix + '.swap', platform_positions.join("\n"))
- end
-
-
- # Process a dataset. Need to specify the platform. The field parameter can
- # be used to use a different column for the field.
- #
- # Deprecated in favor of using the original firt column and using a
- # different one only for translation
- def self.GDS(dataset, platform, field = nil)
- puts "Processing GDS #{ dataset }. Platform #{ platform }"
- platform_path = GEO.platform_path(platform)
 
- puts "-- Original"
- prefix = File.join(platform_path, 'GDS', dataset.to_s)
- R.GDS(dataset, prefix, field, nil)
-
- # Was there an error?
- if File.exist?(prefix + '.skip')
- FileUtils.cp(prefix + '.skip', prefix + '_cross_platform.skip')
- return
- end
-
- if File.exist?(File.join(platform,'cross_platform'))
- puts "-- Translated to cross_platform format"
- R.GDS(dataset, prefix + '_cross_platform', field, File.join(platform_path, 'translations'))
- else
- puts "No cross_platform probe ids for platform"
- end
  end
 
- # Process a series. The info parameters is a hash with the :array,
- # :platform, :log2 and :fields keys
  def self.GSE(series, info)
- return if Dir.glob(File.join(info[:platform], 'GSE', series) + '.*').any?
-
-
+ platform = info[:platform]
+ do_log = info[:log2].nil? ? nil : !info[:log2]
+ fields = info[:fields]
+
+ # Determine samples and sample conditions
  gsms = []
  conditions = {}
  info[:arrays].each{|gsm, cond|
  gsms << gsm
- cond.each{|condition, value|
- conditions[condition] ||= []
- conditions[condition] << value
+ cond.each{|type, value|
+ conditions[type] ||= []
+ conditions[type] << value
  }
  }
- platform = info[:platform]
- do_log = nil
- do_log = !info[:log2] if info[:log2]
- fields = info[:fields]
+
+ # Adhoc platforms are for series with samples from different platforms.
+ # They are created when the series is processed
+ adhoc_platform = platform.match(/_/) != nil
 
- platform_path = GEO::platform_path(platform)
- return if platform_path.nil?
- prefix = File.join(platform_path, 'GSE', series.to_s)
+ raise PlatformNotProcessedError if ! adhoc_platform && ! MARQ::Platform.exists?(platform)
+ raise AdhocPlatformCollisionError if adhoc_platform && MARQ::Platform.exists?(platform)
 
- puts "Processing GSE #{ series }. Platform #{ platform }"
- puts "-- Original"
- R.GSE(gsms, conditions, do_log, prefix, nil, fields, info[:title], info[:description])
+ cross_platform = MARQ::Name.is_cross_platform?(series)
 
- # Was there an error?
- if File.exist?(prefix + '.skip')
- FileUtils.cp(prefix + '.skip', prefix + '_cross_platform.skip')
- return
- end
+ platform_path = GEO.platform_path(platform)
 
- if platform =~ /_/
- FileUtils.cp(prefix + '.codes', File.join(platform_path,'codes'))
- codes = Open.read(File.join(platform_path, 'codes')).collect{|l| l.chomp}
- organism = SOFT::GPL(platform.match(/(.*?)_/)[1])[:organism]
- translations = translate(organism, codes)
- Open.write(File.join(platform_path, 'translations'), translations.collect{|v| v || "NO MATCH"}.join("\n"))
- Open.write(File.join(platform_path, 'cross_platform'), translations.compact.sort.uniq.join("\n"))
- else
- # Are the codes of the series equivalent to the ones in the platform?
- if File.open(File.join(platform_path,'codes')).collect{|l| l.chomp} != File.open(prefix + '.codes').collect{|l| l.chomp}
+ prefix = File.join(platform_path, 'GSE', series)
+
+ FileUtils.rm(prefix + '.skip') if File.exist?(prefix + '.skip')
+
+ if ! cross_platform
+ R.GSE(gsms, conditions, do_log, prefix, nil, fields, info[:title], info[:description])
+
+ # Set up codes and cross_platform for adhoc platforms
+ if adhoc_platform
+ codes = Open.read(prefix + '.codes').collect{|l| l.chomp}
+ organism = GEO.platform_organism(platform.split(/_/)[0])
+ translations = translate(organism, codes)
+ FileUtils.cp(prefix + '.codes', File.join(platform_path,'codes'))
+ Open.write(File.join(platform_path, 'translations'), translations.collect{|v| v || "NO MATCH"}.join("\n"))
+ Open.write(File.join(platform_path, 'cross_platform'), translations.compact.sort.uniq.join("\n"))
+ else
  fix_GSE_ids(File.join(platform_path, 'codes'),prefix);
- FileUtils.cp(File.join(platform_path, 'codes'),prefix + '.codes')
  end
+
+ else
+ R.GSE(gsms, conditions, do_log, prefix, File.join(platform_path, 'translations'), fields, info[:title], info[:description])
+ fix_GSE_ids(File.join(platform_path, 'cross_platform'),prefix);
  end
 
+ end
 
- if File.exist?(File.join(platform,'translations'))
- FileUtils.cp(File.join(platform,'translations'), prefix + '.translations')
- if File.exist?(prefix + '.swap')
- orders = Open.read(prefix + '.swap').collect{|l| l.chomp}
- inverse_orders = Array.new(orders.length)
- orders.each_with_index{|pos,i|
- next if pos !~ /\d/
- inverse_orders[pos.to_i] = i
- }
- rearange(inverse_orders, prefix + '.translations', "NO MATCH")
- end
- puts "-- Translated to cross_platform format"
- R.GSE(gsms, conditions, do_log, prefix + '_cross_platform', prefix + '.translations',fields, info[:title], info[:description])
- fix_GSE_ids(File.join(platform_path, 'cross_platform'),prefix + '_cross_platform');
- FileUtils.cp(File.join(platform_path, 'cross_platform'),prefix + '_cross_platform.codes')
- FileUtils.rm(prefix + '.translations') if File.exist?(prefix + '.translations')
+ # Process a dataset. Need to specify the platform. The field parameter can
+ # be used to use a different column for the field.
+ #
+ # Deprecated in favor of using the original firt column and using a
+ # different one only for translation
+ def self.GDS(dataset, platform, field = nil)
+ raise PlatformNotProcessedError if ! MARQ::Platform.exists? platform
+
+ cross_platform = MARQ::Name.is_cross_platform? dataset
+
+ platform_path = GEO.platform_path(platform)
+ prefix = File.join(platform_path, 'GDS', dataset)
+
+ FileUtils.rm(prefix + '.skip') if File.exist?(prefix + '.skip')
+
+ if cross_platform
+ R.GDS(MARQ::Name.clean(dataset), prefix, field, File.join(platform_path, 'translations'))
  else
- puts "No cross_platform probe ids for platform"
+ R.GDS(dataset, prefix, field, nil)
  end
- FileUtils.rm(prefix + '.swap') if File.exist?(prefix + '.swap')
  end
 
+
  # Load GPL data. Translates IDS of the platform probes using AILUN and our
  # system (called biomart for clarity)
  def self.GPL(platform)
@@ -535,7 +522,6 @@ module GEO
  end
 
  end
-
  end
 
  def self.platforms
@@ -554,7 +540,7 @@ module GEO
 
  def self.platform_path(platform)
  path = File.join(DATA_DIR, platform)
- path = nil unless File.exists? path
+ path = nil unless File.exists? File.join(path, 'codes')
  path
  end
 
@@ -567,15 +553,15 @@ module GEO
 
  platforms.each do |platform|
  platform_path = platform_path(platform)
+
  next if platform_path.nil?
 
  prefix = File.join(platform_path, dataset_type(dataset).to_s, dataset)
- case
- when File.exists?(prefix + '.orders')
+
+ if File.exists?(prefix + '.orders') || File.exists?(prefix + '.skip')
  return File.join(platform_path, dataset_type(dataset).to_s, dataset)
- when File.exists?(prefix + '.skip')
- return nil
  end
+
  end
 
  return nil
@@ -584,7 +570,7 @@ module GEO
  def self.platform_datasets(platform)
  cross_platform = MARQ::Platform.is_cross_platform? platform
 
- path = platform_path(MARQ::Platform.clean(platform))
+ path = platform_path(MARQ::Name.clean(platform))
  return [] if path.nil?
 
  datasets = Dir.glob(File.join(path, '*', '*.orders')).
@@ -592,7 +578,7 @@ module GEO
 
  if cross_platform
  datasets.select {|dataset| MARQ::Dataset.is_cross_platform? dataset }.
- collect {|dataset| MARQ::Dataset.clean(dataset) }
+ collect {|dataset| MARQ::Name.clean(dataset) }
  else
  datasets.select {|dataset| ! MARQ::Dataset.is_cross_platform? dataset }
  end
@@ -614,7 +600,7 @@ module GEO
  end
 
  def self.process_platform(platform)
- GEO::Process.GPL(platform)
+ GEO::Process.GPL(platform) unless platform =~ /_/
  end
 
  def self.process_dataset(dataset, platform)