rbbt-marq 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/R/CustomDS.R +7 -24
- data/R/GEO.R +1 -21
- data/R/MA.R +253 -223
- data/bin/marq_config +14 -10
- data/install_scripts/CustomDS/Rakefile +1 -1
- data/install_scripts/GEO/Rakefile +2 -1
- data/install_scripts/GEO/series/GSE1814.yaml +44 -0
- data/install_scripts/GEO/series/GSE21.yaml +44 -0
- data/install_scripts/GEO/series/GSE27.yaml +22 -0
- data/install_scripts/GEO/series/GSE5470.yaml +19 -0
- data/install_scripts/rake_includes.rb +22 -5
- data/lib/MARQ/CustomDS.rb +28 -32
- data/lib/MARQ/GEO.rb +77 -91
- data/lib/MARQ/ID.rb +1 -2
- data/lib/MARQ/MADB.rb +31 -25
- data/lib/MARQ/annotations.rb +3 -3
- data/lib/MARQ/main.rb +85 -26
- data/lib/MARQ/rankproduct.rb +14 -8
- metadata +6 -2
data/bin/marq_config
CHANGED
@@ -65,7 +65,7 @@ end
|
|
65
65
|
|
66
66
|
|
67
67
|
$USAGE =<<EOT
|
68
|
-
#{__FILE__} <action> [<subaction>] [--force (true|false)] [--update_db (true|false)] [--platform <gpl>] [--
|
68
|
+
#{__FILE__} <action> [<subaction>] [--force (true|false)] [--update_db (true|false)] [--platform <gpl>] [--dataset <gds>] [--series (true|false)] [--organism <org>] [--port <number>] [--host <name>]
|
69
69
|
actions:
|
70
70
|
* config: Set paths for data, cache, and tmp directories
|
71
71
|
|
@@ -88,7 +88,7 @@ $USAGE =<<EOT
|
|
88
88
|
EOT
|
89
89
|
|
90
90
|
class Controller < SimpleConsole::Controller
|
91
|
-
params :string => {:t => :target, :p => :platform, :s => :series, :o => :organism
|
91
|
+
params :string => {:d => :dataset, :t => :target, :p => :platform, :s => :series, :o => :organism, :db => :update_db, :f => :force}, :integer => {:p => :port}
|
92
92
|
|
93
93
|
|
94
94
|
def prepare
|
@@ -99,9 +99,10 @@ class Controller < SimpleConsole::Controller
|
|
99
99
|
def install
|
100
100
|
$platform = params[:platform] unless params[:platform].nil?
|
101
101
|
$series = params[:series] unless params[:series].nil?
|
102
|
+
$dataset = params[:dataset] unless params[:dataset].nil?
|
102
103
|
$organism = params[:organism] unless params[:organism].nil?
|
103
|
-
$update_db = params[:update_db]
|
104
|
-
$force = params[:force]
|
104
|
+
$update_db = params[:update_db].match(/true|yes|y/i) != nil unless params[:update_db].nil?
|
105
|
+
$force = params[:force].match(/true|yes|y/i) != nil unless params[:force].nil?
|
105
106
|
@actions = params[:id] || %w(GEO)
|
106
107
|
@rake_action = params[:target] || 'default'
|
107
108
|
end
|
@@ -186,25 +187,28 @@ class View < SimpleConsole::View
|
|
186
187
|
|
187
188
|
@actions = [@actions] if @actions === String
|
188
189
|
@actions.each{|action|
|
189
|
-
puts "
|
190
|
+
puts "Prepare #{ action }"
|
190
191
|
Rake::Task[action].invoke
|
191
192
|
}
|
192
193
|
end
|
193
194
|
end
|
194
195
|
|
195
196
|
def install
|
196
|
-
|
197
197
|
require 'rake'
|
198
|
+
|
198
199
|
@actions = [@actions] if @actions === String
|
199
200
|
|
200
|
-
@actions.each
|
201
|
+
@actions.each do |action|
|
202
|
+
|
201
203
|
Thread.new{
|
202
|
-
puts "
|
203
|
-
FileUtils.cd File.join(MARQ.datadir, action)
|
204
|
+
puts "Install #{action}. Target: #{@rake_action}"
|
204
205
|
load File.join(MARQ.datadir, action, 'Rakefile')
|
206
|
+
|
207
|
+
FileUtils.cd File.join(MARQ.datadir, action)
|
205
208
|
Rake::Task[@rake_action].invoke
|
206
209
|
}.join
|
207
|
-
|
210
|
+
|
211
|
+
end
|
208
212
|
end
|
209
213
|
|
210
214
|
|
@@ -12,6 +12,7 @@ require File.join(File.dirname(File.dirname(__FILE__)), 'rake_includes')
|
|
12
12
|
$platform ||= ENV['platform']
|
13
13
|
$organism ||= [$organism, ENV['organism'], nil].compact.first
|
14
14
|
$dataset ||= ENV['dataset']
|
15
|
+
$series = [$series, ENV['series'], true].compact.first.to_s == 'true'
|
15
16
|
|
16
17
|
# More global variables in rake_includes file
|
17
18
|
|
@@ -51,7 +52,7 @@ def process_list
|
|
51
52
|
else
|
52
53
|
organism = GEO::Remote::platform_organism(platform)
|
53
54
|
end
|
54
|
-
if organism.split(',').select{|org| organisms.include?
|
55
|
+
if organism.split(',').select{|org| organisms.include?(Organism.name2org(org.strip)) || organisms.include?(org) }.any?
|
55
56
|
list[platform] ||= []
|
56
57
|
list[platform] << serie
|
57
58
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
---
|
2
|
+
:title: Transcriptional effects of the TOR2-controlled signaling function
|
3
|
+
:description: |-
|
4
|
+
We analysed the transcriptional effects of the TOR2-controlled signaling function using a genome-wide microarray approach in yeast. In S. cerevisiae, TOR2 has two essential signaling functions. One, shared with TOR1, is required for translation initiation, transcription, and cell growth in response to the presence of nutrients. The second is unique to TOR2, and functions in cell-cycle-dependent actin polarization and possibly in transcription. A previous genetic screen for mutants defective in the TOR-shared and the TOR2-unique functions identified several TOR2 temperature-sensitive alleles. In this study, we compared total transcription profiles for strain SH121, which is specifically defective in the TOR2-unique function, and its isogenic wild type counterpart SH100.
|
5
|
+
Keywords = TOR
|
6
|
+
Keywords: time-course
|
7
|
+
:arrays:
|
8
|
+
GSM31661:
|
9
|
+
time: 2H
|
10
|
+
treatment: SH121
|
11
|
+
GSM31662:
|
12
|
+
time: 2H
|
13
|
+
treatment: SH121
|
14
|
+
GSM31663:
|
15
|
+
time: 6H
|
16
|
+
treatment: SH121
|
17
|
+
GSM31664:
|
18
|
+
time: 6H
|
19
|
+
treatment: SH121
|
20
|
+
GSM31653:
|
21
|
+
time: 0H
|
22
|
+
treatment: SH100
|
23
|
+
GSM31654:
|
24
|
+
time: 0H
|
25
|
+
treatment: SH100
|
26
|
+
GSM31655:
|
27
|
+
time: 2H
|
28
|
+
treatment: SH100
|
29
|
+
GSM31656:
|
30
|
+
time: 2H
|
31
|
+
treatment: SH100
|
32
|
+
GSM31657:
|
33
|
+
time: 6H
|
34
|
+
treatment: SH100
|
35
|
+
GSM31658:
|
36
|
+
time: 6H
|
37
|
+
treatment: SH100
|
38
|
+
GSM31659:
|
39
|
+
time: 0H
|
40
|
+
treatment: SH121
|
41
|
+
GSM31660:
|
42
|
+
time: 0H
|
43
|
+
treatment: SH121
|
44
|
+
:platform: GPL90
|
@@ -0,0 +1,44 @@
|
|
1
|
+
---
|
2
|
+
:title: snf/swi mutants of S. cerevisiae.
|
3
|
+
:description: |-
|
4
|
+
The Saccharomyces cerevisiae Snf/Swi complex has been previously demonstrated to control transcription and chromatin structure of particular genes in vivo and to remodel nucleosomes in vitro. We have performed whole-genome expression analysis, using DNA microarrays, to study mutants deleted for a gene encoding one conserved (Snf2) or one unconserved (Swi1) Snf/Swi component. This analysis was performed on cells grown in both rich and minimal media. The microarray results, combined with Northern blot, computational, and genetic analyses, show that snf2Delta and swi1Delta mutations cause similar effects on mRNA levels, that Snf/Swi controls some genes differently in rich and minimal media, and that Snf/Swi control is exerted at the level of individual genes rather than over larger chromosomal domains. In addition, this work shows that Snf/Swi controls mRNA levels of MATalpha-specific genes, likely via controlling transcription of the regulators MATalpha1 and MCM1. Finally, we provide evidence that Snf/Swi acts both as an activator and as a repressor of transcription, and that neither mode of control is an indirect effect of the other.
|
5
|
+
This study is described in more detail in Sudarsanam P et al.(2000) Proc Natl Acad Sci U S A 97:3364-9
|
6
|
+
Keywords: other
|
7
|
+
:arrays:
|
8
|
+
GSM1011:
|
9
|
+
mutante: snf2
|
10
|
+
medio: rich
|
11
|
+
GSM1012:
|
12
|
+
mutante: swi1
|
13
|
+
medio: rich
|
14
|
+
GSM1013:
|
15
|
+
mutante: snf2
|
16
|
+
medio: rich
|
17
|
+
GSM1014:
|
18
|
+
mutante: snf2
|
19
|
+
medio: rich
|
20
|
+
GSM1015:
|
21
|
+
mutante: swi1
|
22
|
+
medio: rich
|
23
|
+
GSM1004:
|
24
|
+
mutante: swi1
|
25
|
+
medio: minimal
|
26
|
+
GSM1005:
|
27
|
+
mutante: snf2
|
28
|
+
medio: minimal
|
29
|
+
GSM1006:
|
30
|
+
mutante: swi1
|
31
|
+
medio: minimal
|
32
|
+
GSM1007:
|
33
|
+
mutante: snf2
|
34
|
+
medio: minimal
|
35
|
+
GSM1008:
|
36
|
+
mutante: swi1
|
37
|
+
medio: minimal
|
38
|
+
GSM1010:
|
39
|
+
mutante: swi1
|
40
|
+
medio: rich
|
41
|
+
GSM1009:
|
42
|
+
mutante: snf2
|
43
|
+
medio: minimal
|
44
|
+
:platform: GPL57
|
@@ -0,0 +1,22 @@
|
|
1
|
+
---
|
2
|
+
:title: Sporulation in yeast
|
3
|
+
:description: |-
|
4
|
+
Diploid cells of budding yeast produce haploid cells through the developmental program of sporulation, which consists of meiosis and spore morphogenesis. DNA microarrays containing nearly every yeast gene were used to assay changes in gene expression during sporulation. At least seven distinct temporal patterns of induction were observed. The transcription factor Ndt80 appeared to be important for induction of a large group of genes at the end of meiotic prophase. Consensus sequences known or proposed to be responsible for temporal regulation could be identified solely from analysis of sequences of coordinately expressed genes. The temporal expression pattern provided clues to potential functions of hundreds of previously uncharacterized genes, some of which have vertebrate homologs that may function during gametogenesis.
|
5
|
+
This study is described in more detail in Chu S, et al. 1998. Science 282:699-705
|
6
|
+
Keywords: time-course
|
7
|
+
:arrays:
|
8
|
+
GSM1000:
|
9
|
+
time: 0.5h
|
10
|
+
GSM995:
|
11
|
+
time: 17h
|
12
|
+
GSM996:
|
13
|
+
time: 19h
|
14
|
+
GSM998:
|
15
|
+
time: 11h
|
16
|
+
GSM992:
|
17
|
+
time: 0h
|
18
|
+
GSM993:
|
19
|
+
time: 12h
|
20
|
+
GSM994:
|
21
|
+
time: 15h
|
22
|
+
:platform: GPL67
|
@@ -0,0 +1,19 @@
|
|
1
|
+
---
|
2
|
+
:arrays:
|
3
|
+
GSM125326:
|
4
|
+
condition: BY
|
5
|
+
GSM125327:
|
6
|
+
condition: BY
|
7
|
+
GSM125328:
|
8
|
+
condition: BY
|
9
|
+
GSM125330:
|
10
|
+
condition: dbr1
|
11
|
+
GSM125329:
|
12
|
+
condition: dbr1
|
13
|
+
GSM125331:
|
14
|
+
condition: dbr1
|
15
|
+
:description: |-
|
16
|
+
Introns in pre-mRNAs must be spliced out prior to their translation. During splicing, introns are removed in the form of a lariat, in which the 5' end is linked to the 2' hydroxyl of an internal adenosine. Lariat degradation is initiated by an 2'-5' phosphodiester-specific RNA endonuclease which debranches these lariat RNAs to linear form. Deletion of the debranching enzyme is yeast results in the accumulation of lariat introns. We used this accumulation to identify spliced lariat introns on a genome-wide scale in S. cerevisiae using tiling microarrays.
|
17
|
+
Keywords: two sample comparison, 3 biological replicates
|
18
|
+
:title: Expression data from BY4743 and dbr1 yeast
|
19
|
+
:platform: GPL4065
|
@@ -1,10 +1,11 @@
|
|
1
|
+
require 'progress-monitor'
|
2
|
+
|
1
3
|
$expr_threshold ||= (ENV['threshold'] || 0.05).to_f
|
2
4
|
$folds ||= (ENV['folds'] || 2.5).to_f
|
3
5
|
$nth_genes ||= (ENV['nth_genes'] || 100).to_i
|
4
6
|
|
5
7
|
$force = [$force, ENV['force'], false].compact.first.to_s == 'true'
|
6
8
|
$tranlations = [$tranlations, ENV['translations'], false].compact.first.to_s == 'true'
|
7
|
-
$series = [$series, ENV['series'], true].compact.first.to_s == 'true'
|
8
9
|
$update_db = [$update_db, ENV['update_db'], false].compact.first.to_s == 'true'
|
9
10
|
$skip_db = [$skip_db, ENV['skip_db'], false].compact.first.to_s == 'true'
|
10
11
|
$fdr = [$fdr, ENV['fdr'], true].compact.first.to_s == 'true'
|
@@ -28,12 +29,23 @@ module GEO::Process::R
|
|
28
29
|
end
|
29
30
|
end
|
30
31
|
|
32
|
+
module CustomDS
|
33
|
+
class << self
|
34
|
+
alias_method :process_dataset_old, :process_dataset
|
35
|
+
def process_dataset(*args)
|
36
|
+
$changes = true
|
37
|
+
process_dataset_old(*args)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
31
42
|
desc "Analyze datasets"
|
32
43
|
task 'data' do
|
33
|
-
|
34
44
|
platforms_to_save = []
|
35
45
|
|
36
46
|
platforms = process_list
|
47
|
+
|
48
|
+
Progress.monitor("Processing #{platforms.keys.length} platforms") if platforms.keys.length > 1
|
37
49
|
platforms.each{|platform, datasets|
|
38
50
|
|
39
51
|
begin
|
@@ -50,10 +62,15 @@ task 'data' do
|
|
50
62
|
|
51
63
|
$changes = false
|
52
64
|
# Process all datasets
|
65
|
+
|
66
|
+
Progress.monitor("Processing #{datasets.length} datasets") if datasets.length > 1
|
53
67
|
datasets.each{|dataset|
|
54
68
|
begin
|
55
|
-
|
69
|
+
already_processed = MARQ::Dataset.exists?(dataset) || MARQ::Dataset.broken?(dataset)
|
70
|
+
next if already_processed && ! $force
|
71
|
+
|
56
72
|
MARQ::Dataset.process(dataset, platform)
|
73
|
+
MARQ::Dataset.process(MARQ::Name.cross_platform(dataset), platform) if MARQ::Platform.has_cross_platform?(platform)
|
57
74
|
rescue
|
58
75
|
puts "Error processing dataset #{ dataset }"
|
59
76
|
puts $!.message
|
@@ -65,9 +82,9 @@ task 'data' do
|
|
65
82
|
platforms_to_save << platform if $changes || $update_db
|
66
83
|
}
|
67
84
|
|
85
|
+
Progress.monitor("Saving #{platforms_to_save.length} platforms in DB") if platforms_to_save.length > 1
|
68
86
|
platforms_to_save.each{|platform|
|
69
87
|
begin
|
70
|
-
puts "Saving #{platform}"
|
71
88
|
MADB.save_platform(platform)
|
72
89
|
rescue
|
73
90
|
puts "Error saving platform #{ platform }"
|
@@ -88,7 +105,7 @@ def annotations(name, cross_platform = false, &block)
|
|
88
105
|
|
89
106
|
FileUtils.mkdir_p File.join("annotations", name)
|
90
107
|
filename = File.join("annotations", name, dataset)
|
91
|
-
dataset +=
|
108
|
+
dataset += MARQ::Name.cross_platform(dataset) if cross_platform && MARQ::Platform::has_cross_platform?(platform)
|
92
109
|
next if ! MARQ::Dataset.exists?(dataset)
|
93
110
|
terms = block.call(dataset)
|
94
111
|
Open.write(filename, terms.to_yaml)
|
data/lib/MARQ/CustomDS.rb
CHANGED
@@ -42,25 +42,23 @@ module CustomDS
|
|
42
42
|
Dir.glob(File.join(DATA_DIR, org) + '/*.orders').collect{|f| clean(File.basename(f.sub(/.orders/,'')))}.uniq
|
43
43
|
end
|
44
44
|
|
45
|
-
def self.
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
if cross_platform.length > codes.length / 4
|
57
|
-
Open.write(File.join(prefix,'cross_platform'),cross_platform.collect{|c| c || "NO MATCH"}.join("\n"))
|
45
|
+
def self.process_dataset(dataset, platform)
|
46
|
+
org = platform_organism(platform)
|
47
|
+
platform_path = platform_path(platform)
|
48
|
+
prefix = File.join(DATA_DIR, org, MARQ::Name.clean(dataset))
|
49
|
+
|
50
|
+
conditions = Dir.glob(File.join(platform_path, '*')).collect{|f| File.basename(f)} - %w(values codes info description cross_platform)
|
51
|
+
description = Open.read(File.join(platform_path, 'description'))
|
52
|
+
info = YAML.load(File.open(File.join(platform_path, 'info')))
|
53
|
+
|
54
|
+
if MARQ::Dataset.is_cross_platform?(dataset)
|
58
55
|
r.CustomDS_process(prefix, true, conditions, description, info["two_channel"], !info["log2"])
|
56
|
+
else
|
57
|
+
r.CustomDS_process(prefix, false, conditions, description, info["two_channel"], !info["log2"])
|
59
58
|
end
|
60
|
-
end
|
61
59
|
|
62
|
-
|
63
|
-
|
60
|
+
|
61
|
+
end
|
64
62
|
|
65
63
|
def self.organisms
|
66
64
|
Dir.glob(File.join(DATA_DIR, '*')).
|
@@ -69,23 +67,22 @@ module CustomDS
|
|
69
67
|
end
|
70
68
|
|
71
69
|
def self.dataset_path(dataset)
|
70
|
+
|
72
71
|
organisms.each do |organism|
|
73
|
-
|
74
|
-
when File.exists?(File.join(DATA_DIR, organism, dataset + '.orders'))
|
72
|
+
if File.exists?(File.join(DATA_DIR, organism, dataset + '.orders')) || File.exists?(File.join(DATA_DIR, organism, dataset + '.skip'))
|
75
73
|
return File.join(DATA_DIR, organism, dataset)
|
76
|
-
when File.exists?(File.join(DATA_DIR, organism, dataset + '.skip'))
|
77
|
-
return nil
|
78
74
|
end
|
79
75
|
end
|
76
|
+
|
80
77
|
return nil
|
81
78
|
end
|
82
79
|
|
83
80
|
def self.platform_path(platform)
|
84
|
-
|
81
|
+
Dir.glob(File.join(DATA_DIR, '*', platform)).first
|
85
82
|
end
|
86
83
|
|
87
84
|
def self.platform_datasets(platform)
|
88
|
-
MARQ::
|
85
|
+
MARQ::Name.clean(platform)
|
89
86
|
end
|
90
87
|
|
91
88
|
def self.platform_organism(platform)
|
@@ -104,22 +101,21 @@ module CustomDS
|
|
104
101
|
end
|
105
102
|
|
106
103
|
def self.organism_platforms(organism)
|
107
|
-
Dir.glob(File.join(DATA_DIR,organism,'
|
108
|
-
collect {|path| File.basename(
|
109
|
-
uniq
|
104
|
+
Dir.glob(File.join(DATA_DIR, organism, '*', 'codes')).
|
105
|
+
collect {|path| File.basename(File.dirname(path))}.uniq
|
110
106
|
end
|
111
107
|
|
112
108
|
def self.process_platform(platform)
|
113
|
-
|
109
|
+
prefix = platform_path(platform)
|
110
|
+
org = platform_organism(platform)
|
114
111
|
|
115
|
-
|
116
|
-
|
117
|
-
org = dataset_organism(dataset)
|
118
|
-
prefix = File.join(DATA_DIR, org, dataset)
|
112
|
+
codes = Open.read(File.join(prefix,'codes')).collect{|l| l.chomp}
|
113
|
+
cross_platform = ID.translate(org, codes)
|
119
114
|
|
120
|
-
|
115
|
+
if cross_platform.length > codes.length / 4
|
116
|
+
Open.write(File.join(prefix,'cross_platform'),cross_platform.collect{|c| c || "NO MATCH"}.join("\n"))
|
117
|
+
end
|
121
118
|
end
|
122
|
-
|
123
119
|
end
|
124
120
|
|
125
121
|
|
data/lib/MARQ/GEO.rb
CHANGED
@@ -40,13 +40,11 @@ module GEO
|
|
40
40
|
|
41
41
|
def self.platform_organism(platform)
|
42
42
|
Open.read("http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=#{platform}", :nice => @@nice).
|
43
|
-
|
43
|
+
scan(%r#<a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi\?mode=Info&id=\d+" onmouseout="onLinkOut\('HelpMessage' , geo_empty_help\)" onmouseover="onLinkOver\('HelpMessage' , geoaxema_organismus\)">(.*?)</a>#).collect{|p| p.first}.join(', ')
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
47
47
|
|
48
|
-
|
49
|
-
|
50
48
|
# Parse information in .soft files
|
51
49
|
module SOFT
|
52
50
|
|
@@ -111,22 +109,26 @@ module GEO
|
|
111
109
|
end
|
112
110
|
|
113
111
|
|
112
|
+
#{{{ Parse soft files for several GEO entities
|
114
113
|
|
115
114
|
def self.GSE(series)
|
116
115
|
soft = get_soft(series)
|
117
116
|
|
117
|
+
# Find platform
|
118
118
|
if match = soft.scan(/!Series_platform_id\s*=?\s*(.*)/)
|
119
119
|
platform = match.flatten.collect{|p| p.strip}.join("_")
|
120
120
|
else
|
121
121
|
raise "No Platform information"
|
122
122
|
end
|
123
123
|
|
124
|
+
# Find title
|
124
125
|
if soft.match(/!Series_title \s*=?\s*(.*)/)
|
125
126
|
title = $1
|
126
127
|
else
|
127
128
|
raise "No Title information"
|
128
129
|
end
|
129
130
|
|
131
|
+
# Find summary
|
130
132
|
if soft.match(/!Series_summary \s*=?\s*(.*)/)
|
131
133
|
matches = soft.scan(/!Series_summary \s*=?\s*(.*)/).to_a
|
132
134
|
description = matches.collect{|m| m.to_s.strip.sub(/!Series_summary \s*=?\s*/,'')}.join("\n")
|
@@ -134,6 +136,7 @@ module GEO
|
|
134
136
|
raise "No Summary information"
|
135
137
|
end
|
136
138
|
|
139
|
+
# Find samples
|
137
140
|
if soft.match(/!Series_sample_id \s*=?\s*(.*)/)
|
138
141
|
matches = soft.scan(/!Series_sample_id \s*=?\s*(.*)/).to_a
|
139
142
|
samples = matches.collect{|m| m.to_s.strip.sub(/!Series_sample_id \s*=?\s*/,'')}
|
@@ -152,6 +155,7 @@ module GEO
|
|
152
155
|
def self.GSM(array)
|
153
156
|
soft = get_soft(array)
|
154
157
|
|
158
|
+
# Find title
|
155
159
|
if soft.match(/!Sample_title\s*=?\s*(.*)/)
|
156
160
|
title = $1
|
157
161
|
else
|
@@ -159,6 +163,7 @@ module GEO
|
|
159
163
|
end
|
160
164
|
|
161
165
|
|
166
|
+
# Find description
|
162
167
|
if soft.match(/!Sample_description \s*=?\s*(.*)/)
|
163
168
|
description = $1
|
164
169
|
else
|
@@ -173,6 +178,7 @@ module GEO
|
|
173
178
|
end
|
174
179
|
|
175
180
|
def self.GPL(platform)
|
181
|
+
|
176
182
|
if !File.exist?(File.join(DATA_DIR, 'platforms',"#{platform}.yaml")) &&
|
177
183
|
!File.exist?(File.join(DATA_DIR, 'platforms',"#{platform}.skip"))
|
178
184
|
begin
|
@@ -263,6 +269,9 @@ module GEO
|
|
263
269
|
# Use R to load and process the datasets
|
264
270
|
module Process
|
265
271
|
|
272
|
+
class PlatformNotProcessedError < StandardError; end
|
273
|
+
class AdhocPlatformCollisionError < StandardError; end
|
274
|
+
|
266
275
|
# R library wrapper
|
267
276
|
module R
|
268
277
|
@@r = nil
|
@@ -350,109 +359,87 @@ module GEO
|
|
350
359
|
rearange(platform_positions, prefix + '.' + ext)
|
351
360
|
}
|
352
361
|
|
362
|
+
FileUtils.cp(platform_codes_file, prefix + '.codes')
|
353
363
|
Open.write(prefix + '.swap', platform_positions.join("\n"))
|
354
|
-
end
|
355
|
-
|
356
|
-
|
357
|
-
# Process a dataset. Need to specify the platform. The field parameter can
|
358
|
-
# be used to use a different column for the field.
|
359
|
-
#
|
360
|
-
# Deprecated in favor of using the original firt column and using a
|
361
|
-
# different one only for translation
|
362
|
-
def self.GDS(dataset, platform, field = nil)
|
363
|
-
puts "Processing GDS #{ dataset }. Platform #{ platform }"
|
364
|
-
platform_path = GEO.platform_path(platform)
|
365
364
|
|
366
|
-
puts "-- Original"
|
367
|
-
prefix = File.join(platform_path, 'GDS', dataset.to_s)
|
368
|
-
R.GDS(dataset, prefix, field, nil)
|
369
|
-
|
370
|
-
# Was there an error?
|
371
|
-
if File.exist?(prefix + '.skip')
|
372
|
-
FileUtils.cp(prefix + '.skip', prefix + '_cross_platform.skip')
|
373
|
-
return
|
374
|
-
end
|
375
|
-
|
376
|
-
if File.exist?(File.join(platform,'cross_platform'))
|
377
|
-
puts "-- Translated to cross_platform format"
|
378
|
-
R.GDS(dataset, prefix + '_cross_platform', field, File.join(platform_path, 'translations'))
|
379
|
-
else
|
380
|
-
puts "No cross_platform probe ids for platform"
|
381
|
-
end
|
382
365
|
end
|
383
366
|
|
384
|
-
# Process a series. The info parameters is a hash with the :array,
|
385
|
-
# :platform, :log2 and :fields keys
|
386
367
|
def self.GSE(series, info)
|
387
|
-
|
388
|
-
|
389
|
-
|
368
|
+
platform = info[:platform]
|
369
|
+
do_log = info[:log2].nil? ? nil : !info[:log2]
|
370
|
+
fields = info[:fields]
|
371
|
+
|
372
|
+
# Determine samples and sample conditions
|
390
373
|
gsms = []
|
391
374
|
conditions = {}
|
392
375
|
info[:arrays].each{|gsm, cond|
|
393
376
|
gsms << gsm
|
394
|
-
cond.each{|
|
395
|
-
conditions[
|
396
|
-
conditions[
|
377
|
+
cond.each{|type, value|
|
378
|
+
conditions[type] ||= []
|
379
|
+
conditions[type] << value
|
397
380
|
}
|
398
381
|
}
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
382
|
+
|
383
|
+
# Adhoc platforms are for series with samples from different platforms.
|
384
|
+
# They are created when the series is processed
|
385
|
+
adhoc_platform = platform.match(/_/) != nil
|
403
386
|
|
404
|
-
|
405
|
-
|
406
|
-
prefix = File.join(platform_path, 'GSE', series.to_s)
|
387
|
+
raise PlatformNotProcessedError if ! adhoc_platform && ! MARQ::Platform.exists?(platform)
|
388
|
+
raise AdhocPlatformCollisionError if adhoc_platform && MARQ::Platform.exists?(platform)
|
407
389
|
|
408
|
-
|
409
|
-
puts "-- Original"
|
410
|
-
R.GSE(gsms, conditions, do_log, prefix, nil, fields, info[:title], info[:description])
|
390
|
+
cross_platform = MARQ::Name.is_cross_platform?(series)
|
411
391
|
|
412
|
-
|
413
|
-
if File.exist?(prefix + '.skip')
|
414
|
-
FileUtils.cp(prefix + '.skip', prefix + '_cross_platform.skip')
|
415
|
-
return
|
416
|
-
end
|
392
|
+
platform_path = GEO.platform_path(platform)
|
417
393
|
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
394
|
+
prefix = File.join(platform_path, 'GSE', series)
|
395
|
+
|
396
|
+
FileUtils.rm(prefix + '.skip') if File.exist?(prefix + '.skip')
|
397
|
+
|
398
|
+
if ! cross_platform
|
399
|
+
R.GSE(gsms, conditions, do_log, prefix, nil, fields, info[:title], info[:description])
|
400
|
+
|
401
|
+
# Set up codes and cross_platform for adhoc platforms
|
402
|
+
if adhoc_platform
|
403
|
+
codes = Open.read(prefix + '.codes').collect{|l| l.chomp}
|
404
|
+
organism = GEO.platform_organism(platform.split(/_/)[0])
|
405
|
+
translations = translate(organism, codes)
|
406
|
+
FileUtils.cp(prefix + '.codes', File.join(platform_path,'codes'))
|
407
|
+
Open.write(File.join(platform_path, 'translations'), translations.collect{|v| v || "NO MATCH"}.join("\n"))
|
408
|
+
Open.write(File.join(platform_path, 'cross_platform'), translations.compact.sort.uniq.join("\n"))
|
409
|
+
else
|
428
410
|
fix_GSE_ids(File.join(platform_path, 'codes'),prefix);
|
429
|
-
FileUtils.cp(File.join(platform_path, 'codes'),prefix + '.codes')
|
430
411
|
end
|
412
|
+
|
413
|
+
else
|
414
|
+
R.GSE(gsms, conditions, do_log, prefix, File.join(platform_path, 'translations'), fields, info[:title], info[:description])
|
415
|
+
fix_GSE_ids(File.join(platform_path, 'cross_platform'),prefix);
|
431
416
|
end
|
432
417
|
|
418
|
+
end
|
433
419
|
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
420
|
+
# Process a dataset. Need to specify the platform. The field parameter can
|
421
|
+
# be used to use a different column for the field.
|
422
|
+
#
|
423
|
+
# Deprecated in favor of using the original firt column and using a
|
424
|
+
# different one only for translation
|
425
|
+
def self.GDS(dataset, platform, field = nil)
|
426
|
+
raise PlatformNotProcessedError if ! MARQ::Platform.exists? platform
|
427
|
+
|
428
|
+
cross_platform = MARQ::Name.is_cross_platform? dataset
|
429
|
+
|
430
|
+
platform_path = GEO.platform_path(platform)
|
431
|
+
prefix = File.join(platform_path, 'GDS', dataset)
|
432
|
+
|
433
|
+
FileUtils.rm(prefix + '.skip') if File.exist?(prefix + '.skip')
|
434
|
+
|
435
|
+
if cross_platform
|
436
|
+
R.GDS(MARQ::Name.clean(dataset), prefix, field, File.join(platform_path, 'translations'))
|
450
437
|
else
|
451
|
-
|
438
|
+
R.GDS(dataset, prefix, field, nil)
|
452
439
|
end
|
453
|
-
FileUtils.rm(prefix + '.swap') if File.exist?(prefix + '.swap')
|
454
440
|
end
|
455
441
|
|
442
|
+
|
456
443
|
# Load GPL data. Translates IDS of the platform probes using AILUN and our
|
457
444
|
# system (called biomart for clarity)
|
458
445
|
def self.GPL(platform)
|
@@ -535,7 +522,6 @@ module GEO
|
|
535
522
|
end
|
536
523
|
|
537
524
|
end
|
538
|
-
|
539
525
|
end
|
540
526
|
|
541
527
|
def self.platforms
|
@@ -554,7 +540,7 @@ module GEO
|
|
554
540
|
|
555
541
|
def self.platform_path(platform)
|
556
542
|
path = File.join(DATA_DIR, platform)
|
557
|
-
path = nil unless File.exists? path
|
543
|
+
path = nil unless File.exists? File.join(path, 'codes')
|
558
544
|
path
|
559
545
|
end
|
560
546
|
|
@@ -567,15 +553,15 @@ module GEO
|
|
567
553
|
|
568
554
|
platforms.each do |platform|
|
569
555
|
platform_path = platform_path(platform)
|
556
|
+
|
570
557
|
next if platform_path.nil?
|
571
558
|
|
572
559
|
prefix = File.join(platform_path, dataset_type(dataset).to_s, dataset)
|
573
|
-
|
574
|
-
|
560
|
+
|
561
|
+
if File.exists?(prefix + '.orders') || File.exists?(prefix + '.skip')
|
575
562
|
return File.join(platform_path, dataset_type(dataset).to_s, dataset)
|
576
|
-
when File.exists?(prefix + '.skip')
|
577
|
-
return nil
|
578
563
|
end
|
564
|
+
|
579
565
|
end
|
580
566
|
|
581
567
|
return nil
|
@@ -584,7 +570,7 @@ module GEO
|
|
584
570
|
def self.platform_datasets(platform)
|
585
571
|
cross_platform = MARQ::Platform.is_cross_platform? platform
|
586
572
|
|
587
|
-
path = platform_path(MARQ::
|
573
|
+
path = platform_path(MARQ::Name.clean(platform))
|
588
574
|
return [] if path.nil?
|
589
575
|
|
590
576
|
datasets = Dir.glob(File.join(path, '*', '*.orders')).
|
@@ -592,7 +578,7 @@ module GEO
|
|
592
578
|
|
593
579
|
if cross_platform
|
594
580
|
datasets.select {|dataset| MARQ::Dataset.is_cross_platform? dataset }.
|
595
|
-
collect {|dataset| MARQ::
|
581
|
+
collect {|dataset| MARQ::Name.clean(dataset) }
|
596
582
|
else
|
597
583
|
datasets.select {|dataset| ! MARQ::Dataset.is_cross_platform? dataset }
|
598
584
|
end
|
@@ -614,7 +600,7 @@ module GEO
|
|
614
600
|
end
|
615
601
|
|
616
602
|
def self.process_platform(platform)
|
617
|
-
GEO::Process.GPL(platform)
|
603
|
+
GEO::Process.GPL(platform) unless platform =~ /_/
|
618
604
|
end
|
619
605
|
|
620
606
|
def self.process_dataset(dataset, platform)
|