rbbt-GE 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/GE/GEO.rb +72 -7
- data/share/install/GEO/Rakefile +8 -0
- data/test/rbbt/GE/test_GEO.rb +3 -5
- metadata +5 -7
data/lib/rbbt/GE/GEO.rb
CHANGED
@@ -58,20 +58,27 @@ module GEO
|
|
58
58
|
GSE_URL="ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/by_series/#SERIES#/#SERIES#_family.soft.gz"
|
59
59
|
|
60
60
|
GSE_INFO = {
|
61
|
+
:DELIMITER => "\\^PLATFORM",
|
61
62
|
:title => "!Series_title",
|
62
63
|
:channel_count => "!Sample_channel_count",
|
63
64
|
:value_type => "!Series_value_type",
|
64
65
|
:platform => "!Series_platform_id",
|
65
66
|
:description => "!Series_summary*", # Join with \n
|
66
67
|
}
|
67
|
-
|
68
|
+
|
69
|
+
GSE_SAMPLE_INFO = {
|
70
|
+
:DELIMITER => "\\^SAMPLE",
|
71
|
+
:title => "!Sample_title",
|
72
|
+
:accession => "!Sample_geo_accession",
|
73
|
+
:channel_count => "!Sample_channel_count",
|
74
|
+
}
|
75
|
+
|
68
76
|
GDS_INFO = {
|
69
|
-
:DELIMITER => "\\^SUBSET",
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
74
|
-
:description => "!dataset_description",
|
77
|
+
:DELIMITER => "\\^SUBSET|!sample_table_begin",
|
78
|
+
:title => "!Sample_title",
|
79
|
+
:accession => "!Sample_geo_accession",
|
80
|
+
:channel_count => "!Sample_channel_count",
|
81
|
+
:platform => "!Sample_platform_id",
|
75
82
|
}
|
76
83
|
|
77
84
|
GDS_SUBSET_INFO = {
|
@@ -253,6 +260,64 @@ module GEO
|
|
253
260
|
|
254
261
|
info
|
255
262
|
end
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
def self.series_samples(stream)
|
267
|
+
text = stream.read
|
268
|
+
|
269
|
+
values = nil
|
270
|
+
|
271
|
+
sample_info = {}
|
272
|
+
|
273
|
+
samples = []
|
274
|
+
text.split(/\^SAMPLE/).each do |chunk|
|
275
|
+
info = get_info(chunk, GSE_SAMPLE_INFO)
|
276
|
+
sample = info[:accession]
|
277
|
+
next if sample.nil?
|
278
|
+
|
279
|
+
samples << sample
|
280
|
+
|
281
|
+
sample_values = TSV.open(StringIO.new(chunk.match(/!sample_table_begin(.*)!sample_table_end/msi)[0].strip), :type => :list, :header_hash => '')
|
282
|
+
sample_values.fields = [sample]
|
283
|
+
|
284
|
+
if values.nil?
|
285
|
+
values = sample_values
|
286
|
+
else
|
287
|
+
values.attach sample_values
|
288
|
+
end
|
289
|
+
sample_info[sample] = info
|
290
|
+
end
|
291
|
+
|
292
|
+
[values, sample_info]
|
293
|
+
end
|
294
|
+
|
295
|
+
def self.GSE(series, directory)
|
296
|
+
FileUtils.mkdir_p directory unless File.exists? directory
|
297
|
+
|
298
|
+
value_file = File.join(directory, 'values')
|
299
|
+
info_file = File.join(directory, 'info.yaml')
|
300
|
+
|
301
|
+
stream = Open.open(GSE_URL.gsub('#SERIES#', series), :nocache => true)
|
302
|
+
|
303
|
+
info = parse_header(stream, GSE_INFO)
|
304
|
+
info[:value_file] = value_file
|
305
|
+
info[:data_directory] = directory
|
306
|
+
|
307
|
+
Log.medium "Producing values file for #{ series }"
|
308
|
+
values, sample_info = series_samples(stream)
|
309
|
+
|
310
|
+
key_field = TSV.parse_header(GEO[info[:platform]]['codes'].open).key_field
|
311
|
+
values.key_field = key_field
|
312
|
+
|
313
|
+
Open.write(value_file, values.to_s)
|
314
|
+
Open.write(info_file, info.to_yaml)
|
315
|
+
|
316
|
+
info[:channel_count] ||= sample_info.values.first[:channel_count]
|
317
|
+
info[:value_type] ||= sample_info.values.first[:value_type]
|
318
|
+
|
319
|
+
info
|
320
|
+
end
|
256
321
|
end
|
257
322
|
|
258
323
|
def self.compare(dataset, field, condition, control, path)
|
data/share/install/GEO/Rakefile
CHANGED
@@ -13,6 +13,14 @@ rule /^(GDS\d+)\/?(values|info\.yaml)?$/ do |t|
|
|
13
13
|
GEO::SOFT.GDS(dataset, file.nil? ? t.name : File.dirname(t.name))
|
14
14
|
end
|
15
15
|
|
16
|
+
rule /^(GSE\d+)\/?(values|info\.yaml)?$/ do |t|
|
17
|
+
t.name =~ /^(GSE\d+)\/?(values|info\.yaml)?/
|
18
|
+
series = $1
|
19
|
+
file = $2
|
20
|
+
GEO::SOFT.GSE(series, file.nil? ? t.name : File.dirname(t.name))
|
21
|
+
end
|
22
|
+
|
23
|
+
|
16
24
|
rule /^(GDS\d+)\/comparison\/(.*)$/ do |t|
|
17
25
|
t.name =~ /^(GDS\d+)\/comparison\/(.*)/
|
18
26
|
dataset = $1
|
data/test/rbbt/GE/test_GEO.rb
CHANGED
@@ -71,9 +71,9 @@ class TestClass < Test::Unit::TestCase
|
|
71
71
|
end
|
72
72
|
|
73
73
|
def test_GSE
|
74
|
-
gse="
|
75
|
-
info = GEO.GSE(gse)
|
76
|
-
assert_equal "
|
74
|
+
gse = "GSE21029"
|
75
|
+
info = GEO::SOFT.GSE(gse, "/tmp/gse")
|
76
|
+
assert_equal "GPL570", info[:platform]
|
77
77
|
end
|
78
78
|
|
79
79
|
|
@@ -98,7 +98,5 @@ class TestClass < Test::Unit::TestCase
|
|
98
98
|
|
99
99
|
assert GEO[dataset].comparison[GEO.comparison_name field, condition, control].produce.exists?
|
100
100
|
end
|
101
|
-
|
102
|
-
|
103
101
|
end
|
104
102
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-GE
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-01-
|
19
|
-
default_executable:
|
18
|
+
date: 2012-01-31 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: rbbt-util
|
@@ -48,7 +47,6 @@ files:
|
|
48
47
|
- share/lib/R/MA.R
|
49
48
|
- test/test_helper.rb
|
50
49
|
- test/rbbt/GE/test_GEO.rb
|
51
|
-
has_rdoc: true
|
52
50
|
homepage: http://github.com/mikisvaz/rbbt-GE
|
53
51
|
licenses: []
|
54
52
|
|
@@ -78,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
76
|
requirements: []
|
79
77
|
|
80
78
|
rubyforge_project:
|
81
|
-
rubygems_version: 1.
|
79
|
+
rubygems_version: 1.8.10
|
82
80
|
signing_key:
|
83
81
|
specification_version: 3
|
84
82
|
summary: Gene Expression in RBBT
|