rbbt-GE 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/GE/GEO.rb +72 -7
- data/share/install/GEO/Rakefile +8 -0
- data/test/rbbt/GE/test_GEO.rb +3 -5
- metadata +5 -7
data/lib/rbbt/GE/GEO.rb
CHANGED
@@ -58,20 +58,27 @@ module GEO
|
|
58
58
|
GSE_URL="ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/by_series/#SERIES#/#SERIES#_family.soft.gz"
|
59
59
|
|
60
60
|
GSE_INFO = {
|
61
|
+
:DELIMITER => "\\^PLATFORM",
|
61
62
|
:title => "!Series_title",
|
62
63
|
:channel_count => "!Sample_channel_count",
|
63
64
|
:value_type => "!Series_value_type",
|
64
65
|
:platform => "!Series_platform_id",
|
65
66
|
:description => "!Series_summary*", # Join with \n
|
66
67
|
}
|
67
|
-
|
68
|
+
|
69
|
+
GSE_SAMPLE_INFO = {
|
70
|
+
:DELIMITER => "\\^SAMPLE",
|
71
|
+
:title => "!Sample_title",
|
72
|
+
:accession => "!Sample_geo_accession",
|
73
|
+
:channel_count => "!Sample_channel_count",
|
74
|
+
}
|
75
|
+
|
68
76
|
GDS_INFO = {
|
69
|
-
:DELIMITER => "\\^SUBSET",
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
74
|
-
:description => "!dataset_description",
|
77
|
+
:DELIMITER => "\\^SUBSET|!sample_table_begin",
|
78
|
+
:title => "!Sample_title",
|
79
|
+
:accession => "!Sample_geo_accession",
|
80
|
+
:channel_count => "!Sample_channel_count",
|
81
|
+
:platform => "!Sample_platform_id",
|
75
82
|
}
|
76
83
|
|
77
84
|
GDS_SUBSET_INFO = {
|
@@ -253,6 +260,64 @@ module GEO
|
|
253
260
|
|
254
261
|
info
|
255
262
|
end
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
def self.series_samples(stream)
|
267
|
+
text = stream.read
|
268
|
+
|
269
|
+
values = nil
|
270
|
+
|
271
|
+
sample_info = {}
|
272
|
+
|
273
|
+
samples = []
|
274
|
+
text.split(/\^SAMPLE/).each do |chunk|
|
275
|
+
info = get_info(chunk, GSE_SAMPLE_INFO)
|
276
|
+
sample = info[:accession]
|
277
|
+
next if sample.nil?
|
278
|
+
|
279
|
+
samples << sample
|
280
|
+
|
281
|
+
sample_values = TSV.open(StringIO.new(chunk.match(/!sample_table_begin(.*)!sample_table_end/msi)[0].strip), :type => :list, :header_hash => '')
|
282
|
+
sample_values.fields = [sample]
|
283
|
+
|
284
|
+
if values.nil?
|
285
|
+
values = sample_values
|
286
|
+
else
|
287
|
+
values.attach sample_values
|
288
|
+
end
|
289
|
+
sample_info[sample] = info
|
290
|
+
end
|
291
|
+
|
292
|
+
[values, sample_info]
|
293
|
+
end
|
294
|
+
|
295
|
+
def self.GSE(series, directory)
|
296
|
+
FileUtils.mkdir_p directory unless File.exists? directory
|
297
|
+
|
298
|
+
value_file = File.join(directory, 'values')
|
299
|
+
info_file = File.join(directory, 'info.yaml')
|
300
|
+
|
301
|
+
stream = Open.open(GSE_URL.gsub('#SERIES#', series), :nocache => true)
|
302
|
+
|
303
|
+
info = parse_header(stream, GSE_INFO)
|
304
|
+
info[:value_file] = value_file
|
305
|
+
info[:data_directory] = directory
|
306
|
+
|
307
|
+
Log.medium "Producing values file for #{ series }"
|
308
|
+
values, sample_info = series_samples(stream)
|
309
|
+
|
310
|
+
key_field = TSV.parse_header(GEO[info[:platform]]['codes'].open).key_field
|
311
|
+
values.key_field = key_field
|
312
|
+
|
313
|
+
Open.write(value_file, values.to_s)
|
314
|
+
Open.write(info_file, info.to_yaml)
|
315
|
+
|
316
|
+
info[:channel_count] ||= sample_info.values.first[:channel_count]
|
317
|
+
info[:value_type] ||= sample_info.values.first[:value_type]
|
318
|
+
|
319
|
+
info
|
320
|
+
end
|
256
321
|
end
|
257
322
|
|
258
323
|
def self.compare(dataset, field, condition, control, path)
|
data/share/install/GEO/Rakefile
CHANGED
@@ -13,6 +13,14 @@ rule /^(GDS\d+)\/?(values|info\.yaml)?$/ do |t|
|
|
13
13
|
GEO::SOFT.GDS(dataset, file.nil? ? t.name : File.dirname(t.name))
|
14
14
|
end
|
15
15
|
|
16
|
+
rule /^(GSE\d+)\/?(values|info\.yaml)?$/ do |t|
|
17
|
+
t.name =~ /^(GSE\d+)\/?(values|info\.yaml)?/
|
18
|
+
series = $1
|
19
|
+
file = $2
|
20
|
+
GEO::SOFT.GSE(series, file.nil? ? t.name : File.dirname(t.name))
|
21
|
+
end
|
22
|
+
|
23
|
+
|
16
24
|
rule /^(GDS\d+)\/comparison\/(.*)$/ do |t|
|
17
25
|
t.name =~ /^(GDS\d+)\/comparison\/(.*)/
|
18
26
|
dataset = $1
|
data/test/rbbt/GE/test_GEO.rb
CHANGED
@@ -71,9 +71,9 @@ class TestClass < Test::Unit::TestCase
|
|
71
71
|
end
|
72
72
|
|
73
73
|
def test_GSE
|
74
|
-
gse="
|
75
|
-
info = GEO.GSE(gse)
|
76
|
-
assert_equal "
|
74
|
+
gse = "GSE21029"
|
75
|
+
info = GEO::SOFT.GSE(gse, "/tmp/gse")
|
76
|
+
assert_equal "GPL570", info[:platform]
|
77
77
|
end
|
78
78
|
|
79
79
|
|
@@ -98,7 +98,5 @@ class TestClass < Test::Unit::TestCase
|
|
98
98
|
|
99
99
|
assert GEO[dataset].comparison[GEO.comparison_name field, condition, control].produce.exists?
|
100
100
|
end
|
101
|
-
|
102
|
-
|
103
101
|
end
|
104
102
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-GE
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-01-
|
19
|
-
default_executable:
|
18
|
+
date: 2012-01-31 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: rbbt-util
|
@@ -48,7 +47,6 @@ files:
|
|
48
47
|
- share/lib/R/MA.R
|
49
48
|
- test/test_helper.rb
|
50
49
|
- test/rbbt/GE/test_GEO.rb
|
51
|
-
has_rdoc: true
|
52
50
|
homepage: http://github.com/mikisvaz/rbbt-GE
|
53
51
|
licenses: []
|
54
52
|
|
@@ -78,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
76
|
requirements: []
|
79
77
|
|
80
78
|
rubyforge_project:
|
81
|
-
rubygems_version: 1.
|
79
|
+
rubygems_version: 1.8.10
|
82
80
|
signing_key:
|
83
81
|
specification_version: 3
|
84
82
|
summary: Gene Expression in RBBT
|