rbbt-GE 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rbbt/GE/GEO.rb CHANGED
@@ -58,20 +58,27 @@ module GEO
58
58
  GSE_URL="ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/by_series/#SERIES#/#SERIES#_family.soft.gz"
59
59
 
60
60
  GSE_INFO = {
61
+ :DELIMITER => "\\^PLATFORM",
61
62
  :title => "!Series_title",
62
63
  :channel_count => "!Sample_channel_count",
63
64
  :value_type => "!Series_value_type",
64
65
  :platform => "!Series_platform_id",
65
66
  :description => "!Series_summary*", # Join with \n
66
67
  }
67
-
68
+
69
# Field map for one "^SAMPLE" section of a GEO series SOFT file: each key is
# the name under which a value is stored in the per-sample info hash, each
# value is the SOFT label it is read from.
#
# NOTE(review): :DELIMITER looks like the source of a regular expression
# marking the start of a sample section -- confirm against get_info.
GSE_SAMPLE_INFO = {
  :DELIMITER     => "\\^SAMPLE",
  :title         => "!Sample_title",
  :accession     => "!Sample_geo_accession",
  :channel_count => "!Sample_channel_count",
}
75
+
68
76
  GDS_INFO = {
69
- :DELIMITER => "\\^SUBSET",
70
- :value_type => "!dataset_value_type",
71
- :channel_count => "!dataset_channel_count",
72
- :platform => "!dataset_platform",
73
- :reference_series => "!dataset_reference_series",
74
- :description => "!dataset_description",
77
+ :DELIMITER => "\\^SUBSET|!sample_table_begin",
78
+ :title => "!Sample_title",
79
+ :accession => "!Sample_geo_accession",
80
+ :channel_count => "!Sample_channel_count",
81
+ :platform => "!Sample_platform_id",
75
82
  }
76
83
 
77
84
  GDS_SUBSET_INFO = {
@@ -253,6 +260,64 @@ module GEO
253
260
 
254
261
  info
255
262
  end
263
+
264
+
265
+
266
# Extracts the per-sample value tables and metadata from the remainder of a
# GEO series SOFT stream.
#
# The whole stream is read into memory and split on "^SAMPLE". Every chunk
# whose parsed info has an :accession is treated as one sample; chunks
# without one (such as the text preceding the first sample) are skipped.
#
# stream - object responding to #read and positioned after the series
#          header.
#
# Returns a two element Array [values, sample_info]:
#   values      - the TSV of the first sample with every later sample
#                 attached to it, one field per sample accession; nil when
#                 the stream holds no sample.
#   sample_info - Hash mapping each sample accession to its info hash.
#
# Raises RuntimeError when a sample section has no data table.
def self.series_samples(stream)
  values = nil
  sample_info = {}

  stream.read.split(/\^SAMPLE/).each do |chunk|
    info = get_info(chunk, GSE_SAMPLE_INFO)
    sample = info[:accession]
    next if sample.nil?

    # `m` already lets `.` span newlines. The former `s` flag is an encoding
    # flag in Ruby (Windows-31J) and made the match raise on chunks holding
    # non-ASCII UTF-8 text.
    table = chunk.match(/!sample_table_begin(.*)!sample_table_end/mi)
    raise "No data table found for sample #{ sample }" if table.nil?

    # Only the rows between the two markers are table content; the marker
    # lines themselves must not reach the TSV parser.
    sample_values = TSV.open(StringIO.new(table[1].strip), :type => :list, :header_hash => '')
    sample_values.fields = [sample]

    if values.nil?
      values = sample_values
    else
      values.attach sample_values
    end

    sample_info[sample] = info
  end

  [values, sample_info]
end
294
+
295
# Downloads a GEO series and stores its expression values and its info in
# +directory+.
#
# series    - series accession substituted for "#SERIES#" in GSE_URL
#             (for example "GSE21029").
# directory - path where the 'values' and 'info.yaml' files are written;
#             created when missing.
#
# Returns the info Hash parsed from the series header, extended with
# :value_file and :data_directory, and with :channel_count and :value_type
# taken from the first sample when the header does not provide them.
#
# Raises RuntimeError when the series contains no samples.
#
# NOTE(review): GSE_SAMPLE_INFO defines no :value_type entry, so the
# :value_type fallback below presumably always yields nil -- confirm.
def self.GSE(series, directory)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  FileUtils.mkdir_p directory unless File.exist? directory

  value_file = File.join(directory, 'values')
  info_file  = File.join(directory, 'info.yaml')

  stream = Open.open(GSE_URL.gsub('#SERIES#', series), :nocache => true)

  info = parse_header(stream, GSE_INFO)
  info[:value_file]     = value_file
  info[:data_directory] = directory

  Log.medium "Producing values file for #{ series }"
  values, sample_info = series_samples(stream)
  raise "No samples found in series #{ series }" if values.nil?

  key_field = TSV.parse_header(GEO[info[:platform]]['codes'].open).key_field
  values.key_field = key_field

  # Complete the info before it is serialized, so that info.yaml carries the
  # same fields as the hash returned to the caller.
  first_sample = sample_info.values.first || {}
  info[:channel_count] ||= first_sample[:channel_count]
  info[:value_type]    ||= first_sample[:value_type]

  Open.write(value_file, values.to_s)
  Open.write(info_file, info.to_yaml)

  info
end
256
321
  end
257
322
 
258
323
  def self.compare(dataset, field, condition, control, path)
@@ -13,6 +13,14 @@ rule /^(GDS\d+)\/?(values|info\.yaml)?$/ do |t|
13
13
  GEO::SOFT.GDS(dataset, file.nil? ? t.name : File.dirname(t.name))
14
14
  end
15
15
 
16
# Rake rule for GEO series targets: "GSE<n>", "GSE<n>/values" or
# "GSE<n>/info.yaml". When the target names one of the files, the series is
# produced into the directory that contains it; otherwise the target itself
# is used as the directory.
rule /^(GSE\d+)\/?(values|info\.yaml)?$/ do |t|
  matched = t.name.match(/^(GSE\d+)\/?(values|info\.yaml)?/)
  series = matched[1]
  file = matched[2]
  target_directory = file.nil? ? t.name : File.dirname(t.name)
  GEO::SOFT.GSE(series, target_directory)
end
22
+
23
+
16
24
  rule /^(GDS\d+)\/comparison\/(.*)$/ do |t|
17
25
  t.name =~ /^(GDS\d+)\/comparison\/(.*)/
18
26
  dataset = $1
@@ -71,9 +71,9 @@ class TestClass < Test::Unit::TestCase
71
71
  end
72
72
 
73
73
  def test_GSE
74
- gse="GSE966"
75
- info = GEO.GSE(gse)
76
- assert_equal "GPL764", info[:platform]
74
+ gse = "GSE21029"
75
+ info = GEO::SOFT.GSE(gse, "/tmp/gse")
76
+ assert_equal "GPL570", info[:platform]
77
77
  end
78
78
 
79
79
 
@@ -98,7 +98,5 @@ class TestClass < Test::Unit::TestCase
98
98
 
99
99
  assert GEO[dataset].comparison[GEO.comparison_name field, condition, control].produce.exists?
100
100
  end
101
-
102
-
103
101
  end
104
102
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-GE
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
8
+ - 2
9
9
  - 0
10
- version: 0.1.0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-01-19 00:00:00 +01:00
19
- default_executable:
18
+ date: 2012-01-31 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rbbt-util
@@ -48,7 +47,6 @@ files:
48
47
  - share/lib/R/MA.R
49
48
  - test/test_helper.rb
50
49
  - test/rbbt/GE/test_GEO.rb
51
- has_rdoc: true
52
50
  homepage: http://github.com/mikisvaz/rbbt-GE
53
51
  licenses: []
54
52
 
@@ -78,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
76
  requirements: []
79
77
 
80
78
  rubyforge_project:
81
- rubygems_version: 1.6.2
79
+ rubygems_version: 1.8.10
82
80
  signing_key:
83
81
  specification_version: 3
84
82
  summary: Gene Expression in RBBT