rbbt-GE 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbbt/GE/GEO.rb CHANGED
@@ -58,20 +58,27 @@ module GEO
58
58
  GSE_URL="ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/by_series/#SERIES#/#SERIES#_family.soft.gz"
59
59
 
60
60
  GSE_INFO = {
61
+ :DELIMITER => "\\^PLATFORM",
61
62
  :title => "!Series_title",
62
63
  :channel_count => "!Sample_channel_count",
63
64
  :value_type => "!Series_value_type",
64
65
  :platform => "!Series_platform_id",
65
66
  :description => "!Series_summary*", # Join with \n
66
67
  }
67
-
68
+
69
# Field specification for the sample sections of a series (GSE) SOFT file.
# It is the spec handed to get_info for every "^SAMPLE" chunk in
# series_samples. Each symbol names an entry of the resulting info hash and
# each string is the SOFT label it is associated with.
# NOTE(review): :DELIMITER looks like regexp source consumed by get_info --
# confirm against get_info, which is not visible here.
GSE_SAMPLE_INFO = {
  :DELIMITER     => "\\^SAMPLE",
  :title         => "!Sample_title",
  :accession     => "!Sample_geo_accession",
  :channel_count => "!Sample_channel_count",
}
75
+
68
76
  GDS_INFO = {
69
- :DELIMITER => "\\^SUBSET",
70
- :value_type => "!dataset_value_type",
71
- :channel_count => "!dataset_channel_count",
72
- :platform => "!dataset_platform",
73
- :reference_series => "!dataset_reference_series",
74
- :description => "!dataset_description",
77
+ :DELIMITER => "\\^SUBSET|!sample_table_begin",
78
+ :title => "!Sample_title",
79
+ :accession => "!Sample_geo_accession",
80
+ :channel_count => "!Sample_channel_count",
81
+ :platform => "!Sample_platform_id",
75
82
  }
76
83
 
77
84
  GDS_SUBSET_INFO = {
@@ -253,6 +260,64 @@ module GEO
253
260
 
254
261
  info
255
262
  end
263
+
264
+
265
+
266
# Extracts the per-sample data tables and metadata from a series SOFT stream.
#
# Everything left in +stream+ is read and split on the "^SAMPLE" marker.
# Chunks for which get_info reports no :accession are skipped (presumably the
# text that precedes the first sample). For every other chunk, the text from
# "!sample_table_begin" to "!sample_table_end" is loaded with TSV.open, its
# fields are set to [accession], and it is attached to the table built from
# the first sample.
#
# stream - object responding to #read that returns the SOFT text.
#
# Returns [values, sample_info]: values is the combined table (nil when no
# sample was found) and sample_info maps each accession to the hash returned
# by get_info for that sample.
#
# Raises RuntimeError when a sample chunk contains no data table.
def self.series_samples(stream)
  values = nil
  sample_info = {}

  stream.read.split(/\^SAMPLE/).each do |chunk|
    info = get_info(chunk, GSE_SAMPLE_INFO)
    sample = info[:accession]
    next if sample.nil?

    # Only /m (dot matches newline) and /i are wanted here. A trailing "s"
    # flag is an encoding switch (Windows-31J) in Ruby, not "dotall", and
    # makes the match raise on chunks containing non-ASCII UTF-8 text.
    table = chunk.match(/!sample_table_begin(.*)!sample_table_end/mi)
    raise "No data table found for sample #{sample}" if table.nil?

    # NOTE(review): table[0] is the whole match, begin/end marker lines
    # included; table[1] would be the table body only. Kept as [0] to
    # preserve existing behavior -- confirm which one TSV.open expects.
    sample_values = TSV.open(StringIO.new(table[0].strip), :type => :list, :header_hash => '')
    sample_values.fields = [sample]

    if values.nil?
      values = sample_values
    else
      values.attach sample_values
    end

    sample_info[sample] = info
  end

  [values, sample_info]
end
294
+
295
# Fetches the SOFT family file of a GEO series and writes its values table
# and metadata into +directory+.
#
# The series header is parsed with GSE_INFO, the sample tables are combined
# through series_samples, and the key field of the combined table is taken
# from the 'codes' file of the series platform. The table is written to
# "<directory>/values" and the metadata to "<directory>/info.yaml".
#
# series    - series accession substituted for "#SERIES#" in GSE_URL.
# directory - output directory; created when missing.
#
# Returns the info hash, including :value_file, :data_directory and the
# :channel_count / :value_type fallbacks taken from the first sample.
#
# Raises RuntimeError when the series contains no samples.
def self.GSE(series, directory)
  # mkdir_p is a no-op for an existing directory, so no existence check is
  # needed (File.exists? no longer exists on Ruby 3.2+).
  FileUtils.mkdir_p directory

  value_file = File.join(directory, 'values')
  info_file = File.join(directory, 'info.yaml')

  stream = Open.open(GSE_URL.gsub('#SERIES#', series), :nocache => true)
  begin
    info = parse_header(stream, GSE_INFO)
    info[:value_file] = value_file
    info[:data_directory] = directory

    Log.medium "Producing values file for #{ series }"
    values, sample_info = series_samples(stream)
  ensure
    # Release the download handle even when parsing fails.
    stream.close if stream.respond_to?(:closed?) && !stream.closed?
  end

  raise "No samples found in series #{series}" if values.nil?

  key_field = TSV.parse_header(GEO[info[:platform]]['codes'].open).key_field
  values.key_field = key_field

  # Fill in header fields missing at series level from the first sample
  # BEFORE saving, so info.yaml matches the hash returned to the caller.
  # NOTE(review): GSE_SAMPLE_INFO declares no :value_type entry, so the
  # second fallback is probably always nil -- confirm against get_info.
  first_sample = sample_info.values.first
  info[:channel_count] ||= first_sample[:channel_count]
  info[:value_type] ||= first_sample[:value_type]

  Open.write(value_file, values.to_s)
  Open.write(info_file, info.to_yaml)

  info
end
256
321
  end
257
322
 
258
323
  def self.compare(dataset, field, condition, control, path)
@@ -13,6 +13,14 @@ rule /^(GDS\d+)\/?(values|info\.yaml)?$/ do |t|
13
13
  GEO::SOFT.GDS(dataset, file.nil? ? t.name : File.dirname(t.name))
14
14
  end
15
15
 
16
# Rake rule for series targets: "GSE<digits>", optionally followed by
# "/values" or "/info.yaml". A bare series target uses the task name itself
# as the output directory; a file target uses the directory containing it.
rule /^(GSE\d+)\/?(values|info\.yaml)?$/ do |t|
  target = t.name
  target =~ /^(GSE\d+)\/?(values|info\.yaml)?/
  series, file = Regexp.last_match(1), Regexp.last_match(2)

  directory = if file.nil?
                target
              else
                File.dirname(target)
              end

  GEO::SOFT.GSE(series, directory)
end
22
+
23
+
16
24
  rule /^(GDS\d+)\/comparison\/(.*)$/ do |t|
17
25
  t.name =~ /^(GDS\d+)\/comparison\/(.*)/
18
26
  dataset = $1
@@ -71,9 +71,9 @@ class TestClass < Test::Unit::TestCase
71
71
  end
72
72
 
73
73
  def test_GSE
74
- gse="GSE966"
75
- info = GEO.GSE(gse)
76
- assert_equal "GPL764", info[:platform]
74
+ gse = "GSE21029"
75
+ info = GEO::SOFT.GSE(gse, "/tmp/gse")
76
+ assert_equal "GPL570", info[:platform]
77
77
  end
78
78
 
79
79
 
@@ -98,7 +98,5 @@ class TestClass < Test::Unit::TestCase
98
98
 
99
99
  assert GEO[dataset].comparison[GEO.comparison_name field, condition, control].produce.exists?
100
100
  end
101
-
102
-
103
101
  end
104
102
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-GE
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
8
+ - 2
9
9
  - 0
10
- version: 0.1.0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-01-19 00:00:00 +01:00
19
- default_executable:
18
+ date: 2012-01-31 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rbbt-util
@@ -48,7 +47,6 @@ files:
48
47
  - share/lib/R/MA.R
49
48
  - test/test_helper.rb
50
49
  - test/rbbt/GE/test_GEO.rb
51
- has_rdoc: true
52
50
  homepage: http://github.com/mikisvaz/rbbt-GE
53
51
  licenses: []
54
52
 
@@ -78,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
76
  requirements: []
79
77
 
80
78
  rubyforge_project:
81
- rubygems_version: 1.6.2
79
+ rubygems_version: 1.8.10
82
80
  signing_key:
83
81
  specification_version: 3
84
82
  summary: Gene Expression in RBBT