bio-affy 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,15 +1,29 @@
1
1
  = bio-affy
2
2
 
3
- WARNING: This software is currently being developed! It may not be usable.
4
-
5
3
  Affymetrix microarray file format parser (CEL/CDF) for Ruby.
6
4
 
5
+ Are you tired of waiting for R/Bioconductor to download and install? Are
6
+ you weary of R's slowness and memory consumption? Do you want Ruby's
7
+ convenience? Try creating a biogem, and use bio-affy's foreign
8
+ function interface (FFI) strategy for linking against R's C libraries.
9
+
10
+ For an example of the API see https://github.com/pjotrp/bioruby-affy/blob/master/spec/bio-affy_spec.rb
11
+
7
12
  == Introduction
8
13
 
9
- This is a port of the Biolib-1.0 Affy parser for the Ruby FFI, which
10
- in turn, is an adaptation of Ben Bolstad's work for R.
14
+ This is a port of the Biolib-1.0 Affy parser, which in turn is an
15
+ adaptation of Ben Bolstad's Affyio library for R/Bioconductor.
16
+
17
+ You can query CDF files for feature names of probesets, the number of probesets and
18
+ probe types, and the indices of probes on the array.
19
+
20
+ You can query CEL files for raw expression values of PM probes and MM probes.
21
+
22
+ This implementation allows processing one or more microarrays at a
23
+ time. It is not necessary to load all microarrays in RAM.
11
24
 
12
- To use this tool you do not need to know Ruby.
25
+ To use the command-line tool you do not need to know Ruby (note: the command-line
26
+ interface is not ready yet).
13
27
 
14
28
  == Install
15
29
 
@@ -28,18 +42,18 @@ Next run the tool with
28
42
  This module was written with
29
43
 
30
44
  ruby 1.9.3p0 (2011-10-30 revision 33570) [x86_64-linux]
31
- Using rake (0.9.2.2)
32
- Using bundler (1.0.21)
33
- Using diff-lcs (1.1.3)
34
- Using ffi (1.0.11)
35
- Using git (1.2.5)
36
- Using jeweler (1.6.4)
37
- Using mkrf (0.2.3)
38
- Using rcov (0.9.11)
39
- Using rspec-core (2.7.1)
40
- Using rspec-expectations (2.7.0)
41
- Using rspec-mocks (2.7.0)
42
- Using rspec (2.7.0)
45
+ Using rake (0.9.2.2)
46
+ Using bundler (1.0.21)
47
+ Using diff-lcs (1.1.3)
48
+ Using ffi (1.0.11)
49
+ Using git (1.2.5)
50
+ Using jeweler (1.6.4)
51
+ Using mkrf (0.2.3)
52
+ Using rcov (0.9.11)
53
+ Using rspec-core (2.7.1)
54
+ Using rspec-expectations (2.7.0)
55
+ Using rspec-mocks (2.7.0)
56
+ Using rspec (2.7.0)
43
57
 
44
58
  == Copyright
45
59
 
data/Rakefile CHANGED
@@ -27,7 +27,7 @@ Jeweler::Tasks.new do |gem|
27
27
  gem.authors = ["Pjotr Prins"]
28
28
  gem.extensions = "ext/src/mkrf_conf.rb"
29
29
  gem.files += Dir['lib/**/*'] + Dir['ext/**/*']
30
- gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|R|Rd|log)$/ }
30
+ gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|cdf|CEL|cel|R|Rd|log)$/ }
31
31
  gem.rubyforge_project = "nowarning"
32
32
 
33
33
  # dependencies defined in Gemfile
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.5.1
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-affy"
8
- s.version = "0.5.0"
8
+ s.version = "0.5.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2011-12-05"
12
+ s.date = "2011-12-09"
13
13
  s.description = "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["bio-affy"]
@@ -2,6 +2,19 @@
2
2
  module Bio
3
3
 
4
4
  module Affy
5
+
6
+ module Find
7
+ def Find.probeset_by_feature_name cdf, name
8
+ num_probesets = Bio::Affy::Ext.cdf_num_probesets(cdf)
9
+ (0..num_probesets-1).each do | i |
10
+ probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(cdf,i)
11
+ probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
12
+ return i if probeset.name == name
13
+ end
14
+ nil
15
+ end
16
+ end
17
+
5
18
  end
6
19
 
7
20
  end
@@ -3,6 +3,17 @@ module Bio
3
3
 
4
4
  module Affy
5
5
 
6
+ class CDFProbeInfo < FFI::Struct
7
+ layout :x, :uint,
8
+ :y, :uint
9
+ def x
10
+ self[:x]
11
+ end
12
+ def y
13
+ self[:y]
14
+ end
15
+ end
16
+
6
17
  class CDFProbeSet < FFI::Struct
7
18
  layout :isQC, :int,
8
19
  :pm_num, :int,
@@ -10,6 +21,10 @@ module Bio
10
21
  :pm, :pointer,
11
22
  :mm, :pointer,
12
23
  :name, [:uint8, 64]
24
+
25
+ def name
26
+ self[:name].to_ptr.read_string
27
+ end
13
28
  end
14
29
 
15
30
  module Ext
@@ -29,6 +44,10 @@ module Bio
29
44
  attach_function :cel_num_intensities, [ :pointer ], :uint64
30
45
  attach_function :cdf_num_probesets, [ :pointer ], :uint64
31
46
  attach_function :cdf_probeset_info, [ :pointer, :int ], :pointer
47
+ attach_function :cel_pm, [:pointer, :pointer, :int, :int ], :double
48
+ attach_function :cdf_pmprobe_info, [:pointer, :int, :int], :pointer
49
+ # more bindings are available, check out the functions defined in ./ext/src
50
+ # and the biolib test_affyio.rb file
32
51
  end
33
52
 
34
53
  end
@@ -6,39 +6,132 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
6
6
  require 'bio-affy'
7
7
 
8
8
  DATADIR = File.join([ROOT,'test','data','affy'])
9
- CDF = File.join(DATADIR,"MG_U74Av2.CDF")
9
+ CDF = File.join(DATADIR,"MG_U74Av2.CDF") # GPL81
10
10
  CDF2 = File.join(DATADIR,"ATH1-121501.CDF")
11
11
  CEL1 = File.join(DATADIR,"GSM103328.CEL.gz")
12
12
 
13
- describe "BioAffy" do
13
+ describe "Bio::Affy::Ext1" do
14
+
14
15
  it "should find the shared library" do
15
16
  Bio::Affy::Ext.has_affyext(5).should == 60
16
17
  end
17
- it "should open a CDF file" do
18
+ end
19
+
20
+ describe "Bio::Affy::Ext" do
21
+ before :all do
22
+ # first start the R environment
18
23
  Bio::Affy::Ext.BioLib_R_Init()
19
- cdf = Bio::Affy::Ext.open_cdffile(CDF)
20
- cdf.null?.should == false
21
- num_probesets = Bio::Affy::Ext.cdf_num_probesets(cdf)
24
+ # load the CDF once
25
+ @cdf = Bio::Affy::Ext.open_cdffile(CDF)
26
+ # load a CEL file once
27
+ @cel = Bio::Affy::Ext.open_celfile(CEL1)
28
+ end
29
+ it "should open a CDF file" do
30
+ @cdf.null?.should == false
31
+ end
32
+ it "should count the probesets" do
33
+ # Open the Mouse CDF file - in Bioconductor this would be
34
+ #
35
+ # source("http://bioconductor.org/biocLite.R")
36
+ # biocLite("affy")
37
+ # library(affy)
38
+ # library(makecdfenv)
39
+ # make.cdf.package('test.cdf',species='test')
40
+ # exit and R CMD INSTALL testcdf/
41
+ # m <- ReadAffy(cdfname='test')
42
+ #
43
+ # because CDF files are not read directly. bio-affy, however, can:
44
+ num_probesets = Bio::Affy::Ext.cdf_num_probesets(@cdf)
22
45
  num_probesets.should == 12501
23
46
  end
24
47
  it "should open a CEL file" do
25
- cel1 = Bio::Affy::Ext.open_celfile(CEL1)
26
- num = Bio::Affy::Ext.cel_num_intensities(cel1)
48
+ # Open the Mouse CEL files - in Bioconductor this would be
49
+ #
50
+ # source("http://bioconductor.org/biocLite.R")
51
+ # biocLite("affy")
52
+ # library(affy)
53
+ # m <- ReadAffy()
54
+ # dim(m)
55
+ # Cols Rows
56
+ # 640 640 == 409600
57
+
58
+ num = Bio::Affy::Ext.cel_num_intensities(@cel)
27
59
  num.should == 409600
28
60
  end
29
- it "should find the probe value for 1511" do
30
- cel = Bio::Affy::Ext.open_celfile(CEL1)
31
- probe_value = Bio::Affy::Ext.cel_intensity(cel,1510)
61
+ it "should find the CDF cel intensity value" do
62
+ # In Bioconductor, after m <- ReadAffy()
63
+ #
64
+ probe_value = Bio::Affy::Ext.cel_intensity(@cel,1510)
32
65
  probe_value.should == 10850.8
33
66
  end
34
- it "should name the probes for 1511" do
35
- cdf = Bio::Affy::Ext.open_cdffile(CDF)
36
- # memptr = MemoryPointer.new :pointer
37
- probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(cdf,1510)
67
+ it "should get the probeset indexes from the CDF" do
68
+ cdf_cols = 640 # (cdf.cols)
69
+ # R/Bioconductor:
70
+ #
71
+ # > as.vector(geneNames(m))[11657]
72
+ # [1] "98910_at"
73
+ #
74
+ # cat(indexProbes(m, which="pm", genenames="98910_at")[[1]],sep=",")
75
+ # 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474
76
+ #
77
+ # or
78
+ #
79
+ # pmindex(m,"98910_at")
80
+ #
81
+
82
+ pm0 = [ 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474 ]
83
+ pm0.each_with_index do | index, i |
84
+ # call with probeset, probenum
85
+ probe_ptr = Bio::Affy::Ext.cdf_pmprobe_info(@cdf,1510,i)
86
+ probe = Bio::Affy::CDFProbeInfo.new(probe_ptr)
87
+ # p [probe.x, probe.y]
88
+ # p [ index, probe.x, probe.y, probe.x + probe.y*@cdf.cols + 1]
89
+ (probe.x + probe.y*cdf_cols + 1).should == index
90
+ end
91
+
92
+
93
+ end
94
+
95
+ it "should get the probeset information" do
96
+ # In Bioconductor, after m <- ReadAffy()
97
+ #
98
+ # > length(featureNames(m))
99
+ # [1] 12488 (12501 in bio-affy - we add the 13 controls)
100
+ #
101
+ # Note also the feature numbering is different in the Bioconductor set:
102
+ # > as.vector(geneNames(m))[0:5]
103
+ # [1] "100001_at" "100002_at" "100003_at" "100004_at" "100005_at"
104
+ # > as.vector(geneNames(m))[1509:1512]
105
+ # [1] "101947_at" "101948_at" "101949_at" "101950_at"
106
+ # > as.vector(geneNames(m))[11657]
107
+ # [1] "98910_at" <- this is what we test at index 1510.
108
+ probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(@cdf,1510)
38
109
  probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
39
110
  probeset[:isQC].should == 0
40
111
  probeset[:pm_num].should == 16
41
112
  probeset[:mm_num].should == 16
113
+ # 98910_at 144 P 0.009985 (normalized on GEO)
42
114
  probeset[:name].to_ptr.read_string.should == "98910_at"
115
+ # now use the convenience methods
116
+ probeset.name.should == "98910_at"
117
+ end
118
+ it "should fetch the PM (perfect match) values" do
119
+ # Test PM values; as in R's pm(m)[1,1:8]
120
+ # mypmindex <- pmindex(m,"98910_at")
121
+ # cat(intensity(m)[mypmindex$`98910_at`],sep=",")
122
+ # Bioconductor 1.9 - even with test.cdf ought to be
123
+
124
+ pms = [ 120,768,1046,1220.3,345.3,171.3,138,171.3,189,343.3,605.3,1064.5,4429.3,854.3,2675,886.3]
125
+ pms.each_with_index do | e, i |
126
+ # p Biolib::Affyio.cel_pm(@microarrays[1],@cdf,1510,i)
127
+ Bio::Affy::Ext.cel_pm(@cel,@cdf,1510,i).should == e
128
+ end
129
+ end
130
+ # convenience methods
131
+ it "should find the probeset for 98910_at" do
132
+ probeset_index = Bio::Affy::Find.probeset_by_feature_name(@cdf,"98910_at")
133
+ probeset_index.should == 1510
43
134
  end
44
135
  end
136
+
137
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-affy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-05 00:00:00.000000000 Z
12
+ date: 2011-12-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &21542960 !ruby/object:Gem::Requirement
16
+ requirement: &9651640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.7.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *21542960
24
+ version_requirements: *9651640
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bundler
27
- requirement: &21541680 !ruby/object:Gem::Requirement
27
+ requirement: &9650560 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.0.12
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *21541680
35
+ version_requirements: *9650560
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: jeweler
38
- requirement: &21557520 !ruby/object:Gem::Requirement
38
+ requirement: &9649960 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.6.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *21557520
46
+ version_requirements: *9649960
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rcov
49
- requirement: &21557000 !ruby/object:Gem::Requirement
49
+ requirement: &9649400 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *21557000
57
+ version_requirements: *9649400
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: ffi
60
- requirement: &21556440 !ruby/object:Gem::Requirement
60
+ requirement: &9648680 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.11
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *21556440
68
+ version_requirements: *9648680
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: mkrf
71
- requirement: &21555820 !ruby/object:Gem::Requirement
71
+ requirement: &9647920 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.2.3
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *21555820
79
+ version_requirements: *9647920
80
80
  description: ! "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI
81
81
  binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
82
82
  email: pjotr.public01@thebird.nl
@@ -151,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
151
151
  version: '0'
152
152
  segments:
153
153
  - 0
154
- hash: -420869817303685437
154
+ hash: 1450233903929841244
155
155
  required_rubygems_version: !ruby/object:Gem::Requirement
156
156
  none: false
157
157
  requirements: