bio-affy 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,29 @@
1
1
  = bio-affy
2
2
 
3
- WARNING: This software is currently being developed! It may not be usable.
4
-
5
3
  Affymetrix microarray file format parser (CEL/CDF) for Ruby.
6
4
 
5
+ Are you tired of waiting for R/Bioconductor to download and install? Are
6
+ you weary of R's slowness and memory consumption? Do you want Ruby's
7
+ convenience? Try creating a biogem, and use bio-affy's foreign
8
+ function interface (FFI) strategy for linking against R's C libraries.
9
+
10
+ For an example of the API see https://github.com/pjotrp/bioruby-affy/blob/master/spec/bio-affy_spec.rb
11
+
7
12
  == Introduction
8
13
 
9
- This is a port of the Biolib-1.0 Affy parser for the Ruby FFI, which
10
- in turn, is an adaptation of Ben Bolstad's work for R.
14
+ This is a port of the Biolib-1.0 Affy parser, which in turn is an
15
+ adaptation of Ben Bolstad's Affyio library for R/Bioconductor.
16
+
17
+ You can query CDF files for feature names of probesets, the number of probesets and
18
+ probe types, and the indices of probes on the array.
19
+
20
+ You can query CEL files for raw expression values of PM probes and MM probes.
21
+
22
+ This implementation allows processing one or more microarrays at a
23
+ time. It is not necessary to load all microarrays in RAM.
11
24
 
12
- To use this tool you do not need to know Ruby.
25
+ To use the command line tool you do not need to know Ruby (note: the command line
26
+ interface is not ready yet).
13
27
 
14
28
  == Install
15
29
 
@@ -28,18 +42,18 @@ Next run the tool with
28
42
  This module was written with
29
43
 
30
44
  ruby 1.9.3p0 (2011-10-30 revision 33570) [x86_64-linux]
31
- Using rake (0.9.2.2)
32
- Using bundler (1.0.21)
33
- Using diff-lcs (1.1.3)
34
- Using ffi (1.0.11)
35
- Using git (1.2.5)
36
- Using jeweler (1.6.4)
37
- Using mkrf (0.2.3)
38
- Using rcov (0.9.11)
39
- Using rspec-core (2.7.1)
40
- Using rspec-expectations (2.7.0)
41
- Using rspec-mocks (2.7.0)
42
- Using rspec (2.7.0)
45
+ Using rake (0.9.2.2)
46
+ Using bundler (1.0.21)
47
+ Using diff-lcs (1.1.3)
48
+ Using ffi (1.0.11)
49
+ Using git (1.2.5)
50
+ Using jeweler (1.6.4)
51
+ Using mkrf (0.2.3)
52
+ Using rcov (0.9.11)
53
+ Using rspec-core (2.7.1)
54
+ Using rspec-expectations (2.7.0)
55
+ Using rspec-mocks (2.7.0)
56
+ Using rspec (2.7.0)
43
57
 
44
58
  == Copyright
45
59
 
data/Rakefile CHANGED
@@ -27,7 +27,7 @@ Jeweler::Tasks.new do |gem|
27
27
  gem.authors = ["Pjotr Prins"]
28
28
  gem.extensions = "ext/src/mkrf_conf.rb"
29
29
  gem.files += Dir['lib/**/*'] + Dir['ext/**/*']
30
- gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|R|Rd|log)$/ }
30
+ gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|cdf|CEL|cel|R|Rd|log)$/ }
31
31
  gem.rubyforge_project = "nowarning"
32
32
 
33
33
  # dependencies defined in Gemfile
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.5.1
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-affy"
8
- s.version = "0.5.0"
8
+ s.version = "0.5.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2011-12-05"
12
+ s.date = "2011-12-09"
13
13
  s.description = "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["bio-affy"]
@@ -2,6 +2,19 @@
2
2
  module Bio
3
3
 
4
4
  module Affy
5
+
6
+ module Find
7
+ def Find.probeset_by_feature_name cdf, name
8
+ num_probesets = Bio::Affy::Ext.cdf_num_probesets(cdf)
9
+ (0..num_probesets-1).each do | i |
10
+ probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(cdf,i)
11
+ probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
12
+ return i if probeset.name == name
13
+ end
14
+ nil
15
+ end
16
+ end
17
+
5
18
  end
6
19
 
7
20
  end
@@ -3,6 +3,17 @@ module Bio
3
3
 
4
4
  module Affy
5
5
 
6
+ class CDFProbeInfo < FFI::Struct
7
+ layout :x, :uint,
8
+ :y, :uint
9
+ def x
10
+ self[:x]
11
+ end
12
+ def y
13
+ self[:y]
14
+ end
15
+ end
16
+
6
17
  class CDFProbeSet < FFI::Struct
7
18
  layout :isQC, :int,
8
19
  :pm_num, :int,
@@ -10,6 +21,10 @@ module Bio
10
21
  :pm, :pointer,
11
22
  :mm, :pointer,
12
23
  :name, [:uint8, 64]
24
+
25
+ def name
26
+ self[:name].to_ptr.read_string
27
+ end
13
28
  end
14
29
 
15
30
  module Ext
@@ -29,6 +44,10 @@ module Bio
29
44
  attach_function :cel_num_intensities, [ :pointer ], :uint64
30
45
  attach_function :cdf_num_probesets, [ :pointer ], :uint64
31
46
  attach_function :cdf_probeset_info, [ :pointer, :int ], :pointer
47
+ attach_function :cel_pm, [:pointer, :pointer, :int, :int ], :double
48
+ attach_function :cdf_pmprobe_info, [:pointer, :int, :int], :pointer
49
+ # more bindings are available, check out the functions defined in ./ext/src
50
+ # and the biolib test_affyio.rb file
32
51
  end
33
52
 
34
53
  end
@@ -6,39 +6,132 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
6
6
  require 'bio-affy'
7
7
 
8
8
  DATADIR = File.join([ROOT,'test','data','affy'])
9
- CDF = File.join(DATADIR,"MG_U74Av2.CDF")
9
+ CDF = File.join(DATADIR,"MG_U74Av2.CDF") # GPL81
10
10
  CDF2 = File.join(DATADIR,"ATH1-121501.CDF")
11
11
  CEL1 = File.join(DATADIR,"GSM103328.CEL.gz")
12
12
 
13
- describe "BioAffy" do
13
+ describe "Bio::Affy::Ext1" do
14
+
14
15
  it "should find the shared library" do
15
16
  Bio::Affy::Ext.has_affyext(5).should == 60
16
17
  end
17
- it "should open a CDF file" do
18
+ end
19
+
20
+ describe "Bio::Affy::Ext" do
21
+ before :all do
22
+ # first start the R environment
18
23
  Bio::Affy::Ext.BioLib_R_Init()
19
- cdf = Bio::Affy::Ext.open_cdffile(CDF)
20
- cdf.null?.should == false
21
- num_probesets = Bio::Affy::Ext.cdf_num_probesets(cdf)
24
+ # load the CDF once
25
+ @cdf = Bio::Affy::Ext.open_cdffile(CDF)
26
+ # load a CEL file once
27
+ @cel = Bio::Affy::Ext.open_celfile(CEL1)
28
+ end
29
+ it "should open a CDF file" do
30
+ @cdf.null?.should == false
31
+ end
32
+ it "should count the probesets" do
33
+ # Open the Mouse CDF file - in Bioconductor this would be
34
+ #
35
+ # source("http://bioconductor.org/biocLite.R")
36
+ # biocLite("affy")
37
+ # library(affy)
38
+ # library(makecdfenv)
39
+ # make.cdf.package('test.cdf',species='test')
40
+ # exit and R CMD INSTALL testcdf/
41
+ # m <- ReadAffy(cdfname='test')
42
+ #
43
+ # because CDF files are not read directly. bio-affy, however, can:
44
+ num_probesets = Bio::Affy::Ext.cdf_num_probesets(@cdf)
22
45
  num_probesets.should == 12501
23
46
  end
24
47
  it "should open a CEL file" do
25
- cel1 = Bio::Affy::Ext.open_celfile(CEL1)
26
- num = Bio::Affy::Ext.cel_num_intensities(cel1)
48
+ # Open the Mouse CEL files - in Bioconductor this would be
49
+ #
50
+ # source("http://bioconductor.org/biocLite.R")
51
+ # biocLite("affy")
52
+ # library(affy)
53
+ # m <- ReadAffy()
54
+ # dim(m)
55
+ # Cols Rows
56
+ # 640 640 == 409600
57
+
58
+ num = Bio::Affy::Ext.cel_num_intensities(@cel)
27
59
  num.should == 409600
28
60
  end
29
- it "should find the probe value for 1511" do
30
- cel = Bio::Affy::Ext.open_celfile(CEL1)
31
- probe_value = Bio::Affy::Ext.cel_intensity(cel,1510)
61
+ it "should find the CDF cel intensity value" do
62
+ # In Bioconductor, after m <- ReadAffy()
63
+ #
64
+ probe_value = Bio::Affy::Ext.cel_intensity(@cel,1510)
32
65
  probe_value.should == 10850.8
33
66
  end
34
- it "should name the probes for 1511" do
35
- cdf = Bio::Affy::Ext.open_cdffile(CDF)
36
- # memptr = MemoryPointer.new :pointer
37
- probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(cdf,1510)
67
+ it "should get the probeset indexes from the CDF" do
68
+ cdf_cols = 640 # (cdf.cols)
69
+ # R/Bioconductor:
70
+ #
71
+ # > as.vector(geneNames(m))[11657]
72
+ # [1] "98910_at"
73
+ #
74
+ # cat(indexProbes(m, which="pm", genenames="98910_at")[[1]],sep=",")
75
+ # 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474
76
+ #
77
+ # or
78
+ #
79
+ # pmindex(m,"98910_at")
80
+ #
81
+
82
+ pm0 = [ 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474 ]
83
+ pm0.each_with_index do | index, i |
84
+ # call with probeset, probenum
85
+ probe_ptr = Bio::Affy::Ext.cdf_pmprobe_info(@cdf,1510,i)
86
+ probe = Bio::Affy::CDFProbeInfo.new(probe_ptr)
87
+ # p [probe.x, probe.y]
88
+ # p [ index, probe.x, probe.y, probe.x + probe.y*@cdf.cols + 1]
89
+ (probe.x + probe.y*cdf_cols + 1).should == index
90
+ end
91
+
92
+
93
+ end
94
+
95
+ it "should get the probeset information" do
96
+ # In Bioconductor, after m <- ReadAffy()
97
+ #
98
+ # > length(featureNames(m))
99
+ # [1] 12488 (12501 in bio-affy - we add the 13 controls)
100
+ #
101
+ # Note also the feature numbering is different in the Bioconductor set:
102
+ # > as.vector(geneNames(m))[0:5]
103
+ # [1] "100001_at" "100002_at" "100003_at" "100004_at" "100005_at"
104
+ # > as.vector(geneNames(m))[1509:1512]
105
+ # [1] "101947_at" "101948_at" "101949_at" "101950_at"
106
+ # > as.vector(geneNames(m))[11657]
107
+ # [1] "98910_at" <- this is what we test at index 1510.
108
+ probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(@cdf,1510)
38
109
  probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
39
110
  probeset[:isQC].should == 0
40
111
  probeset[:pm_num].should == 16
41
112
  probeset[:mm_num].should == 16
113
+ # 98910_at 144 P 0.009985 (normalized on GEO)
42
114
  probeset[:name].to_ptr.read_string.should == "98910_at"
115
+ # now use the convenience methods
116
+ probeset.name.should == "98910_at"
117
+ end
118
+ it "should fetch the PM (perfect match) values" do
119
+ # Test PM values; as in R's pm(m)[1,1:8]
120
+ # mypmindex <- pmindex(m,"98910_at")
121
+ # cat(intensity(m)[mypmindex$`98910_at`],sep=",")
122
+ # Bioconductor 1.9 - even with test.cdf ought to be
123
+
124
+ pms = [ 120,768,1046,1220.3,345.3,171.3,138,171.3,189,343.3,605.3,1064.5,4429.3,854.3,2675,886.3]
125
+ pms.each_with_index do | e, i |
126
+ # p Biolib::Affyio.cel_pm(@microarrays[1],@cdf,1510,i)
127
+ Bio::Affy::Ext.cel_pm(@cel,@cdf,1510,i).should == e
128
+ end
129
+ end
130
+ # convenience methods
131
+ it "should find the probeset for 98910_at" do
132
+ probeset_index = Bio::Affy::Find.probeset_by_feature_name(@cdf,"98910_at")
133
+ probeset_index.should == 1510
43
134
  end
44
135
  end
136
+
137
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-affy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-05 00:00:00.000000000 Z
12
+ date: 2011-12-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &21542960 !ruby/object:Gem::Requirement
16
+ requirement: &9651640 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.7.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *21542960
24
+ version_requirements: *9651640
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bundler
27
- requirement: &21541680 !ruby/object:Gem::Requirement
27
+ requirement: &9650560 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.0.12
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *21541680
35
+ version_requirements: *9650560
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: jeweler
38
- requirement: &21557520 !ruby/object:Gem::Requirement
38
+ requirement: &9649960 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.6.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *21557520
46
+ version_requirements: *9649960
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rcov
49
- requirement: &21557000 !ruby/object:Gem::Requirement
49
+ requirement: &9649400 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *21557000
57
+ version_requirements: *9649400
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: ffi
60
- requirement: &21556440 !ruby/object:Gem::Requirement
60
+ requirement: &9648680 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.11
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *21556440
68
+ version_requirements: *9648680
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: mkrf
71
- requirement: &21555820 !ruby/object:Gem::Requirement
71
+ requirement: &9647920 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.2.3
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *21555820
79
+ version_requirements: *9647920
80
80
  description: ! "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI
81
81
  binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
82
82
  email: pjotr.public01@thebird.nl
@@ -151,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
151
151
  version: '0'
152
152
  segments:
153
153
  - 0
154
- hash: -420869817303685437
154
+ hash: 1450233903929841244
155
155
  required_rubygems_version: !ruby/object:Gem::Requirement
156
156
  none: false
157
157
  requirements: