bio-affy 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +31 -17
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bio-affy.gemspec +2 -2
- data/lib/bio/affy.rb +13 -0
- data/lib/bio/affyext.rb +19 -0
- data/spec/bio-affy_spec.rb +108 -15
- metadata +15 -15
data/README.rdoc
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
= bio-affy
|
2
2
|
|
3
|
-
WARNING: This software is currently being developed! It may not be usable.
|
4
|
-
|
5
3
|
Affymetrix microarray file format parser (CEL/CDF) for Ruby.
|
6
4
|
|
5
|
+
Are you tired waiting for R/Bioconductor to download and install? Are
|
6
|
+
you weary of R's slowness and memory consumption? Do you want Ruby's
|
7
|
+
convenience? Try creating a biogem, and use bio-affy's foreign
|
8
|
+
function interface (FFI) strategy for linking against R's C libraries.
|
9
|
+
|
10
|
+
For an example of the API see https://github.com/pjotrp/bioruby-affy/blob/master/spec/bio-affy_spec.rb
|
11
|
+
|
7
12
|
== Introduction
|
8
13
|
|
9
|
-
This is a port of the Biolib-1.0 Affy parser
|
10
|
-
|
14
|
+
This is a port of the Biolib-1.0 Affy parser, which in turn is an
|
15
|
+
adaptation of Ben Bolstad's Affyio library for R/Bioconductor.
|
16
|
+
|
17
|
+
You can query CDF files for feature names of probesets, the number of probesets and
|
18
|
+
probe types, and the indices of probes on the array.
|
19
|
+
|
20
|
+
You can query CEL files for raw expression values of PM probes and MM probes.
|
21
|
+
|
22
|
+
This implementation allows processing one or more microarrays at a
|
23
|
+
time. It is not necessary to load all microarrays in RAM.
|
11
24
|
|
12
|
-
To use
|
25
|
+
To use the command line tool you do not need to know Ruby (note, the command line
|
26
|
+
interface is not ready).
|
13
27
|
|
14
28
|
== Install
|
15
29
|
|
@@ -28,18 +42,18 @@ Next run the tool with
|
|
28
42
|
This module was written with
|
29
43
|
|
30
44
|
ruby 1.9.3p0 (2011-10-30 revision 33570) [x86_64-linux]
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
45
|
+
Using rake (0.9.2.2)
|
46
|
+
Using bundler (1.0.21)
|
47
|
+
Using diff-lcs (1.1.3)
|
48
|
+
Using ffi (1.0.11)
|
49
|
+
Using git (1.2.5)
|
50
|
+
Using jeweler (1.6.4)
|
51
|
+
Using mkrf (0.2.3)
|
52
|
+
Using rcov (0.9.11)
|
53
|
+
Using rspec-core (2.7.1)
|
54
|
+
Using rspec-expectations (2.7.0)
|
55
|
+
Using rspec-mocks (2.7.0)
|
56
|
+
Using rspec (2.7.0)
|
43
57
|
|
44
58
|
== Copyright
|
45
59
|
|
data/Rakefile
CHANGED
@@ -27,7 +27,7 @@ Jeweler::Tasks.new do |gem|
|
|
27
27
|
gem.authors = ["Pjotr Prins"]
|
28
28
|
gem.extensions = "ext/src/mkrf_conf.rb"
|
29
29
|
gem.files += Dir['lib/**/*'] + Dir['ext/**/*']
|
30
|
-
gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|R|Rd|log)$/ }
|
30
|
+
gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|cdf|CEL|cel|R|Rd|log)$/ }
|
31
31
|
gem.rubyforge_project = "nowarning"
|
32
32
|
|
33
33
|
# dependencies defined in Gemfile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.0
|
1
|
+
0.5.1
|
data/bio-affy.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-affy"
|
8
|
-
s.version = "0.5.0"
|
8
|
+
s.version = "0.5.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2011-12-
|
12
|
+
s.date = "2011-12-09"
|
13
13
|
s.description = "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["bio-affy"]
|
data/lib/bio/affy.rb
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
module Bio
|
3
3
|
|
4
4
|
module Affy
|
5
|
+
|
6
|
+
module Find
|
7
|
+
def Find.probeset_by_feature_name cdf, name
|
8
|
+
num_probesets = Bio::Affy::Ext.cdf_num_probesets(cdf)
|
9
|
+
(0..num_probesets-1).each do | i |
|
10
|
+
probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(cdf,i)
|
11
|
+
probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
|
12
|
+
return i if probeset.name == name
|
13
|
+
end
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
5
18
|
end
|
6
19
|
|
7
20
|
end
|
data/lib/bio/affyext.rb
CHANGED
@@ -3,6 +3,17 @@ module Bio
|
|
3
3
|
|
4
4
|
module Affy
|
5
5
|
|
6
|
+
class CDFProbeInfo < FFI::Struct
|
7
|
+
layout :x, :uint,
|
8
|
+
:y, :uint
|
9
|
+
def x
|
10
|
+
self[:x]
|
11
|
+
end
|
12
|
+
def y
|
13
|
+
self[:y]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
6
17
|
class CDFProbeSet < FFI::Struct
|
7
18
|
layout :isQC, :int,
|
8
19
|
:pm_num, :int,
|
@@ -10,6 +21,10 @@ module Bio
|
|
10
21
|
:pm, :pointer,
|
11
22
|
:mm, :pointer,
|
12
23
|
:name, [:uint8, 64]
|
24
|
+
|
25
|
+
def name
|
26
|
+
self[:name].to_ptr.read_string
|
27
|
+
end
|
13
28
|
end
|
14
29
|
|
15
30
|
module Ext
|
@@ -29,6 +44,10 @@ module Bio
|
|
29
44
|
attach_function :cel_num_intensities, [ :pointer ], :uint64
|
30
45
|
attach_function :cdf_num_probesets, [ :pointer ], :uint64
|
31
46
|
attach_function :cdf_probeset_info, [ :pointer, :int ], :pointer
|
47
|
+
attach_function :cel_pm, [:pointer, :pointer, :int, :int ], :double
|
48
|
+
attach_function :cdf_pmprobe_info, [:pointer, :int, :int], :pointer
|
49
|
+
# more bindings are available, check out the functions defined in ./ext/src
|
50
|
+
# and the biolib test_affyio.rb file
|
32
51
|
end
|
33
52
|
|
34
53
|
end
|
data/spec/bio-affy_spec.rb
CHANGED
@@ -6,39 +6,132 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
6
6
|
require 'bio-affy'
|
7
7
|
|
8
8
|
DATADIR = File.join([ROOT,'test','data','affy'])
|
9
|
-
CDF = File.join(DATADIR,"MG_U74Av2.CDF")
|
9
|
+
CDF = File.join(DATADIR,"MG_U74Av2.CDF") # GPL81
|
10
10
|
CDF2 = File.join(DATADIR,"ATH1-121501.CDF")
|
11
11
|
CEL1 = File.join(DATADIR,"GSM103328.CEL.gz")
|
12
12
|
|
13
|
-
describe "
|
13
|
+
describe "Bio::Affy::Ext1" do
|
14
|
+
|
14
15
|
it "should find the shared library" do
|
15
16
|
Bio::Affy::Ext.has_affyext(5).should == 60
|
16
17
|
end
|
17
|
-
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "Bio::Affy::Ext" do
|
21
|
+
before :all do
|
22
|
+
# first start the R environment
|
18
23
|
Bio::Affy::Ext.BioLib_R_Init()
|
19
|
-
|
20
|
-
cdf
|
21
|
-
|
24
|
+
# load the CDF once
|
25
|
+
@cdf = Bio::Affy::Ext.open_cdffile(CDF)
|
26
|
+
# load a CEL file once
|
27
|
+
@cel = Bio::Affy::Ext.open_celfile(CEL1)
|
28
|
+
end
|
29
|
+
it "should open a CDF file" do
|
30
|
+
@cdf.null?.should == false
|
31
|
+
end
|
32
|
+
it "should count the probesets" do
|
33
|
+
# Open the Mouse CDF file - in Bioconductor this would be
|
34
|
+
#
|
35
|
+
# source("http://bioconductor.org/biocLite.R")
|
36
|
+
# biocLite("affy")
|
37
|
+
# library(affy)
|
38
|
+
# library(makecdfenv)
|
39
|
+
# make.cdf.package('test.cdf',species='test')
|
40
|
+
# exit and R CMD INSTALL testcdf/
|
41
|
+
# m <- ReadAffy(cdfname='test')
|
42
|
+
#
|
43
|
+
# because CDF files are not read directly. bio-affy, however can:
|
44
|
+
num_probesets = Bio::Affy::Ext.cdf_num_probesets(@cdf)
|
22
45
|
num_probesets.should == 12501
|
23
46
|
end
|
24
47
|
it "should open a CEL file" do
|
25
|
-
|
26
|
-
|
48
|
+
# Open the Mouse CEL files - in Bioconductor this would be
|
49
|
+
#
|
50
|
+
# source("http://bioconductor.org/biocLite.R")
|
51
|
+
# biocLite("affy")
|
52
|
+
# library(affy)
|
53
|
+
# m <- ReadAffy()
|
54
|
+
# dim(m)
|
55
|
+
# Cols Rows
|
56
|
+
# 640 640 == 409600
|
57
|
+
|
58
|
+
num = Bio::Affy::Ext.cel_num_intensities(@cel)
|
27
59
|
num.should == 409600
|
28
60
|
end
|
29
|
-
it "should find the
|
30
|
-
|
31
|
-
|
61
|
+
it "should find the CDF cel intensity value" do
|
62
|
+
# In Bioconductor, after m <- ReadAffy()
|
63
|
+
#
|
64
|
+
probe_value = Bio::Affy::Ext.cel_intensity(@cel,1510)
|
32
65
|
probe_value.should == 10850.8
|
33
66
|
end
|
34
|
-
it "should
|
35
|
-
|
36
|
-
#
|
37
|
-
|
67
|
+
it "should get the probeset indexes from the CDF" do
|
68
|
+
cdf_cols = 640 # (cdf.cols)
|
69
|
+
# R/Bioconductor:
|
70
|
+
#
|
71
|
+
# > as.vector(geneNames(m))[11657]
|
72
|
+
# [1] "98910_at"
|
73
|
+
#
|
74
|
+
# cat(indexProbes(m, which="pm", genenames="98910_at")[[1]],sep=",")
|
75
|
+
# 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474
|
76
|
+
#
|
77
|
+
# or
|
78
|
+
#
|
79
|
+
# pmindex(m,"98910_at")
|
80
|
+
#
|
81
|
+
|
82
|
+
pm0 = [ 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474 ]
|
83
|
+
pm0.each_with_index do | index, i |
|
84
|
+
# call with probeset, probenum
|
85
|
+
probe_ptr = Bio::Affy::Ext.cdf_pmprobe_info(@cdf,1510,i)
|
86
|
+
probe = Bio::Affy::CDFProbeInfo.new(probe_ptr)
|
87
|
+
# p [probe.x, probe.y]
|
88
|
+
# p [ index, probe.x, probe.y, probe.x + probe.y*@cdf.cols + 1]
|
89
|
+
(probe.x + probe.y*cdf_cols + 1).should == index
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should get the probeset information" do
|
96
|
+
# In Bioconductor, after m <- ReadAffy()
|
97
|
+
#
|
98
|
+
# > length(featureNames(m))
|
99
|
+
# [1] 12488 (12501 in bio-affy - we add the 13 controls)
|
100
|
+
#
|
101
|
+
# Note also the feature numbering is different in the Bioconductor set:
|
102
|
+
# > as.vector(geneNames(m))[0:5]
|
103
|
+
# [1] "100001_at" "100002_at" "100003_at" "100004_at" "100005_at"
|
104
|
+
# > as.vector(geneNames(m))[1509:1512]
|
105
|
+
# [1] "101947_at" "101948_at" "101949_at" "101950_at"
|
106
|
+
# > as.vector(geneNames(m))[11657]
|
107
|
+
# [1] "98910_at" <- this is what we test at index 1510.
|
108
|
+
probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(@cdf,1510)
|
38
109
|
probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
|
39
110
|
probeset[:isQC].should == 0
|
40
111
|
probeset[:pm_num].should == 16
|
41
112
|
probeset[:mm_num].should == 16
|
113
|
+
# 98910_at 144 P 0.009985 (normalized on GEO)
|
42
114
|
probeset[:name].to_ptr.read_string.should == "98910_at"
|
115
|
+
# now use the convenience methods
|
116
|
+
probeset.name.should == "98910_at"
|
117
|
+
end
|
118
|
+
it "should fetch the PM (perfect match) values" do
|
119
|
+
# Test PM values; as in R's pm(m)[1,1:8]
|
120
|
+
# mypmindex <- pmindex(m,"98910_at")
|
121
|
+
# cat(intensity(m)[mypmindex$`98910_at`],sep=",")
|
122
|
+
# Bioconductor 1.9 - even with test.cdf ought to be
|
123
|
+
|
124
|
+
pms = [ 120,768,1046,1220.3,345.3,171.3,138,171.3,189,343.3,605.3,1064.5,4429.3,854.3,2675,886.3]
|
125
|
+
pms.each_with_index do | e, i |
|
126
|
+
# p Biolib::Affyio.cel_pm(@microarrays[1],@cdf,1510,i)
|
127
|
+
Bio::Affy::Ext.cel_pm(@cel,@cdf,1510,i).should == e
|
128
|
+
end
|
129
|
+
end
|
130
|
+
# convenience methods
|
131
|
+
it "should find the probeset for 98910_at" do
|
132
|
+
probeset_index = Bio::Affy::Find.probeset_by_feature_name(@cdf,"98910_at")
|
133
|
+
probeset_index.should == 1510
|
43
134
|
end
|
44
135
|
end
|
136
|
+
|
137
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-affy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-12-
|
12
|
+
date: 2011-12-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &9651640 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.7.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *9651640
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bundler
|
27
|
-
requirement: &
|
27
|
+
requirement: &9650560 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.12
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *9650560
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: jeweler
|
38
|
-
requirement: &
|
38
|
+
requirement: &9649960 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.6.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *9649960
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rcov
|
49
|
-
requirement: &
|
49
|
+
requirement: &9649400 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *9649400
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: ffi
|
60
|
-
requirement: &
|
60
|
+
requirement: &9648680 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.11
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *9648680
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: mkrf
|
71
|
-
requirement: &
|
71
|
+
requirement: &9647920 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: 0.2.3
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *9647920
|
80
80
|
description: ! "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI
|
81
81
|
binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
|
82
82
|
email: pjotr.public01@thebird.nl
|
@@ -151,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
151
151
|
version: '0'
|
152
152
|
segments:
|
153
153
|
- 0
|
154
|
-
hash:
|
154
|
+
hash: 1450233903929841244
|
155
155
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
156
|
none: false
|
157
157
|
requirements:
|