bio-affy 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +31 -17
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bio-affy.gemspec +2 -2
- data/lib/bio/affy.rb +13 -0
- data/lib/bio/affyext.rb +19 -0
- data/spec/bio-affy_spec.rb +108 -15
- metadata +15 -15
data/README.rdoc
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
= bio-affy
|
2
2
|
|
3
|
-
WARNING: This software is currently being developed! It may not be usable.
|
4
|
-
|
5
3
|
Affymetrix microarray file format parser (CEL/CDF) for Ruby.
|
6
4
|
|
5
|
+
Are you tired of waiting for R/Bioconductor to download and install? Are
|
6
|
+
you weary of R's slowness and memory consumption? Do you want Ruby's
|
7
|
+
convenience? Try creating a biogem, and use bio-affy's foreign
|
8
|
+
function interface (FFI) strategy for linking against R's C libraries.
|
9
|
+
|
10
|
+
For an example of the API see https://github.com/pjotrp/bioruby-affy/blob/master/spec/bio-affy_spec.rb
|
11
|
+
|
7
12
|
== Introduction
|
8
13
|
|
9
|
-
This is a port of the Biolib-1.0 Affy parser
|
10
|
-
|
14
|
+
This is a port of the Biolib-1.0 Affy parser, which in turn is an
|
15
|
+
adaptation of Ben Bolstad's Affyio library for R/Bioconductor.
|
16
|
+
|
17
|
+
You can query CDF files for feature names of probesets, the number of probesets and
|
18
|
+
probe types, and the indices of probes on the array.
|
19
|
+
|
20
|
+
You can query CEL files for raw expression values of PM probes and MM probes.
|
21
|
+
|
22
|
+
This implementation allows processing one or more microarrays at a
|
23
|
+
time. It is not necessary to load all microarrays in RAM.
|
11
24
|
|
12
|
-
To use
|
25
|
+
To use the command line tool you do not need to know Ruby (note, the command line
|
26
|
+
interface is not ready).
|
13
27
|
|
14
28
|
== Install
|
15
29
|
|
@@ -28,18 +42,18 @@ Next run the tool with
|
|
28
42
|
This module was written with
|
29
43
|
|
30
44
|
ruby 1.9.3p0 (2011-10-30 revision 33570) [x86_64-linux]
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
45
|
+
Using rake (0.9.2.2)
|
46
|
+
Using bundler (1.0.21)
|
47
|
+
Using diff-lcs (1.1.3)
|
48
|
+
Using ffi (1.0.11)
|
49
|
+
Using git (1.2.5)
|
50
|
+
Using jeweler (1.6.4)
|
51
|
+
Using mkrf (0.2.3)
|
52
|
+
Using rcov (0.9.11)
|
53
|
+
Using rspec-core (2.7.1)
|
54
|
+
Using rspec-expectations (2.7.0)
|
55
|
+
Using rspec-mocks (2.7.0)
|
56
|
+
Using rspec (2.7.0)
|
43
57
|
|
44
58
|
== Copyright
|
45
59
|
|
data/Rakefile
CHANGED
@@ -27,7 +27,7 @@ Jeweler::Tasks.new do |gem|
|
|
27
27
|
gem.authors = ["Pjotr Prins"]
|
28
28
|
gem.extensions = "ext/src/mkrf_conf.rb"
|
29
29
|
gem.files += Dir['lib/**/*'] + Dir['ext/**/*']
|
30
|
-
gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|R|Rd|log)$/ }
|
30
|
+
gem.files.reject! { | n | n =~ /\.(o|so|gz|CDF|cdf|CEL|cel|R|Rd|log)$/ }
|
31
31
|
gem.rubyforge_project = "nowarning"
|
32
32
|
|
33
33
|
# dependencies defined in Gemfile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.0
|
1
|
+
0.5.1
|
data/bio-affy.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-affy"
|
8
|
-
s.version = "0.5.0"
|
8
|
+
s.version = "0.5.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2011-12-
|
12
|
+
s.date = "2011-12-09"
|
13
13
|
s.description = "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["bio-affy"]
|
data/lib/bio/affy.rb
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
module Bio
|
3
3
|
|
4
4
|
module Affy
|
5
|
+
|
6
|
+
module Find
|
7
|
+
def Find.probeset_by_feature_name cdf, name
|
8
|
+
num_probesets = Bio::Affy::Ext.cdf_num_probesets(cdf)
|
9
|
+
(0..num_probesets-1).each do | i |
|
10
|
+
probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(cdf,i)
|
11
|
+
probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
|
12
|
+
return i if probeset.name == name
|
13
|
+
end
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
5
18
|
end
|
6
19
|
|
7
20
|
end
|
data/lib/bio/affyext.rb
CHANGED
@@ -3,6 +3,17 @@ module Bio
|
|
3
3
|
|
4
4
|
module Affy
|
5
5
|
|
6
|
+
class CDFProbeInfo < FFI::Struct
|
7
|
+
layout :x, :uint,
|
8
|
+
:y, :uint
|
9
|
+
def x
|
10
|
+
self[:x]
|
11
|
+
end
|
12
|
+
def y
|
13
|
+
self[:y]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
6
17
|
class CDFProbeSet < FFI::Struct
|
7
18
|
layout :isQC, :int,
|
8
19
|
:pm_num, :int,
|
@@ -10,6 +21,10 @@ module Bio
|
|
10
21
|
:pm, :pointer,
|
11
22
|
:mm, :pointer,
|
12
23
|
:name, [:uint8, 64]
|
24
|
+
|
25
|
+
def name
|
26
|
+
self[:name].to_ptr.read_string
|
27
|
+
end
|
13
28
|
end
|
14
29
|
|
15
30
|
module Ext
|
@@ -29,6 +44,10 @@ module Bio
|
|
29
44
|
attach_function :cel_num_intensities, [ :pointer ], :uint64
|
30
45
|
attach_function :cdf_num_probesets, [ :pointer ], :uint64
|
31
46
|
attach_function :cdf_probeset_info, [ :pointer, :int ], :pointer
|
47
|
+
attach_function :cel_pm, [:pointer, :pointer, :int, :int ], :double
|
48
|
+
attach_function :cdf_pmprobe_info, [:pointer, :int, :int], :pointer
|
49
|
+
# more bindings are available, check out the functions defined in ./ext/src
|
50
|
+
# and the biolib test_affyio.rb file
|
32
51
|
end
|
33
52
|
|
34
53
|
end
|
data/spec/bio-affy_spec.rb
CHANGED
@@ -6,39 +6,132 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
6
6
|
require 'bio-affy'
|
7
7
|
|
8
8
|
DATADIR = File.join([ROOT,'test','data','affy'])
|
9
|
-
CDF = File.join(DATADIR,"MG_U74Av2.CDF")
|
9
|
+
CDF = File.join(DATADIR,"MG_U74Av2.CDF") # GPL81
|
10
10
|
CDF2 = File.join(DATADIR,"ATH1-121501.CDF")
|
11
11
|
CEL1 = File.join(DATADIR,"GSM103328.CEL.gz")
|
12
12
|
|
13
|
-
describe "
|
13
|
+
describe "Bio::Affy::Ext1" do
|
14
|
+
|
14
15
|
it "should find the shared library" do
|
15
16
|
Bio::Affy::Ext.has_affyext(5).should == 60
|
16
17
|
end
|
17
|
-
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "Bio::Affy::Ext" do
|
21
|
+
before :all do
|
22
|
+
# first start the R environment
|
18
23
|
Bio::Affy::Ext.BioLib_R_Init()
|
19
|
-
|
20
|
-
cdf
|
21
|
-
|
24
|
+
# load the CDF once
|
25
|
+
@cdf = Bio::Affy::Ext.open_cdffile(CDF)
|
26
|
+
# load a CEL file once
|
27
|
+
@cel = Bio::Affy::Ext.open_celfile(CEL1)
|
28
|
+
end
|
29
|
+
it "should open a CDF file" do
|
30
|
+
@cdf.null?.should == false
|
31
|
+
end
|
32
|
+
it "should count the probesets" do
|
33
|
+
# Open the Mouse CDF file - in Bioconductor this would be
|
34
|
+
#
|
35
|
+
# source("http://bioconductor.org/biocLite.R")
|
36
|
+
# biocLite("affy")
|
37
|
+
# library(affy)
|
38
|
+
# library(makecdfenv)
|
39
|
+
# make.cdf.package('test.cdf',species='test')
|
40
|
+
# exit and R CMD INSTALL testcdf/
|
41
|
+
# m <- ReadAffy(cdfname='test')
|
42
|
+
#
|
43
|
+
# because CDF files are not read directly. bio-affy, however, can:
|
44
|
+
num_probesets = Bio::Affy::Ext.cdf_num_probesets(@cdf)
|
22
45
|
num_probesets.should == 12501
|
23
46
|
end
|
24
47
|
it "should open a CEL file" do
|
25
|
-
|
26
|
-
|
48
|
+
# Open the Mouse CEL files - in Bioconductor this would be
|
49
|
+
#
|
50
|
+
# source("http://bioconductor.org/biocLite.R")
|
51
|
+
# biocLite("affy")
|
52
|
+
# library(affy)
|
53
|
+
# m <- ReadAffy()
|
54
|
+
# dim(m)
|
55
|
+
# Cols Rows
|
56
|
+
# 640 640 == 409600
|
57
|
+
|
58
|
+
num = Bio::Affy::Ext.cel_num_intensities(@cel)
|
27
59
|
num.should == 409600
|
28
60
|
end
|
29
|
-
it "should find the
|
30
|
-
|
31
|
-
|
61
|
+
it "should find the CDF cel intensity value" do
|
62
|
+
# In Bioconductor, after m <- ReadAffy()
|
63
|
+
#
|
64
|
+
probe_value = Bio::Affy::Ext.cel_intensity(@cel,1510)
|
32
65
|
probe_value.should == 10850.8
|
33
66
|
end
|
34
|
-
it "should
|
35
|
-
|
36
|
-
#
|
37
|
-
|
67
|
+
it "should get the probeset indexes from the CDF" do
|
68
|
+
cdf_cols = 640 # (cdf.cols)
|
69
|
+
# R/Bioconductor:
|
70
|
+
#
|
71
|
+
# > as.vector(geneNames(m))[11657]
|
72
|
+
# [1] "98910_at"
|
73
|
+
#
|
74
|
+
# cat(indexProbes(m, which="pm", genenames="98910_at")[[1]],sep=",")
|
75
|
+
# 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474
|
76
|
+
#
|
77
|
+
# or
|
78
|
+
#
|
79
|
+
# pmindex(m,"98910_at")
|
80
|
+
#
|
81
|
+
|
82
|
+
pm0 = [ 344297,177348,21247,246762,200777,166097,382469,397538,66238,344987,11503,253234,206965,103391,54927,333474 ]
|
83
|
+
pm0.each_with_index do | index, i |
|
84
|
+
# call with probeset, probenum
|
85
|
+
probe_ptr = Bio::Affy::Ext.cdf_pmprobe_info(@cdf,1510,i)
|
86
|
+
probe = Bio::Affy::CDFProbeInfo.new(probe_ptr)
|
87
|
+
# p [probe.x, probe.y]
|
88
|
+
# p [ index, probe.x, probe.y, probe.x + probe.y*@cdf.cols + 1]
|
89
|
+
(probe.x + probe.y*cdf_cols + 1).should == index
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should get the probeset information" do
|
96
|
+
# In Bioconductor, after m <- ReadAffy()
|
97
|
+
#
|
98
|
+
# > length(featureNames(m))
|
99
|
+
# [1] 12488 (12501 in bio-affy - we add the 13 controls)
|
100
|
+
#
|
101
|
+
# Note also the feature numbering is different in the Bioconductor set:
|
102
|
+
# > as.vector(geneNames(m))[0:5]
|
103
|
+
# [1] "100001_at" "100002_at" "100003_at" "100004_at" "100005_at"
|
104
|
+
# > as.vector(geneNames(m))[1509:1512]
|
105
|
+
# [1] "101947_at" "101948_at" "101949_at" "101950_at"
|
106
|
+
# > as.vector(geneNames(m))[11657]
|
107
|
+
# [1] "98910_at" <- this is what we test at index 1510.
|
108
|
+
probeset_ptr = Bio::Affy::Ext.cdf_probeset_info(@cdf,1510)
|
38
109
|
probeset = Bio::Affy::CDFProbeSet.new(probeset_ptr)
|
39
110
|
probeset[:isQC].should == 0
|
40
111
|
probeset[:pm_num].should == 16
|
41
112
|
probeset[:mm_num].should == 16
|
113
|
+
# 98910_at 144 P 0.009985 (normalized on GEO)
|
42
114
|
probeset[:name].to_ptr.read_string.should == "98910_at"
|
115
|
+
# now use the convenience methods
|
116
|
+
probeset.name.should == "98910_at"
|
117
|
+
end
|
118
|
+
it "should fetch the PM (perfect match) values" do
|
119
|
+
# Test PM values; as in R's pm(m)[1,1:8]
|
120
|
+
# mypmindex <- pmindex(m,"98910_at")
|
121
|
+
# cat(intensity(m)[mypmindex$`98910_at`],sep=",")
|
122
|
+
# Bioconductor 1.9 - even with test.cdf ought to be
|
123
|
+
|
124
|
+
pms = [ 120,768,1046,1220.3,345.3,171.3,138,171.3,189,343.3,605.3,1064.5,4429.3,854.3,2675,886.3]
|
125
|
+
pms.each_with_index do | e, i |
|
126
|
+
# p Biolib::Affyio.cel_pm(@microarrays[1],@cdf,1510,i)
|
127
|
+
Bio::Affy::Ext.cel_pm(@cel,@cdf,1510,i).should == e
|
128
|
+
end
|
129
|
+
end
|
130
|
+
# convenience methods
|
131
|
+
it "should find the probeset for 98910_at" do
|
132
|
+
probeset_index = Bio::Affy::Find.probeset_by_feature_name(@cdf,"98910_at")
|
133
|
+
probeset_index.should == 1510
|
43
134
|
end
|
44
135
|
end
|
136
|
+
|
137
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-affy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-12-
|
12
|
+
date: 2011-12-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &9651640 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.7.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *9651640
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bundler
|
27
|
-
requirement: &
|
27
|
+
requirement: &9650560 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.12
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *9650560
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: jeweler
|
38
|
-
requirement: &
|
38
|
+
requirement: &9649960 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.6.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *9649960
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rcov
|
49
|
-
requirement: &
|
49
|
+
requirement: &9649400 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *9649400
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: ffi
|
60
|
-
requirement: &
|
60
|
+
requirement: &9648680 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.11
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *9648680
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: mkrf
|
71
|
-
requirement: &
|
71
|
+
requirement: &9647920 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: 0.2.3
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *9647920
|
80
80
|
description: ! "Affymetrix microarray file format parser\n (CEL/CDF) for Ruby. FFI
|
81
81
|
binding to Biolib port of R/Affyio by Benjamin Milo Bolstad"
|
82
82
|
email: pjotr.public01@thebird.nl
|
@@ -151,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
151
151
|
version: '0'
|
152
152
|
segments:
|
153
153
|
- 0
|
154
|
-
hash:
|
154
|
+
hash: 1450233903929841244
|
155
155
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
156
|
none: false
|
157
157
|
requirements:
|