bio-affy 0.1.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
@@ -0,0 +1,51 @@
1
+ dnl
2
+ dnl Configuration things for affyR.
3
+ dnl (http://www.cbs.dtu.dk/laurent/download/affyR/
4
+ dnl What is below (and in the other configuration files)
5
+ dnl was taken from different configuration scripts for R version 1.3.0.
6
+ dnl
7
+ dnl Acknowledgments: The author(s) of the R configure scripts, Kurt Hornik for the tip with autoconf.
8
+ dnl
9
+ dnl Laurent 2001
10
+
11
+
12
+ AC_INIT("DESCRIPTION")
13
+
14
+ dnl
15
+ dnl Are things (still) the same ?
16
+ dnl (taken from the 'writing R extensions manual')
17
+
18
+
19
+ R_ZLIB
20
+
21
+
22
+ AC_CHECK_LIB(pthread, pthread_create)
23
+
24
+ AC_TRY_LINK_FUNC(pthread_create, [use_pthreads=yes], [use_pthreads=no])
25
+
26
+ AC_MSG_CHECKING([if we can use pthreads])
27
+ AC_MSG_RESULT($use_pthreads)
28
+
29
+ AC_MSG_CHECKING([if PTHREAD_STACK_MIN is defined])
30
+ AC_COMPILE_IFELSE([
31
+ #include <pthread.h>
32
+ #include <limits.h>
33
+
34
+ int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
35
+ }
36
+ ],[use_pthread_stack_min=yes], [use_pthread_stack_min=no])
37
+ AC_MSG_RESULT($use_pthread_stack_min)
38
+
39
+ if test "x$use_pthreads" = "xyes"
40
+ then
41
+ if test "x$use_pthread_stack_min" = "xyes"
42
+ then
43
+ LIBS="$LIBS -lpthread"
44
+ AC_DEFINE(USE_PTHREADS, 1)
45
+ fi
46
+ fi
47
+
48
+
49
+
50
+
51
+ AC_OUTPUT(src/Makevars)
@@ -0,0 +1,22 @@
1
+ \name{check.cdf.type}
2
+ \alias{check.cdf.type}
3
+ \title{CDF file format function}
4
+ \description{This function returns a text string giving the file format
5
+ for the supplied filename
6
+ }
7
+ \usage{check.cdf.type(filename)
8
+ }
9
+ \arguments{
10
+ \item{filename}{fullpath to a cdf file}
11
+ }
12
+ \value{Returns a string which is currently one of:
13
+
14
+ \item{text}{the cdf file is of the text format}
15
+ \item{xda}{the cdf file is of the binary format used in GCOS}
16
+ \item{unknown}{the parser can not handle this format or does not
17
+ recognize this file as a CDF file}
18
+
19
+ }
20
+
21
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
22
+ \keyword{IO}
@@ -0,0 +1,20 @@
1
+ \name{read.cdffile.list}
2
+ \alias{read.cdffile.list}
3
+ \title{Read CDF file into an R list}
4
+ \description{This function reads the entire contents of a cdf file into
5
+ an R list structure
6
+ }
7
+ \usage{read.cdffile.list(filename, cdf.path = getwd())
8
+ }
9
+ \arguments{
10
+ \item{filename}{name of CDF file}
11
+ \item{cdf.path}{path to cdf file}
12
+ }
13
+ \value{returns a \code{list} structure. The exact contents may vary
14
+ depending on the file format of the cdf file (see \code{\link{check.cdf.type}})
15
+ }
16
+ \details{
17
+ Note that this function can be very memory intensive with large CDF files.
18
+ }
19
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
20
+ \keyword{IO}
@@ -0,0 +1,23 @@
1
+ \name{read.celfile}
2
+ \alias{read.celfile}
3
+ \title{Read a CEL file into an R list}
4
+ \description{This function reads the entire contents of a CEL file into
5
+ an R list structure
6
+ }
7
+ \usage{read.celfile(filename,intensity.means.only=FALSE)
8
+ }
9
+ \arguments{
10
+ \item{filename}{name of CEL file}
11
+ \item{intensity.means.only}{If \code{TRUE} then read only the MEAN section in INTENSITY}
12
+ }
13
+ \value{returns a \code{list} structure. The exact contents may vary
14
+ depending on the file format of the CEL file
15
+ }
16
+ \details{
17
+ The list has four main items. HEADER, INTENSITY, MASKS, OUTLIERS. Note
18
+ that INTENSITY is a list of three vectors MEAN, STDEV, NPIXELS. HEADER
19
+ is also a list. Both of MASKS and OUTLIERS are matrices.
20
+
21
+ }
22
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
23
+ \keyword{IO}
@@ -0,0 +1,22 @@
1
+ \name{read.celfile.header}
2
+ \alias{read.celfile.header}
3
+ \title{Read header information from cel file}
4
+ \description{
5
+ This function reads some of the header information (appears before probe
6
+ intensity data) from the supplied cel file.
7
+ }
8
+ \usage{read.celfile.header(filename,info=c("basic","full"),verbose=FALSE)
9
+ }
10
+ \arguments{
11
+ \item{filename}{name of CEL file. May be fully pathed}
12
+ \item{info}{A string. \code{basic} returns the dimensions of the chip
13
+ and the name of the CDF file used when the CEL file was
14
+ produced. \code{full} returns more information in greater detail.}
15
+ \item{verbose}{a \code{\link{logical}}. When true the parsing routine
16
+ prints more information, typically useful for debugging.}
17
+ }
18
+ \value{
19
+ A \code{list} data structure.
20
+ }
21
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
22
+ \keyword{IO}
@@ -0,0 +1,31 @@
1
+ \name{read.celfile.probeintensity.matrices}
2
+ \alias{read.celfile.probeintensity.matrices}
3
+ \title{Read PM or MM from CEL file into matrices}
4
+ \description{This function reads PM, MM or both types of intensities
5
+ into matrices. These matrices have all the probes for a probeset in
6
+ adjacent rows
7
+ }
8
+ \usage{read.celfile.probeintensity.matrices(filenames, cdfInfo, rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE, verbose=FALSE, which= c("pm","mm","both"))
9
+ }
10
+ \arguments{
11
+ \item{filenames}{a character vector of filenames}
12
+ \item{cdfInfo}{a list with items giving PM and MM locations for
13
+ desired probesets. In same structure as returned by \code{\link[makecdfenv]{make.cdf.package}}}
14
+ \item{rm.mask}{a \code{\link{logical}}. Return these probes as NA if
15
+ they are in the [MASK] section of the CEL file}
16
+ \item{rm.outliers}{a \code{\link{logical}}. Return these probes as NA if
17
+ they are in the [OUTLIERS] section of the CEL file}
18
+ \item{rm.extra}{a \code{\link{logical}}. Return these probes as NA if
19
+ they are in the [OUTLIERS] section of the CEL file}
20
+ \item{verbose}{a \code{\link{logical}}. When true the parsing routine
21
+ prints more information, typically useful for debugging.}
22
+
23
+ \item{which}{a string specifying which probe type to return}
24
+
25
+ }
26
+ \value{returns a \code{\link{list}} of \code{\link{matrix}} items. One
27
+ matrix contains PM probe intensities, with probes in rows and arrays
28
+ in columns
29
+ }
30
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
31
+ \keyword{IO}
@@ -0,0 +1,39 @@
1
+ cmake_minimum_required(VERSION 2.6)
2
+
3
+ PROJECT(Biolib_affyio)
4
+
5
+ SET (M_NAME affyio)
6
+ # SET (M_VERSION 1.8)
7
+
8
+ IF(NOT BUILD_LIBS)
9
+ SET (MAP_ROOT ../../../..)
10
+ SET (CMAKE_MODULE_PATH ${MAP_ROOT}/tools/cmake-support/modules)
11
+ ENDIF(NOT BUILD_LIBS)
12
+
13
+ SET(USE_ZLIB TRUE)
14
+ SET(USE_RLIB TRUE)
15
+
16
+ FIND_PACKAGE(Map REQUIRED)
17
+ FIND_PACKAGE(MapLibraries REQUIRED)
18
+
19
+ INCLUDE_DIRECTORIES(.)
20
+
21
+ NAME_CLIB()
22
+
23
+ ADD_LIBRARY(${LIBNAME} SHARED
24
+ biolib_affyio.c
25
+ read_abatch.c
26
+ read_cdf_xda.c
27
+ read_cdffile2.c
28
+ fread_functions.c
29
+ read_generic.c
30
+ read_celfile_generic.c
31
+ )
32
+
33
+ BUILD_CLIB()
34
+
35
+ INSTALL_CLIB()
36
+
37
+ ADD_CUSTOM_TARGET(apidoc
38
+ COMMAND ${CMAKE_COMMAND} -E echo_append "Building API Documentation for Affyio..."
39
+ )
@@ -0,0 +1,3 @@
1
+ PKG_CFLAGS = @CFLAGS@
2
+ PKG_LIBS = @LIBS@
3
+ PKG_CPPFLAGS = @DEFS@
@@ -0,0 +1,2 @@
1
+ PKG_CPPFLAGS += -DHAVE_ZLIB
2
+ PKG_LIBS += -lRzlib
@@ -0,0 +1,43 @@
1
+ # Generated by mkrf
2
+ require 'rake/clean'
3
+
4
+ CLEAN.include('*.o')
5
+ CLOBBER.include('libaffyext.so', 'mkrf.log')
6
+
7
+ SRC = FileList['*.c']
8
+ OBJ = SRC.ext('o')
9
+ CC = 'gcc'
10
+
11
+ ADDITIONAL_OBJECTS = ''
12
+
13
+ LDSHARED = "gcc -shared -L/usr/lib64/R/lib -lR"
14
+
15
+ LIBPATH = "-L/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib "
16
+
17
+ INCLUDES = "-I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/include/ruby-1.9.1 -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/include/ruby-1.9.1/x86_64-linux -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib/ruby/1.9.1/x86_64-linux -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib/ruby/site_ruby/1.9.1 -I."
18
+
19
+ LIBS = "-lpthread -lrt -ldl -lcrypt -lm"
20
+
21
+ CFLAGS = "-fPIC -O3 -ggdb -Wextra -Wno-unused-parameter -Wno-parentheses -Wno-long-long -Wno-missing-field-initializers -Werror=pointer-arith -Werror=write-strings -Werror=implicit-function-declaration -fPIC -I/usr/share/R/include "
22
+
23
+ RUBYARCHDIR = "#{ENV["RUBYARCHDIR"]}"
24
+ LIBRUBYARG_SHARED = "-Wl,-R -Wl,/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib -L/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib -lruby"
25
+
26
+ task :default => ['libaffyext.so']
27
+
28
+ rule '.o' => '.c' do |t|
29
+ sh "#{CC} #{CFLAGS} #{INCLUDES} -c #{t.source}"
30
+ end
31
+
32
+ desc "Build this extension"
33
+ file 'libaffyext.so' => OBJ do
34
+ sh "#{LDSHARED} #{LIBPATH} -o libaffyext.so #{OBJ} #{ADDITIONAL_OBJECTS} #{LIBS} #{LIBRUBYARG_SHARED}"
35
+ end
36
+
37
+ desc "Install this extension"
38
+ task :install => 'libaffyext.so' do
39
+ makedirs "#{RUBYARCHDIR}"
40
+ install "libaffyext.so", "#{RUBYARCHDIR}"
41
+ end
42
+
43
+
@@ -0,0 +1,416 @@
1
+ /*! BIOLIB 'C' interface to affyio - for linking against the Bio* languages
2
+ *
3
+ * \defgroup affyio AffyIO
4
+ *
5
+ *
6
+ * Author: Pjotr Prins 2008
7
+ */
8
+ /*@{*/
9
+
10
+ #include <stdlib.h>
11
+ #include <string.h>
12
+ #include <R.h> // For R's Free function only
13
+ #include <biolib_affyio.h>
14
+
15
/*
 * Smoke test for the shared library binding: lets a caller verify that the
 * library was loaded and that calls reach it.
 *
 * @param start any integer
 * @return start + 55
 */
int has_affyext(int start)
{
  const int offset = 55;

  return offset + start;
}
25
+
26
+ /*!
27
+ * Open a cel file using the Affyio library and return a pointer to a CELOBJECT,
28
+ * which maintains state keeping track of the opened CEL data. The full array
29
+ * data gets stored in memory - including stddev, npixels, masks and outliers.
30
+ *
31
+ * Use the direct celfile_methods instead, for more effecient memory usage.
32
+ *
33
+ * @param celfilename points to a valid Affy CEL file (or .gz edition)
34
+ *
35
+ * @return pointer to CELOBJECT
36
+ *
37
+ * @see close_celfile
38
+ * @see cel_intensities
39
+ * @see cel_stddev
40
+ * @see celfile_intensities
41
+ * @see open_cdffile
42
+ */
43
+
44
+ CELOBJECT *open_celfile(const char *celfilename)
45
+ {
46
+ CELOBJECT *celobject = (CELOBJECT *)malloc(sizeof(CELOBJECT));
47
+ if (celobject) {
48
+ memset(celobject,0,sizeof(CELOBJECT));
49
+ strncpy(celobject->id,"[CEL]",6);
50
+ celobject->cel = read_cel_file(celfilename, 1);
51
+ }
52
+ return celobject;
53
+ }
54
+
55
+ /**
56
+ * Close a CELOBJECT opened with open_celfile and frees up RAM
57
+ *
58
+ * @param object
59
+ *
60
+ */
61
+
62
+ void close_celfile(CELOBJECT *object)
63
+ {
64
+ CEL *cel = object->cel;
65
+
66
+ Free(cel->header.cdfName);
67
+ Free(cel->header.DatHeader);
68
+ Free(cel->header.Algorithm);
69
+ Free(cel->header.AlgorithmParameters);
70
+
71
+
72
+ Free(cel->intensities);
73
+ Free(cel->stddev);
74
+ Free(cel->npixels);
75
+
76
+ Free(cel->masks_x);
77
+ Free(cel->masks_y);
78
+ Free(cel->outliers_x);
79
+ Free(cel->outliers_y);
80
+
81
+ Free(cel);
82
+
83
+ if (object->masks != NULL) { free(object->masks); }
84
+ if (object->outliers != NULL) { free(object->outliers); }
85
+ free(object);
86
+ }
87
+
88
+ /*! @return number of columns on the chip */
89
+
90
+ unsigned long cel_num_cols(CELOBJECT *object)
91
+ {
92
+ return object->cel->header.cols;
93
+ }
94
+
95
+ /*! @return number of rows on the chip */
96
+
97
+ unsigned long cel_num_rows(CELOBJECT *object)
98
+ {
99
+ return object->cel->header.rows;
100
+ }
101
+
102
+ /*! @return number of masked probes */
103
+
104
+ unsigned long cel_num_masks(CELOBJECT *object)
105
+ {
106
+ return object->cel->nmasks*2;
107
+ }
108
+
109
+ /*! @return number of outliers */
110
+
111
+ unsigned long cel_num_outliers(CELOBJECT *object)
112
+ {
113
+ return object->cel->noutliers*2;
114
+ }
115
+
116
+
117
+ /*! @return number of cells (probes) - [INTENSITY]->NumberCells (rows*cols) */
118
+
119
+ unsigned long cel_size(CELOBJECT *object)
120
+ {
121
+ return object->cel->header.rows * object->cel->header.cols;
122
+ }
123
+
124
+ /*! @return number of intensities (probes) */
125
+
126
+ unsigned long cel_num_intensities(CELOBJECT *object)
127
+ {
128
+ return object->cel->header.rows * object->cel->header.cols;
129
+ }
130
+
131
+ /*!
132
+ * @parameter index of cel/probe
133
+ * @return an intensity value
134
+ * @see cel_pm
135
+ */
136
+
137
+ double cel_intensity(CELOBJECT *object, unsigned long index)
138
+ {
139
+ return object->cel->intensities[index];
140
+ }
141
+
142
+ /*!
143
+ * @parameter x pos on chip
144
+ * @parameter y pos on chip
145
+ * @return an intensity value
146
+ * @see cel_pm
147
+ */
148
+
149
+ double cel_intensity_xy(CELOBJECT *object, unsigned long x, unsigned long y)
150
+ {
151
+ return object->cel->intensities[y*cel_num_rows(object)+x];
152
+ }
153
+
154
+ /*! @return a double array to the cel/probe intensities
155
+ * @see cel_pm
156
+ */
157
+
158
+ double *cel_intensities(CELOBJECT *object)
159
+ {
160
+ return object->cel->intensities;
161
+ }
162
+
163
+ /*! @return the stddev for the cel/probe */
164
+
165
+ double cel_stddev(CELOBJECT *object, unsigned long index)
166
+ {
167
+ return object->cel->stddev[index];
168
+ }
169
+
170
+ /*! @return a double array to the stddev */
171
+
172
+ double *cel_stddevs(CELOBJECT *object)
173
+ {
174
+ return object->cel->stddev;
175
+ }
176
+
177
+ /*!
178
+ * @param index is the nth item in the mask list
179
+ * @return the cel/probe mask index
180
+ * @see cel_num_masks
181
+ */
182
+
183
+ unsigned int cel_mask(CELOBJECT *object, unsigned long index)
184
+ {
185
+ CEL *cel = object->cel;
186
+ if (!object->masks) {
187
+ // initialize
188
+ object->masks = malloc(cel->nmasks*2*sizeof(short));
189
+ int i;
190
+ for (i=0; i < cel->nmasks; i++){
191
+ object->masks[i] = (int)cel->masks_x[i];
192
+ object->masks[cel->nmasks + i] = (int)cel->masks_y[i];
193
+ }
194
+ }
195
+ return object->masks[index];
196
+ }
197
+
198
+ /*!
199
+ * @param index is the nth item in the outlier list
200
+ * @return the outlier cel/probe index
201
+ * @see cel_num_outliers
202
+ */
203
+ unsigned int cel_outlier(CELOBJECT *object, unsigned long index)
204
+ {
205
+ CEL *cel = object->cel;
206
+ if (!object->outliers) {
207
+ // initialize
208
+ object->outliers = malloc(cel->noutliers*2*sizeof(short));
209
+ int i;
210
+ for (i=0; i < cel->noutliers; i++){
211
+ object->outliers[i] = (int)cel->outliers_x[i];
212
+ object->outliers[cel->noutliers + i] = (int)cel->outliers_y[i];
213
+ }
214
+ }
215
+ return object->outliers[index];
216
+ }
217
+
218
+ /* ========================================================================= */
219
+
220
+ static void cdf_alloc_probesets(CDFOBJECT *cdfobject)
221
+ {
222
+ cdfobject->probeset = malloc(sizeof(CDFPROBESET)*cdf_num_probesets(cdfobject));
223
+ memset(cdfobject->probeset,0,sizeof(CDFPROBESET));
224
+ }
225
+
226
+ /*
227
+ * Fill the structures that keep track of probesets and probes
228
+ */
229
+
230
+ static void cdf_calc_text_probe_refs(CDFOBJECT *cdfobject)
231
+ {
232
+ cdf_text my_cdf;
233
+ memcpy(&my_cdf,&cdfobject->text,sizeof(cdf_text));
234
+ CDFPROBESET *probeset = cdfobject->probeset;
235
+
236
+ int i,j,k,l;
237
+ for (i=0; i < my_cdf.header.numberofunits; i++) {
238
+ for (j=0; j < my_cdf.units[i].numberblocks; j++) {
239
+ CDFPROBESET *pset = &probeset[i];
240
+ // here we copy the probeset name:
241
+ strncpy(pset->name,my_cdf.units[i].blocks[j].name,sizeof(pset->name)-1);
242
+ // get the number of probes
243
+ int cells = my_cdf.units[i].blocks[j].num_cells;
244
+ pset->isQC = 0;
245
+ pset->pm_num = cells/2;
246
+ pset->mm_num = cells/2;
247
+ pset->pm = malloc(sizeof(CDFPROBE)*pset->pm_num);
248
+ pset->mm = malloc(sizeof(CDFPROBE)*pset->mm_num);
249
+ /*
250
+ // assuming even cells are PM and odd cells are MM
251
+ for (k=0; k < cells/2; k++) {
252
+ CDFPROBE *pm = &pset->pm[k];
253
+ CDFPROBE *mm = &pset->mm[k];
254
+ pm->x = my_cdf.units[i].blocks[j].probes[k*2].x;
255
+ pm->y = my_cdf.units[i].blocks[j].probes[k*2].y;
256
+ mm->x = my_cdf.units[i].blocks[j].probes[k*2+1].x;
257
+ mm->y = my_cdf.units[i].blocks[j].probes[k*2+1].y;
258
+ }
259
+ */
260
+ // assuming PM always sits above an MM cel
261
+ for (k=0; k < cells/2; k++) {
262
+ CDFPROBE *pm = &pset->pm[k];
263
+ CDFPROBE *mm = &pset->mm[k];
264
+ pm->x = my_cdf.units[i].blocks[j].probes[k*2].x;
265
+ pm->y = my_cdf.units[i].blocks[j].probes[k*2].y;
266
+ mm->x = my_cdf.units[i].blocks[j].probes[k*2+1].x;
267
+ mm->y = my_cdf.units[i].blocks[j].probes[k*2+1].y;
268
+ if (pm->x!=mm->x) { error("PM cel does not sit above MM cel"); }
269
+ if (pm->y > mm->y) {
270
+ int swap = pm->y; pm->y = mm->y; mm->y = swap;
271
+ }
272
+ }
273
+ }
274
+ }
275
+ }
276
+
277
+ /*!
278
+ * Open a cdf file using the Affyio library and return a pointer to a CDFOBJECT,
279
+ * which maintains state keeping track of the opened CDF data. Unlike the Affyio
280
+ * internal representation the Biolib affyio CDF methods represent a unified
281
+ * layout for Affymetrix chips. Basically every probeset can return the name, probe
282
+ * values (PM, MM) and QC. Loading all other information is trivial, as Affyio
283
+ * makes it available - but not implemented here, at this point.
284
+ *
285
+ * @note FIXME: XDA format not tested
286
+ *
287
+ * @param cdffilename points to a valid Affy CDF file (textual or XDA binary)
288
+ *
289
+ * @return pointer to CDFOBJECT
290
+ *
291
+ * @see close_cdffile
292
+ */
293
+
294
+ CDFOBJECT *open_cdffile(const char *cdffilename)
295
+ {
296
+ CDFOBJECT *cdfobject = (CDFOBJECT *)malloc(sizeof(CDFOBJECT));
297
+ if (cdfobject) {
298
+ memset(cdfobject,0,sizeof(CDFOBJECT));
299
+ strncpy(cdfobject->id,"[CDF]",6);
300
+ if (isTextCDFFile(cdffilename)) {
301
+ cdfobject->isText = 1;
302
+ if (!read_cdf_text(cdffilename, &cdfobject->text)) {
303
+ free(cdfobject);
304
+ cdfobject = NULL;
305
+ }
306
+ else {
307
+ cdfobject->cols = cdfobject->text.header.cols;
308
+ cdfobject->rows = cdfobject->text.header.rows;
309
+ cdf_alloc_probesets(cdfobject);
310
+ cdf_calc_text_probe_refs(cdfobject);
311
+ }
312
+ }
313
+ if (check_cdf_xda(cdffilename)) {
314
+ if (!read_cdf_xda(cdffilename,&cdfobject->xda)) {
315
+ free(cdfobject);
316
+ cdfobject = NULL;
317
+ }
318
+ else {
319
+ cdfobject->cols = cdfobject->xda.header.cols;
320
+ cdfobject->rows = cdfobject->xda.header.rows;
321
+ cdf_alloc_probesets(cdfobject);
322
+ }
323
+ }
324
+ }
325
+ return cdfobject;
326
+ }
327
+
328
+
329
+ void close_cdffile(CDFOBJECT *cdfobject)
330
+ {
331
+ if (cdfobject) {
332
+ free(cdfobject->probeset);
333
+ if (cdfobject->isText) {
334
+ dealloc_cdf_text(&cdfobject->text);
335
+ }
336
+ else {
337
+ dealloc_cdf_xda(&cdfobject->xda);
338
+ }
339
+ }
340
+ }
341
+
342
+ /*! @return number of probesets (units*blocks) including QC units */
343
+
344
+ unsigned long cdf_num_probesets(CDFOBJECT *cdfobject)
345
+ {
346
+ unsigned long probesets = 0;
347
+ if (cdfobject->isText) {
348
+ probesets = cdfobject->text.header.NumQCUnits;
349
+ int i,j;
350
+ for (i=0; i < cdfobject->text.header.numberofunits; i++) {
351
+ probesets += cdfobject->text.units[i].numberblocks;
352
+ }
353
+ }
354
+ else {
355
+ probesets = cdfobject->xda.header.n_units +
356
+ cdfobject->xda.header.n_qc_units;
357
+ }
358
+ return probesets;
359
+ }
360
+
361
+ /*!
362
+ * @return CDFPROBESET related to probeset number
363
+ */
364
+ CDFPROBESET *cdf_probeset_info(CDFOBJECT *cdfobject, unsigned int probeset)
365
+ {
366
+ return &cdfobject->probeset[probeset];
367
+ }
368
+
369
+ /*!
370
+ * @return CDFPROBE related to probe in probeset
371
+ */
372
+ CDFPROBE *cdf_pmprobe_info(CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
373
+ {
374
+ return &cdfobject->probeset[probeset].pm[probe];
375
+ }
376
+
377
+ /*!
378
+ * @return CDFPROBE related to probe in probeset
379
+ */
380
+ CDFPROBE *cdf_mmprobe_info(CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
381
+ {
382
+ return &cdfobject->probeset[probeset].mm[probe];
383
+ }
384
+
385
+ /*!
386
+ * \note FIXME: in progress...
387
+ *
388
+ * @return PM value (perfect match) of the probe number in probeset; -1 on error
389
+ */
390
+
391
+ double cel_pm(CELOBJECT *celobject, CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
392
+ {
393
+ CDFPROBESET *pset = &cdfobject->probeset[probeset];
394
+ if (pset) {
395
+ CDFPROBE *p = &pset->pm[probe];
396
+ return cel_intensity_xy(celobject, p->x, p->y);
397
+ }
398
+ return -1;
399
+ }
400
+
401
+ /*!
402
+ * @return MM value (mismatch) of the probe number in probeset; -1 on error
403
+ */
404
+ double cel_mm(CELOBJECT *celobject, CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
405
+ {
406
+ CDFPROBESET *pset = &cdfobject->probeset[probeset];
407
+ if (pset) {
408
+ CDFPROBE *p = &pset->mm[probe];
409
+ return cel_intensity_xy(celobject, p->x, p->y);
410
+ }
411
+ return -1;
412
+ }
413
+
414
+
415
+
416
+ /*@}*/