bio-affy 0.1.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
@@ -0,0 +1,51 @@
1
+ dnl
2
+ dnl Configuration things for affyR.
3
+ dnl (http://www.cbs.dtu.dk/laurent/download/affyR/
4
+ dnl What is below (and in the other configuration files)
5
+ dnl was taken from different configuration scripts for R version 1.3.0.
6
+ dnl
7
+ dnl Acknowledgments: The author(s) of the R configure scripts, Kurt Hornik for the tip with autoconf.
8
+ dnl
9
+ dnl Laurent 2001
10
+
11
+
12
+ AC_INIT("DESCRIPTION")
13
+
14
+ dnl
15
+ dnl Are things (still) the same ?
16
+ dnl (taken from the 'writing R extensions manual')
17
+
18
+
19
+ R_ZLIB
20
+
21
+
22
+ AC_CHECK_LIB(pthread, pthread_create)
23
+
24
+ AC_TRY_LINK_FUNC(pthread_create, [use_pthreads=yes], [use_pthreads=no])
25
+
26
+ AC_MSG_CHECKING([if we can use pthreads])
27
+ AC_MSG_RESULT($use_pthreads)
28
+
29
+ AC_MSG_CHECKING([if PTHREAD_STACK_MIN is defined])
30
+ AC_COMPILE_IFELSE([
31
+ #include <pthread.h>
32
+ #include <limits.h>
33
+
34
+ int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
35
+ }
36
+ ],[use_pthread_stack_min=yes], [use_pthread_stack_min=no])
37
+ AC_MSG_RESULT($use_pthread_stack_min)
38
+
39
+ if test "x$use_pthreads" = "xyes"
40
+ then
41
+ if test "x$use_pthread_stack_min" = "xyes"
42
+ then
43
+ LIBS="$LIBS -lpthread"
44
+ AC_DEFINE(USE_PTHREADS, 1)
45
+ fi
46
+ fi
47
+
48
+
49
+
50
+
51
+ AC_OUTPUT(src/Makevars)
@@ -0,0 +1,22 @@
1
+ \name{check.cdf.type}
2
+ \alias{check.cdf.type}
3
+ \title{CDF file format function}
4
+ \description{This function returns a text string giving the file format
5
+ for the supplied filename
6
+ }
7
+ \usage{check.cdf.type(filename)
8
+ }
9
+ \arguments{
10
+ \item{filename}{fullpath to a cdf file}
11
+ }
12
+ \value{Returns a string which is currently one of:
13
+
14
+ \item{text}{the cdf file is of the text format}
15
+ \item{xda}{the cdf file is of the binary format used in GCOS}
16
+ \item{unknown}{the parser can not handle this format or does not
17
+ recognize this file as a CDF file}
18
+
19
+ }
20
+
21
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
22
+ \keyword{IO}
@@ -0,0 +1,20 @@
1
+ \name{read.cdffile.list}
2
+ \alias{read.cdffile.list}
3
+ \title{Read CDF file into an R list}
4
+ \description{This function reads the entire contents of a cdf file into
5
+ an R list structure
6
+ }
7
+ \usage{read.cdffile.list(filename, cdf.path = getwd())
8
+ }
9
+ \arguments{
10
+ \item{filename}{name of CDF file}
11
+ \item{cdf.path}{path to cdf file}
12
+ }
13
+ \value{returns a \code{list} structure. The exact contents may vary
14
+ depending on the file format of the cdf file (see \code{\link{check.cdf.type}})
15
+ }
16
+ \details{
17
+ Note that this function can be very memory intensive with large CDF files.
18
+ }
19
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
20
+ \keyword{IO}
@@ -0,0 +1,23 @@
1
+ \name{read.celfile}
2
+ \alias{read.celfile}
3
+ \title{Read a CEL file into an R list}
4
+ \description{This function reads the entire contents of a CEL file into
5
+ an R list structure
6
+ }
7
+ \usage{read.celfile(filename,intensity.means.only=FALSE)
8
+ }
9
+ \arguments{
10
+ \item{filename}{name of CEL file}
11
+ \item{intensity.means.only}{If \code{TRUE} then read only the MEAN section in INTENSITY}
12
+ }
13
+ \value{returns a \code{list} structure. The exact contents may vary
14
+ depending on the file format of the CEL file
15
+ }
16
+ \details{
17
+ The list has four main items. HEADER, INTENSITY, MASKS, OUTLIERS. Note
18
+ that INTENSITY is a list of three vectors MEAN, STDEV, NPIXELS. HEADER
19
+ is also a list. Both of MASKS and OUTLIERS are matrices.
20
+
21
+ }
22
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
23
+ \keyword{IO}
@@ -0,0 +1,22 @@
1
+ \name{read.celfile.header}
2
+ \alias{read.celfile.header}
3
+ \title{Read header information from cel file}
4
+ \description{
5
+ This function reads some of the header information (appears before probe
6
+ intensity data) from the supplied cel file.
7
+ }
8
+ \usage{read.celfile.header(filename,info=c("basic","full"),verbose=FALSE)
9
+ }
10
+ \arguments{
11
+ \item{filename}{name of CEL file. May be fully pathed}
12
+ \item{info}{A string. \code{basic} returns the dimensions of the chip
13
+ and the name of the CDF file used when the CEL file was
14
+ produced. \code{full} returns more information in greater detail.}
15
+ \item{verbose}{a \code{\link{logical}}. When true the parsing routine
16
+ prints more information, typically useful for debugging.}
17
+ }
18
+ \value{
19
+ A \code{list} data structure.
20
+ }
21
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
22
+ \keyword{IO}
@@ -0,0 +1,31 @@
1
+ \name{read.celfile.probeintensity.matrices}
2
+ \alias{read.celfile.probeintensity.matrices}
3
+ \title{Read PM or MM from CEL file into matrices}
4
+ \description{This function reads PM, MM or both types of intensities
5
+ into matrices. These matrices have all the probes for a probeset in
6
+ adjacent rows
7
+ }
8
+ \usage{read.celfile.probeintensity.matrices(filenames, cdfInfo, rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE, verbose=FALSE, which= c("pm","mm","both"))
9
+ }
10
+ \arguments{
11
+ \item{filenames}{a character vector of filenames}
12
+ \item{cdfInfo}{a list with items giving PM and MM locations for
13
+ desired probesets. In same structure as returned by \code{\link[makecdfenv]{make.cdf.package}}}
14
+ \item{rm.mask}{a \code{\link{logical}}. Return these probes as NA if
15
+ they are in the [MASK] section of the CEL file}
16
+ \item{rm.outliers}{a \code{\link{logical}}. Return these probes as NA if
17
+ they are in the [OUTLIERS] section of the CEL file}
18
+ \item{rm.extra}{a \code{\link{logical}}. Return these probes as NA if
19
+ they are in the [OUTLIERS] section of the CEL file}
20
+ \item{verbose}{a \code{\link{logical}}. When true the parsing routine
21
+ prints more information, typically useful for debugging.}
22
+
23
+ \item{which}{a string specifying which probe type to return}
24
+
25
+ }
26
+ \value{returns a \code{\link{list}} of \code{\link{matrix}} items. One
27
+ matrix contains PM probe intensities, with probes in rows and arrays
28
+ in columns
29
+ }
30
+ \author{B. M. Bolstad <bmb@bmbolstad.com>}
31
+ \keyword{IO}
@@ -0,0 +1,39 @@
1
+ cmake_minimum_required(VERSION 2.6)
2
+
3
+ PROJECT(Biolib_affyio)
4
+
5
+ SET (M_NAME affyio)
6
+ # SET (M_VERSION 1.8)
7
+
8
+ IF(NOT BUILD_LIBS)
9
+ SET (MAP_ROOT ../../../..)
10
+ SET (CMAKE_MODULE_PATH ${MAP_ROOT}/tools/cmake-support/modules)
11
+ ENDIF(NOT BUILD_LIBS)
12
+
13
+ SET(USE_ZLIB TRUE)
14
+ SET(USE_RLIB TRUE)
15
+
16
+ FIND_PACKAGE(Map REQUIRED)
17
+ FIND_PACKAGE(MapLibraries REQUIRED)
18
+
19
+ INCLUDE_DIRECTORIES(.)
20
+
21
+ NAME_CLIB()
22
+
23
+ ADD_LIBRARY(${LIBNAME} SHARED
24
+ biolib_affyio.c
25
+ read_abatch.c
26
+ read_cdf_xda.c
27
+ read_cdffile2.c
28
+ fread_functions.c
29
+ read_generic.c
30
+ read_celfile_generic.c
31
+ )
32
+
33
+ BUILD_CLIB()
34
+
35
+ INSTALL_CLIB()
36
+
37
+ ADD_CUSTOM_TARGET(apidoc
38
+ COMMAND ${CMAKE_COMMAND} -E echo_append "Building API Documentation for Affyio..."
39
+ )
@@ -0,0 +1,3 @@
1
+ PKG_CFLAGS = @CFLAGS@
2
+ PKG_LIBS = @LIBS@
3
+ PKG_CPPFLAGS = @DEFS@
@@ -0,0 +1,2 @@
1
+ PKG_CPPFLAGS += -DHAVE_ZLIB
2
+ PKG_LIBS += -lRzlib
@@ -0,0 +1,43 @@
1
+ # Generated by mkrf
2
+ require 'rake/clean'
3
+
4
+ CLEAN.include('*.o')
5
+ CLOBBER.include('libaffyext.so', 'mkrf.log')
6
+
7
+ SRC = FileList['*.c']
8
+ OBJ = SRC.ext('o')
9
+ CC = 'gcc'
10
+
11
+ ADDITIONAL_OBJECTS = ''
12
+
13
+ LDSHARED = "gcc -shared -L/usr/lib64/R/lib -lR"
14
+
15
+ LIBPATH = "-L/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib "
16
+
17
+ INCLUDES = "-I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/include/ruby-1.9.1 -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/include/ruby-1.9.1/x86_64-linux -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib/ruby/1.9.1/x86_64-linux -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib/ruby/site_ruby/1.9.1 -I."
18
+
19
+ LIBS = "-lpthread -lrt -ldl -lcrypt -lm"
20
+
21
+ CFLAGS = "-fPIC -O3 -ggdb -Wextra -Wno-unused-parameter -Wno-parentheses -Wno-long-long -Wno-missing-field-initializers -Werror=pointer-arith -Werror=write-strings -Werror=implicit-function-declaration -fPIC -I/usr/share/R/include "
22
+
23
+ RUBYARCHDIR = "#{ENV["RUBYARCHDIR"]}"
24
+ LIBRUBYARG_SHARED = "-Wl,-R -Wl,/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib -L/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib -lruby"
25
+
26
+ task :default => ['libaffyext.so']
27
+
28
+ rule '.o' => '.c' do |t|
29
+ sh "#{CC} #{CFLAGS} #{INCLUDES} -c #{t.source}"
30
+ end
31
+
32
+ desc "Build this extension"
33
+ file 'libaffyext.so' => OBJ do
34
+ sh "#{LDSHARED} #{LIBPATH} -o libaffyext.so #{OBJ} #{ADDITIONAL_OBJECTS} #{LIBS} #{LIBRUBYARG_SHARED}"
35
+ end
36
+
37
+ desc "Install this extension"
38
+ task :install => 'libaffyext.so' do
39
+ makedirs "#{RUBYARCHDIR}"
40
+ install "libaffyext.so", "#{RUBYARCHDIR}"
41
+ end
42
+
43
+
@@ -0,0 +1,416 @@
1
+ /*! BIOLIB 'C' interface to affyio - for linking against the Bio* languages
2
+ *
3
+ * \defgroup affyio AffyIO
4
+ *
5
+ *
6
+ * Author: Pjotr Prins 2008
7
+ */
8
+ /*@{*/
9
+
10
+ #include <stdlib.h>
11
+ #include <string.h>
12
+ #include <R.h> // For R's Free function only
13
+ #include <biolib_affyio.h>
14
+
15
/*
 * Smoke test for the shared library binding: hands the input value back
 * increased by 55, so a caller can verify that the library is loaded and
 * callable.
 */

int has_affyext(int start)
{
  const int offset = 55;
  int answer = start + offset;

  return answer;
}
25
+
26
+ /*!
27
+ * Open a cel file using the Affyio library and return a pointer to a CELOBJECT,
28
+ * which maintains state keeping track of the opened CEL data. The full array
29
+ * data gets stored in memory - including stddev, npixels, masks and outliers.
30
+ *
31
+ * Use the direct celfile_methods instead, for more effecient memory usage.
32
+ *
33
+ * @param celfilename points to a valid Affy CEL file (or .gz edition)
34
+ *
35
+ * @return pointer to CELOBJECT
36
+ *
37
+ * @see close_celfile
38
+ * @see cel_intensities
39
+ * @see cel_stddev
40
+ * @see celfile_intensities
41
+ * @see open_cdffile
42
+ */
43
+
44
+ CELOBJECT *open_celfile(const char *celfilename)
45
+ {
46
+ CELOBJECT *celobject = (CELOBJECT *)malloc(sizeof(CELOBJECT));
47
+ if (celobject) {
48
+ memset(celobject,0,sizeof(CELOBJECT));
49
+ strncpy(celobject->id,"[CEL]",6);
50
+ celobject->cel = read_cel_file(celfilename, 1);
51
+ }
52
+ return celobject;
53
+ }
54
+
55
+ /**
56
+ * Close a CELOBJECT opened with open_celfile and frees up RAM
57
+ *
58
+ * @param object
59
+ *
60
+ */
61
+
62
+ void close_celfile(CELOBJECT *object)
63
+ {
64
+ CEL *cel = object->cel;
65
+
66
+ Free(cel->header.cdfName);
67
+ Free(cel->header.DatHeader);
68
+ Free(cel->header.Algorithm);
69
+ Free(cel->header.AlgorithmParameters);
70
+
71
+
72
+ Free(cel->intensities);
73
+ Free(cel->stddev);
74
+ Free(cel->npixels);
75
+
76
+ Free(cel->masks_x);
77
+ Free(cel->masks_y);
78
+ Free(cel->outliers_x);
79
+ Free(cel->outliers_y);
80
+
81
+ Free(cel);
82
+
83
+ if (object->masks != NULL) { free(object->masks); }
84
+ if (object->outliers != NULL) { free(object->outliers); }
85
+ free(object);
86
+ }
87
+
88
+ /*! @return number of columns on the chip */
89
+
90
+ unsigned long cel_num_cols(CELOBJECT *object)
91
+ {
92
+ return object->cel->header.cols;
93
+ }
94
+
95
+ /*! @return number of rows on the chip */
96
+
97
+ unsigned long cel_num_rows(CELOBJECT *object)
98
+ {
99
+ return object->cel->header.rows;
100
+ }
101
+
102
+ /*! @return number of masked probes */
103
+
104
+ unsigned long cel_num_masks(CELOBJECT *object)
105
+ {
106
+ return object->cel->nmasks*2;
107
+ }
108
+
109
+ /*! @return number of outliers */
110
+
111
+ unsigned long cel_num_outliers(CELOBJECT *object)
112
+ {
113
+ return object->cel->noutliers*2;
114
+ }
115
+
116
+
117
+ /*! @return number of cells (probes) - [INTENSITY]->NumberCells (rows*cols) */
118
+
119
+ unsigned long cel_size(CELOBJECT *object)
120
+ {
121
+ return object->cel->header.rows * object->cel->header.cols;
122
+ }
123
+
124
+ /*! @return number of intensities (probes) */
125
+
126
+ unsigned long cel_num_intensities(CELOBJECT *object)
127
+ {
128
+ return object->cel->header.rows * object->cel->header.cols;
129
+ }
130
+
131
+ /*!
132
+ * @parameter index of cel/probe
133
+ * @return an intensity value
134
+ * @see cel_pm
135
+ */
136
+
137
+ double cel_intensity(CELOBJECT *object, unsigned long index)
138
+ {
139
+ return object->cel->intensities[index];
140
+ }
141
+
142
+ /*!
143
+ * @parameter x pos on chip
144
+ * @parameter y pos on chip
145
+ * @return an intensity value
146
+ * @see cel_pm
147
+ */
148
+
149
+ double cel_intensity_xy(CELOBJECT *object, unsigned long x, unsigned long y)
150
+ {
151
+ return object->cel->intensities[y*cel_num_rows(object)+x];
152
+ }
153
+
154
+ /*! @return a double array to the cel/probe intensities
155
+ * @see cel_pm
156
+ */
157
+
158
+ double *cel_intensities(CELOBJECT *object)
159
+ {
160
+ return object->cel->intensities;
161
+ }
162
+
163
+ /*! @return the stddev for the cel/probe */
164
+
165
+ double cel_stddev(CELOBJECT *object, unsigned long index)
166
+ {
167
+ return object->cel->stddev[index];
168
+ }
169
+
170
+ /*! @return a double array to the stddev */
171
+
172
+ double *cel_stddevs(CELOBJECT *object)
173
+ {
174
+ return object->cel->stddev;
175
+ }
176
+
177
+ /*!
178
+ * @param index is the nth item in the mask list
179
+ * @return the cel/probe mask index
180
+ * @see cel_num_masks
181
+ */
182
+
183
+ unsigned int cel_mask(CELOBJECT *object, unsigned long index)
184
+ {
185
+ CEL *cel = object->cel;
186
+ if (!object->masks) {
187
+ // initialize
188
+ object->masks = malloc(cel->nmasks*2*sizeof(short));
189
+ int i;
190
+ for (i=0; i < cel->nmasks; i++){
191
+ object->masks[i] = (int)cel->masks_x[i];
192
+ object->masks[cel->nmasks + i] = (int)cel->masks_y[i];
193
+ }
194
+ }
195
+ return object->masks[index];
196
+ }
197
+
198
+ /*!
199
+ * @param index is the nth item in the outlier list
200
+ * @return the outlier cel/probe index
201
+ * @see cel_num_outliers
202
+ */
203
+ unsigned int cel_outlier(CELOBJECT *object, unsigned long index)
204
+ {
205
+ CEL *cel = object->cel;
206
+ if (!object->outliers) {
207
+ // initialize
208
+ object->outliers = malloc(cel->noutliers*2*sizeof(short));
209
+ int i;
210
+ for (i=0; i < cel->noutliers; i++){
211
+ object->outliers[i] = (int)cel->outliers_x[i];
212
+ object->outliers[cel->noutliers + i] = (int)cel->outliers_y[i];
213
+ }
214
+ }
215
+ return object->outliers[index];
216
+ }
217
+
218
+ /* ========================================================================= */
219
+
220
+ static void cdf_alloc_probesets(CDFOBJECT *cdfobject)
221
+ {
222
+ cdfobject->probeset = malloc(sizeof(CDFPROBESET)*cdf_num_probesets(cdfobject));
223
+ memset(cdfobject->probeset,0,sizeof(CDFPROBESET));
224
+ }
225
+
226
+ /*
227
+ * Fill the structures that keep track of probesets and probes
228
+ */
229
+
230
+ static void cdf_calc_text_probe_refs(CDFOBJECT *cdfobject)
231
+ {
232
+ cdf_text my_cdf;
233
+ memcpy(&my_cdf,&cdfobject->text,sizeof(cdf_text));
234
+ CDFPROBESET *probeset = cdfobject->probeset;
235
+
236
+ int i,j,k,l;
237
+ for (i=0; i < my_cdf.header.numberofunits; i++) {
238
+ for (j=0; j < my_cdf.units[i].numberblocks; j++) {
239
+ CDFPROBESET *pset = &probeset[i];
240
+ // here we copy the probeset name:
241
+ strncpy(pset->name,my_cdf.units[i].blocks[j].name,sizeof(pset->name)-1);
242
+ // get the number of probes
243
+ int cells = my_cdf.units[i].blocks[j].num_cells;
244
+ pset->isQC = 0;
245
+ pset->pm_num = cells/2;
246
+ pset->mm_num = cells/2;
247
+ pset->pm = malloc(sizeof(CDFPROBE)*pset->pm_num);
248
+ pset->mm = malloc(sizeof(CDFPROBE)*pset->mm_num);
249
+ /*
250
+ // assuming even cells are PM and odd cells are MM
251
+ for (k=0; k < cells/2; k++) {
252
+ CDFPROBE *pm = &pset->pm[k];
253
+ CDFPROBE *mm = &pset->mm[k];
254
+ pm->x = my_cdf.units[i].blocks[j].probes[k*2].x;
255
+ pm->y = my_cdf.units[i].blocks[j].probes[k*2].y;
256
+ mm->x = my_cdf.units[i].blocks[j].probes[k*2+1].x;
257
+ mm->y = my_cdf.units[i].blocks[j].probes[k*2+1].y;
258
+ }
259
+ */
260
+ // assuming PM always sits above an MM cel
261
+ for (k=0; k < cells/2; k++) {
262
+ CDFPROBE *pm = &pset->pm[k];
263
+ CDFPROBE *mm = &pset->mm[k];
264
+ pm->x = my_cdf.units[i].blocks[j].probes[k*2].x;
265
+ pm->y = my_cdf.units[i].blocks[j].probes[k*2].y;
266
+ mm->x = my_cdf.units[i].blocks[j].probes[k*2+1].x;
267
+ mm->y = my_cdf.units[i].blocks[j].probes[k*2+1].y;
268
+ if (pm->x!=mm->x) { error("PM cel does not sit above MM cel"); }
269
+ if (pm->y > mm->y) {
270
+ int swap = pm->y; pm->y = mm->y; mm->y = swap;
271
+ }
272
+ }
273
+ }
274
+ }
275
+ }
276
+
277
+ /*!
278
+ * Open a cdf file using the Affyio library and return a pointer to a CDFOBJECT,
279
+ * which maintains state keeping track of the opened CDF data. Unlike the Affyio
280
+ * internal representation the Biolib affyio CDF methods represent a unified
281
+ * layout for Affymetrix chips. Basically every probeset can return the name, probe
282
+ * values (PM, MM) and QC. Loading all other information is trivial, as Affyio
283
+ * makes it available - but not implemented here, at this point.
284
+ *
285
+ * @note FIXME: XDA format not tested
286
+ *
287
+ * @param cdffilename points to a valid Affy CDF file (textual or XDA binary)
288
+ *
289
+ * @return pointer to CDFOBJECT
290
+ *
291
+ * @see close_cdffile
292
+ */
293
+
294
+ CDFOBJECT *open_cdffile(const char *cdffilename)
295
+ {
296
+ CDFOBJECT *cdfobject = (CDFOBJECT *)malloc(sizeof(CDFOBJECT));
297
+ if (cdfobject) {
298
+ memset(cdfobject,0,sizeof(CDFOBJECT));
299
+ strncpy(cdfobject->id,"[CDF]",6);
300
+ if (isTextCDFFile(cdffilename)) {
301
+ cdfobject->isText = 1;
302
+ if (!read_cdf_text(cdffilename, &cdfobject->text)) {
303
+ free(cdfobject);
304
+ cdfobject = NULL;
305
+ }
306
+ else {
307
+ cdfobject->cols = cdfobject->text.header.cols;
308
+ cdfobject->rows = cdfobject->text.header.rows;
309
+ cdf_alloc_probesets(cdfobject);
310
+ cdf_calc_text_probe_refs(cdfobject);
311
+ }
312
+ }
313
+ if (check_cdf_xda(cdffilename)) {
314
+ if (!read_cdf_xda(cdffilename,&cdfobject->xda)) {
315
+ free(cdfobject);
316
+ cdfobject = NULL;
317
+ }
318
+ else {
319
+ cdfobject->cols = cdfobject->xda.header.cols;
320
+ cdfobject->rows = cdfobject->xda.header.rows;
321
+ cdf_alloc_probesets(cdfobject);
322
+ }
323
+ }
324
+ }
325
+ return cdfobject;
326
+ }
327
+
328
+
329
+ void close_cdffile(CDFOBJECT *cdfobject)
330
+ {
331
+ if (cdfobject) {
332
+ free(cdfobject->probeset);
333
+ if (cdfobject->isText) {
334
+ dealloc_cdf_text(&cdfobject->text);
335
+ }
336
+ else {
337
+ dealloc_cdf_xda(&cdfobject->xda);
338
+ }
339
+ }
340
+ }
341
+
342
+ /*! @return number of probesets (units*blocks) including QC units */
343
+
344
+ unsigned long cdf_num_probesets(CDFOBJECT *cdfobject)
345
+ {
346
+ unsigned long probesets = 0;
347
+ if (cdfobject->isText) {
348
+ probesets = cdfobject->text.header.NumQCUnits;
349
+ int i,j;
350
+ for (i=0; i < cdfobject->text.header.numberofunits; i++) {
351
+ probesets += cdfobject->text.units[i].numberblocks;
352
+ }
353
+ }
354
+ else {
355
+ probesets = cdfobject->xda.header.n_units +
356
+ cdfobject->xda.header.n_qc_units;
357
+ }
358
+ return probesets;
359
+ }
360
+
361
+ /*!
362
+ * @return CDFPROBESET related to probeset number
363
+ */
364
+ CDFPROBESET *cdf_probeset_info(CDFOBJECT *cdfobject, unsigned int probeset)
365
+ {
366
+ return &cdfobject->probeset[probeset];
367
+ }
368
+
369
+ /*!
370
+ * @return CDFPROBE related to probe in probeset
371
+ */
372
+ CDFPROBE *cdf_pmprobe_info(CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
373
+ {
374
+ return &cdfobject->probeset[probeset].pm[probe];
375
+ }
376
+
377
+ /*!
378
+ * @return CDFPROBE related to probe in probeset
379
+ */
380
+ CDFPROBE *cdf_mmprobe_info(CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
381
+ {
382
+ return &cdfobject->probeset[probeset].mm[probe];
383
+ }
384
+
385
+ /*!
386
+ * \note FIXME: in progress...
387
+ *
388
+ * @return PM value (perfect match) of the probe number in probeset; -1 on error
389
+ */
390
+
391
+ double cel_pm(CELOBJECT *celobject, CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
392
+ {
393
+ CDFPROBESET *pset = &cdfobject->probeset[probeset];
394
+ if (pset) {
395
+ CDFPROBE *p = &pset->pm[probe];
396
+ return cel_intensity_xy(celobject, p->x, p->y);
397
+ }
398
+ return -1;
399
+ }
400
+
401
+ /*!
402
+ * @return MM value (mismatch) of the probe number in probeset; -1 on error
403
+ */
404
+ double cel_mm(CELOBJECT *celobject, CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
405
+ {
406
+ CDFPROBESET *pset = &cdfobject->probeset[probeset];
407
+ if (pset) {
408
+ CDFPROBE *p = &pset->mm[probe];
409
+ return cel_intensity_xy(celobject, p->x, p->y);
410
+ }
411
+ return -1;
412
+ }
413
+
414
+
415
+
416
+ /*@}*/