bio-affy 0.1.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/bin/bio-affy +80 -0
- data/bio-affy.gemspec +128 -0
- data/ext/DESCRIPTION +11 -0
- data/ext/HISTORY +3 -0
- data/ext/LICENSE +456 -0
- data/ext/NAMESPACE +2 -0
- data/ext/R/check.cdf.type.R +18 -0
- data/ext/R/read.cdffile.list.R +23 -0
- data/ext/R/read.celfile.R +11 -0
- data/ext/R/read.celfile.header.R +37 -0
- data/ext/R/read.probematrices.R +29 -0
- data/ext/README_BIOLIB +36 -0
- data/ext/aclocal.m4 +32 -0
- data/ext/configure +4898 -0
- data/ext/configure.in +51 -0
- data/ext/man/check.cdf.type.Rd +22 -0
- data/ext/man/read.cdffile.list.Rd +20 -0
- data/ext/man/read.celfile.Rd +23 -0
- data/ext/man/read.celfile.header.Rd +22 -0
- data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
- data/ext/src/CMakeLists.txt +39 -0
- data/ext/src/Makevars.in +3 -0
- data/ext/src/Makevars.win +2 -0
- data/ext/src/Rakefile +43 -0
- data/ext/src/biolib_affyio.c +416 -0
- data/ext/src/biolib_affyio.h +132 -0
- data/ext/src/biolib_affyio.o +0 -0
- data/ext/src/fread_functions.c +871 -0
- data/ext/src/fread_functions.h +60 -0
- data/ext/src/fread_functions.o +0 -0
- data/ext/src/libaffyext.so +0 -0
- data/ext/src/mkrf.log +11 -0
- data/ext/src/mkrf_conf.rb +6 -0
- data/ext/src/read_abatch.c +5484 -0
- data/ext/src/read_abatch.h +63 -0
- data/ext/src/read_abatch.o +0 -0
- data/ext/src/read_bpmap.c +888 -0
- data/ext/src/read_bpmap.o +0 -0
- data/ext/src/read_cdf.h +347 -0
- data/ext/src/read_cdf_xda.c +1342 -0
- data/ext/src/read_cdf_xda.o +0 -0
- data/ext/src/read_cdffile2.c +1576 -0
- data/ext/src/read_cdffile2.o +0 -0
- data/ext/src/read_celfile_generic.c +2061 -0
- data/ext/src/read_celfile_generic.h +33 -0
- data/ext/src/read_celfile_generic.o +0 -0
- data/ext/src/read_clf.c +870 -0
- data/ext/src/read_clf.o +0 -0
- data/ext/src/read_generic.c +1446 -0
- data/ext/src/read_generic.h +144 -0
- data/ext/src/read_generic.o +0 -0
- data/ext/src/read_pgf.c +1337 -0
- data/ext/src/read_pgf.o +0 -0
- data/lib/bio-affy.rb +5 -0
- data/lib/bio/affy.rb +7 -0
- data/lib/bio/affyext.rb +23 -0
- data/lib/bio/libaffyext.so +0 -0
- data/spec/bio-affy_spec.rb +22 -0
- data/spec/spec_helper.rb +13 -0
- data/test/data/affy/GSM103328.CEL.gz +0 -0
- data/test/data/affy/GSM103329.CEL.gz +0 -0
- data/test/data/affy/GSM103330.CEL.gz +0 -0
- data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
- metadata +190 -0
data/ext/configure.in
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
dnl
|
2
|
+
dnl Configuration things for affyR.
|
3
|
+
dnl (http://www.cbs.dtu.dk/laurent/download/affyR/)
|
4
|
+
dnl What is below (and in the other configuration files)
|
5
|
+
dnl was taken from different configuration scripts for R version 1.3.0.
|
6
|
+
dnl
|
7
|
+
dnl Acknowledgments: The author(s) of the R configure scripts, Kurt Hornik for the tip with autoconf.
|
8
|
+
dnl
|
9
|
+
dnl Laurent 2001
|
10
|
+
|
11
|
+
|
12
|
+
AC_INIT("DESCRIPTION")
|
13
|
+
|
14
|
+
dnl
|
15
|
+
dnl Are things (still) the same ?
|
16
|
+
dnl (taken from the 'writing R extensions manual')
|
17
|
+
|
18
|
+
|
19
|
+
R_ZLIB
|
20
|
+
|
21
|
+
|
22
|
+
AC_CHECK_LIB(pthread, pthread_create)
|
23
|
+
|
24
|
+
AC_TRY_LINK_FUNC(pthread_create, [use_pthreads=yes], [use_pthreads=no])
|
25
|
+
|
26
|
+
AC_MSG_CHECKING([if we can use pthreads])
|
27
|
+
AC_MSG_RESULT($use_pthreads)
|
28
|
+
|
29
|
+
AC_MSG_CHECKING([if PTHREAD_STACK_MIN is defined])
|
30
|
+
AC_COMPILE_IFELSE([
|
31
|
+
#include <pthread.h>
|
32
|
+
#include <limits.h>
|
33
|
+
|
34
|
+
int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
|
35
|
+
}
|
36
|
+
],[use_pthread_stack_min=yes], [use_pthread_stack_min=no])
|
37
|
+
AC_MSG_RESULT($use_pthread_stack_min)
|
38
|
+
|
39
|
+
if test "x$use_pthreads" = "xyes"
|
40
|
+
then
|
41
|
+
if test "x$use_pthread_stack_min" = "xyes"
|
42
|
+
then
|
43
|
+
LIBS="$LIBS -lpthread"
|
44
|
+
AC_DEFINE(USE_PTHREADS, 1)
|
45
|
+
fi
|
46
|
+
fi
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
AC_OUTPUT(src/Makevars)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
\name{check.cdf.type}
|
2
|
+
\alias{check.cdf.type}
|
3
|
+
\title{CDF file format function}
|
4
|
+
\description{This function returns a text string giving the file format
|
5
|
+
for the supplied filename
|
6
|
+
}
|
7
|
+
\usage{check.cdf.type(filename)
|
8
|
+
}
|
9
|
+
\arguments{
|
10
|
+
\item{filename}{fullpath to a cdf file}
|
11
|
+
}
|
12
|
+
\value{Returns a string which is currently one of:
|
13
|
+
|
14
|
+
\item{text}{the cdf file is of the text format}
|
15
|
+
\item{xda}{the cdf file is of the binary format used in GCOS}
|
16
|
+
\item{unknown}{the parser can not handle this format or does not
|
17
|
+
recognize this file as a CDF file}
|
18
|
+
|
19
|
+
}
|
20
|
+
|
21
|
+
\author{B. M. Bolstad <bmb@bmbolstad.com>}
|
22
|
+
\keyword{IO}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
\name{read.cdffile.list}
|
2
|
+
\alias{read.cdffile.list}
|
3
|
+
\title{Read CDF file into an R list}
|
4
|
+
\description{This function reads the entire contents of a cdf file into
|
5
|
+
an R list structure
|
6
|
+
}
|
7
|
+
\usage{read.cdffile.list(filename, cdf.path = getwd())
|
8
|
+
}
|
9
|
+
\arguments{
|
10
|
+
\item{filename}{name of CDF file}
|
11
|
+
\item{cdf.path}{path to cdf file}
|
12
|
+
}
|
13
|
+
\value{returns a \code{list} structure. The exact contents may vary
|
14
|
+
depending on the file format of the cdf file (see \code{\link{check.cdf.type}})
|
15
|
+
}
|
16
|
+
\details{
|
17
|
+
Note that this function can be very memory intensive with large CDF files.
|
18
|
+
}
|
19
|
+
\author{B. M. Bolstad <bmb@bmbolstad.com>}
|
20
|
+
\keyword{IO}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
\name{read.celfile}
|
2
|
+
\alias{read.celfile}
|
3
|
+
\title{Read a CEL file into an R list}
|
4
|
+
\description{This function reads the entire contents of a CEL file into
|
5
|
+
an R list structure
|
6
|
+
}
|
7
|
+
\usage{read.celfile(filename,intensity.means.only=FALSE)
|
8
|
+
}
|
9
|
+
\arguments{
|
10
|
+
\item{filename}{name of CEL file}
|
11
|
+
\item{intensity.means.only}{If \code{TRUE} then read only the MEAN section in INTENSITY}
|
12
|
+
}
|
13
|
+
\value{returns a \code{list} structure. The exact contents may vary
|
14
|
+
depending on the file format of the CEL file
|
15
|
+
}
|
16
|
+
\details{
|
17
|
+
The list has four main items. HEADER, INTENSITY, MASKS, OUTLIERS. Note
|
18
|
+
that INTENSITY is a list of three vectors MEAN, STDEV, NPIXELS. HEADER
|
19
|
+
is also a list. Both of MASKS and OUTLIERS are matrices.
|
20
|
+
|
21
|
+
}
|
22
|
+
\author{B. M. Bolstad <bmb@bmbolstad.com>}
|
23
|
+
\keyword{IO}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
\name{read.celfile.header}
|
2
|
+
\alias{read.celfile.header}
|
3
|
+
\title{Read header information from cel file}
|
4
|
+
\description{
|
5
|
+
This function reads some of the header information (appears before probe
|
6
|
+
intensity data) from the supplied cel file.
|
7
|
+
}
|
8
|
+
\usage{read.celfile.header(filename,info=c("basic","full"),verbose=FALSE)
|
9
|
+
}
|
10
|
+
\arguments{
|
11
|
+
\item{filename}{name of CEL file. May be fully pathed}
|
12
|
+
\item{info}{A string. \code{basic} returns the dimensions of the chip
|
13
|
+
and the name of the CDF file used when the CEL file was
|
14
|
+
produced. \code{full} returns more information in greater detail.}
|
15
|
+
\item{verbose}{a \code{\link{logical}}. When true the parsing routine
|
16
|
+
prints more information, typically useful for debugging.}
|
17
|
+
}
|
18
|
+
\value{
|
19
|
+
A \code{list} data structure.
|
20
|
+
}
|
21
|
+
\author{B. M. Bolstad <bmb@bmbolstad.com>}
|
22
|
+
\keyword{IO}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
\name{read.celfile.probeintensity.matrices}
|
2
|
+
\alias{read.celfile.probeintensity.matrices}
|
3
|
+
\title{Read PM or MM from CEL file into matrices}
|
4
|
+
\description{This function reads PM, MM or both types of intensities
|
5
|
+
into matrices. These matrices have all the probes for a probeset in
|
6
|
+
adjacent rows
|
7
|
+
}
|
8
|
+
\usage{read.celfile.probeintensity.matrices(filenames, cdfInfo, rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE, verbose=FALSE, which= c("pm","mm","both"))
|
9
|
+
}
|
10
|
+
\arguments{
|
11
|
+
\item{filenames}{a character vector of filenames}
|
12
|
+
\item{cdfInfo}{a list with items giving PM and MM locations for
|
13
|
+
desired probesets. In same structure as returned by \code{\link[makecdfenv]{make.cdf.package}}}
|
14
|
+
\item{rm.mask}{a \code{\link{logical}}. Return these probes as NA if
|
15
|
+
they are in the [MASK] section of the CEL file}
|
16
|
+
\item{rm.outliers}{a \code{\link{logical}}. Return these probes as NA if
|
17
|
+
they are in the [OUTLIERS] section of the CEL file}
|
18
|
+
\item{rm.extra}{a \code{\link{logical}}. Return these probes as NA if
|
19
|
+
they are in the [OUTLIERS] section of the CEL file}
|
20
|
+
\item{verbose}{a \code{\link{logical}}. When true the parsing routine
|
21
|
+
prints more information, typically useful for debugging.}
|
22
|
+
|
23
|
+
\item{which}{a string specifying which probe type to return}
|
24
|
+
|
25
|
+
}
|
26
|
+
\value{returns a \code{\link{list}} of \code{\link{matrix}} items. One
|
27
|
+
matrix contains PM probe intensities, with probes in rows and arrays
|
28
|
+
in columns
|
29
|
+
}
|
30
|
+
\author{B. M. Bolstad <bmb@bmbolstad.com>}
|
31
|
+
\keyword{IO}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
cmake_minimum_required(VERSION 2.6)
|
2
|
+
|
3
|
+
PROJECT(Biolib_affyio)
|
4
|
+
|
5
|
+
SET (M_NAME affyio)
|
6
|
+
# SET (M_VERSION 1.8)
|
7
|
+
|
8
|
+
IF(NOT BUILD_LIBS)
|
9
|
+
SET (MAP_ROOT ../../../..)
|
10
|
+
SET (CMAKE_MODULE_PATH ${MAP_ROOT}/tools/cmake-support/modules)
|
11
|
+
ENDIF(NOT BUILD_LIBS)
|
12
|
+
|
13
|
+
SET(USE_ZLIB TRUE)
|
14
|
+
SET(USE_RLIB TRUE)
|
15
|
+
|
16
|
+
FIND_PACKAGE(Map REQUIRED)
|
17
|
+
FIND_PACKAGE(MapLibraries REQUIRED)
|
18
|
+
|
19
|
+
INCLUDE_DIRECTORIES(.)
|
20
|
+
|
21
|
+
NAME_CLIB()
|
22
|
+
|
23
|
+
ADD_LIBRARY(${LIBNAME} SHARED
|
24
|
+
biolib_affyio.c
|
25
|
+
read_abatch.c
|
26
|
+
read_cdf_xda.c
|
27
|
+
read_cdffile2.c
|
28
|
+
fread_functions.c
|
29
|
+
read_generic.c
|
30
|
+
read_celfile_generic.c
|
31
|
+
)
|
32
|
+
|
33
|
+
BUILD_CLIB()
|
34
|
+
|
35
|
+
INSTALL_CLIB()
|
36
|
+
|
37
|
+
ADD_CUSTOM_TARGET(apidoc
|
38
|
+
COMMAND ${CMAKE_COMMAND} -E echo_append "Building API Documentation for Affyio..."
|
39
|
+
)
|
data/ext/src/Makevars.in
ADDED
data/ext/src/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Generated by mkrf
|
2
|
+
require 'rake/clean'
|
3
|
+
|
4
|
+
CLEAN.include('*.o')
|
5
|
+
CLOBBER.include('libaffyext.so', 'mkrf.log')
|
6
|
+
|
7
|
+
SRC = FileList['*.c']
|
8
|
+
OBJ = SRC.ext('o')
|
9
|
+
CC = 'gcc'
|
10
|
+
|
11
|
+
ADDITIONAL_OBJECTS = ''
|
12
|
+
|
13
|
+
LDSHARED = "gcc -shared -L/usr/lib64/R/lib -lR"
|
14
|
+
|
15
|
+
LIBPATH = "-L/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib "
|
16
|
+
|
17
|
+
INCLUDES = "-I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/include/ruby-1.9.1 -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/include/ruby-1.9.1/x86_64-linux -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib/ruby/1.9.1/x86_64-linux -I/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib/ruby/site_ruby/1.9.1 -I."
|
18
|
+
|
19
|
+
LIBS = "-lpthread -lrt -ldl -lcrypt -lm"
|
20
|
+
|
21
|
+
CFLAGS = "-fPIC -O3 -ggdb -Wextra -Wno-unused-parameter -Wno-parentheses -Wno-long-long -Wno-missing-field-initializers -Werror=pointer-arith -Werror=write-strings -Werror=implicit-function-declaration -fPIC -I/usr/share/R/include "
|
22
|
+
|
23
|
+
RUBYARCHDIR = "#{ENV["RUBYARCHDIR"]}"
|
24
|
+
LIBRUBYARG_SHARED = "-Wl,-R -Wl,/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib -L/home/wrk/.rvm/rubies/ruby-1.9.3-p0/lib -lruby"
|
25
|
+
|
26
|
+
task :default => ['libaffyext.so']
|
27
|
+
|
28
|
+
rule '.o' => '.c' do |t|
|
29
|
+
sh "#{CC} #{CFLAGS} #{INCLUDES} -c #{t.source}"
|
30
|
+
end
|
31
|
+
|
32
|
+
desc "Build this extension"
|
33
|
+
file 'libaffyext.so' => OBJ do
|
34
|
+
sh "#{LDSHARED} #{LIBPATH} -o libaffyext.so #{OBJ} #{ADDITIONAL_OBJECTS} #{LIBS} #{LIBRUBYARG_SHARED}"
|
35
|
+
end
|
36
|
+
|
37
|
+
desc "Install this extension"
|
38
|
+
task :install => 'libaffyext.so' do
|
39
|
+
makedirs "#{RUBYARCHDIR}"
|
40
|
+
install "libaffyext.so", "#{RUBYARCHDIR}"
|
41
|
+
end
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,416 @@
|
|
1
|
+
/*! BIOLIB 'C' interface to affyio - for linking against the Bio* languages
|
2
|
+
*
|
3
|
+
* \defgroup affyio AffyIO
|
4
|
+
*
|
5
|
+
*
|
6
|
+
* Author: Pjotr Prins 2008
|
7
|
+
*/
|
8
|
+
/*@{*/
|
9
|
+
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <string.h>
|
12
|
+
#include <R.h> // For R's Free function only
|
13
|
+
#include <biolib_affyio.h>
|
14
|
+
|
15
|
+
/*
 * Smoke test for the shared library binding: the caller passes any integer
 * and gets the same value back increased by 55.
 */
int has_affyext(int start)
{
  const int offset = 55;

  return start + offset;
}
|
25
|
+
|
26
|
+
/*!
|
27
|
+
* Open a cel file using the Affyio library and return a pointer to a CELOBJECT,
|
28
|
+
* which maintains state keeping track of the opened CEL data. The full array
|
29
|
+
* data gets stored in memory - including stddev, npixels, masks and outliers.
|
30
|
+
*
|
31
|
+
* Use the direct celfile_methods instead, for more effecient memory usage.
|
32
|
+
*
|
33
|
+
* @param celfilename points to a valid Affy CEL file (or .gz edition)
|
34
|
+
*
|
35
|
+
* @return pointer to CELOBJECT
|
36
|
+
*
|
37
|
+
* @see close_celfile
|
38
|
+
* @see cel_intensities
|
39
|
+
* @see cel_stddev
|
40
|
+
* @see celfile_intensities
|
41
|
+
* @see open_cdffile
|
42
|
+
*/
|
43
|
+
|
44
|
+
CELOBJECT *open_celfile(const char *celfilename)
|
45
|
+
{
|
46
|
+
CELOBJECT *celobject = (CELOBJECT *)malloc(sizeof(CELOBJECT));
|
47
|
+
if (celobject) {
|
48
|
+
memset(celobject,0,sizeof(CELOBJECT));
|
49
|
+
strncpy(celobject->id,"[CEL]",6);
|
50
|
+
celobject->cel = read_cel_file(celfilename, 1);
|
51
|
+
}
|
52
|
+
return celobject;
|
53
|
+
}
|
54
|
+
|
55
|
+
/**
|
56
|
+
* Close a CELOBJECT opened with open_celfile and frees up RAM
|
57
|
+
*
|
58
|
+
* @param object
|
59
|
+
*
|
60
|
+
*/
|
61
|
+
|
62
|
+
void close_celfile(CELOBJECT *object)
|
63
|
+
{
|
64
|
+
CEL *cel = object->cel;
|
65
|
+
|
66
|
+
Free(cel->header.cdfName);
|
67
|
+
Free(cel->header.DatHeader);
|
68
|
+
Free(cel->header.Algorithm);
|
69
|
+
Free(cel->header.AlgorithmParameters);
|
70
|
+
|
71
|
+
|
72
|
+
Free(cel->intensities);
|
73
|
+
Free(cel->stddev);
|
74
|
+
Free(cel->npixels);
|
75
|
+
|
76
|
+
Free(cel->masks_x);
|
77
|
+
Free(cel->masks_y);
|
78
|
+
Free(cel->outliers_x);
|
79
|
+
Free(cel->outliers_y);
|
80
|
+
|
81
|
+
Free(cel);
|
82
|
+
|
83
|
+
if (object->masks != NULL) { free(object->masks); }
|
84
|
+
if (object->outliers != NULL) { free(object->outliers); }
|
85
|
+
free(object);
|
86
|
+
}
|
87
|
+
|
88
|
+
/*! @return number of columns on the chip */
|
89
|
+
|
90
|
+
unsigned long cel_num_cols(CELOBJECT *object)
|
91
|
+
{
|
92
|
+
return object->cel->header.cols;
|
93
|
+
}
|
94
|
+
|
95
|
+
/*! @return number of rows on the chip */
|
96
|
+
|
97
|
+
unsigned long cel_num_rows(CELOBJECT *object)
|
98
|
+
{
|
99
|
+
return object->cel->header.rows;
|
100
|
+
}
|
101
|
+
|
102
|
+
/*! @return number of masked probes */
|
103
|
+
|
104
|
+
unsigned long cel_num_masks(CELOBJECT *object)
|
105
|
+
{
|
106
|
+
return object->cel->nmasks*2;
|
107
|
+
}
|
108
|
+
|
109
|
+
/*! @return number of outliers */
|
110
|
+
|
111
|
+
unsigned long cel_num_outliers(CELOBJECT *object)
|
112
|
+
{
|
113
|
+
return object->cel->noutliers*2;
|
114
|
+
}
|
115
|
+
|
116
|
+
|
117
|
+
/*! @return number of cells (probes) - [INTENSITY]->NumberCells (rows*cols) */
|
118
|
+
|
119
|
+
unsigned long cel_size(CELOBJECT *object)
|
120
|
+
{
|
121
|
+
return object->cel->header.rows * object->cel->header.cols;
|
122
|
+
}
|
123
|
+
|
124
|
+
/*! @return number of intensities (probes) */
|
125
|
+
|
126
|
+
unsigned long cel_num_intensities(CELOBJECT *object)
|
127
|
+
{
|
128
|
+
return object->cel->header.rows * object->cel->header.cols;
|
129
|
+
}
|
130
|
+
|
131
|
+
/*!
|
132
|
+
* @parameter index of cel/probe
|
133
|
+
* @return an intensity value
|
134
|
+
* @see cel_pm
|
135
|
+
*/
|
136
|
+
|
137
|
+
double cel_intensity(CELOBJECT *object, unsigned long index)
|
138
|
+
{
|
139
|
+
return object->cel->intensities[index];
|
140
|
+
}
|
141
|
+
|
142
|
+
/*!
|
143
|
+
* @parameter x pos on chip
|
144
|
+
* @parameter y pos on chip
|
145
|
+
* @return an intensity value
|
146
|
+
* @see cel_pm
|
147
|
+
*/
|
148
|
+
|
149
|
+
double cel_intensity_xy(CELOBJECT *object, unsigned long x, unsigned long y)
|
150
|
+
{
|
151
|
+
return object->cel->intensities[y*cel_num_rows(object)+x];
|
152
|
+
}
|
153
|
+
|
154
|
+
/*! @return a double array to the cel/probe intensities
|
155
|
+
* @see cel_pm
|
156
|
+
*/
|
157
|
+
|
158
|
+
double *cel_intensities(CELOBJECT *object)
|
159
|
+
{
|
160
|
+
return object->cel->intensities;
|
161
|
+
}
|
162
|
+
|
163
|
+
/*! @return the stddev for the cel/probe */
|
164
|
+
|
165
|
+
double cel_stddev(CELOBJECT *object, unsigned long index)
|
166
|
+
{
|
167
|
+
return object->cel->stddev[index];
|
168
|
+
}
|
169
|
+
|
170
|
+
/*! @return a double array to the stddev */
|
171
|
+
|
172
|
+
double *cel_stddevs(CELOBJECT *object)
|
173
|
+
{
|
174
|
+
return object->cel->stddev;
|
175
|
+
}
|
176
|
+
|
177
|
+
/*!
|
178
|
+
* @param index is the nth item in the mask list
|
179
|
+
* @return the cel/probe mask index
|
180
|
+
* @see cel_num_masks
|
181
|
+
*/
|
182
|
+
|
183
|
+
unsigned int cel_mask(CELOBJECT *object, unsigned long index)
|
184
|
+
{
|
185
|
+
CEL *cel = object->cel;
|
186
|
+
if (!object->masks) {
|
187
|
+
// initialize
|
188
|
+
object->masks = malloc(cel->nmasks*2*sizeof(short));
|
189
|
+
int i;
|
190
|
+
for (i=0; i < cel->nmasks; i++){
|
191
|
+
object->masks[i] = (int)cel->masks_x[i];
|
192
|
+
object->masks[cel->nmasks + i] = (int)cel->masks_y[i];
|
193
|
+
}
|
194
|
+
}
|
195
|
+
return object->masks[index];
|
196
|
+
}
|
197
|
+
|
198
|
+
/*!
|
199
|
+
* @param index is the nth item in the outlier list
|
200
|
+
* @return the outlier cel/probe index
|
201
|
+
* @see cel_num_outliers
|
202
|
+
*/
|
203
|
+
unsigned int cel_outlier(CELOBJECT *object, unsigned long index)
|
204
|
+
{
|
205
|
+
CEL *cel = object->cel;
|
206
|
+
if (!object->outliers) {
|
207
|
+
// initialize
|
208
|
+
object->outliers = malloc(cel->noutliers*2*sizeof(short));
|
209
|
+
int i;
|
210
|
+
for (i=0; i < cel->noutliers; i++){
|
211
|
+
object->outliers[i] = (int)cel->outliers_x[i];
|
212
|
+
object->outliers[cel->noutliers + i] = (int)cel->outliers_y[i];
|
213
|
+
}
|
214
|
+
}
|
215
|
+
return object->outliers[index];
|
216
|
+
}
|
217
|
+
|
218
|
+
/* ========================================================================= */
|
219
|
+
|
220
|
+
/*
 * Allocate the probeset table of a CDFOBJECT, one CDFPROBESET per probeset
 * reported by cdf_num_probesets().
 *
 * calloc() zero-fills the WHOLE table (the previous memset only cleared the
 * first element), so every entry starts with NULL pm/mm pointers, zero
 * counts and an empty name. It also never writes through a NULL or
 * zero-sized allocation.
 *
 * On allocation failure (or a count of zero, depending on the platform)
 * cdfobject->probeset is left NULL.
 */
static void cdf_alloc_probesets(CDFOBJECT *cdfobject)
{
  cdfobject->probeset = calloc(cdf_num_probesets(cdfobject), sizeof(CDFPROBESET));
}
|
225
|
+
|
226
|
+
/*
|
227
|
+
* Fill the structures that keep track of probesets and probes
|
228
|
+
*/
|
229
|
+
|
230
|
+
static void cdf_calc_text_probe_refs(CDFOBJECT *cdfobject)
|
231
|
+
{
|
232
|
+
cdf_text my_cdf;
|
233
|
+
memcpy(&my_cdf,&cdfobject->text,sizeof(cdf_text));
|
234
|
+
CDFPROBESET *probeset = cdfobject->probeset;
|
235
|
+
|
236
|
+
int i,j,k,l;
|
237
|
+
for (i=0; i < my_cdf.header.numberofunits; i++) {
|
238
|
+
for (j=0; j < my_cdf.units[i].numberblocks; j++) {
|
239
|
+
CDFPROBESET *pset = &probeset[i];
|
240
|
+
// here we copy the probeset name:
|
241
|
+
strncpy(pset->name,my_cdf.units[i].blocks[j].name,sizeof(pset->name)-1);
|
242
|
+
// get the number of probes
|
243
|
+
int cells = my_cdf.units[i].blocks[j].num_cells;
|
244
|
+
pset->isQC = 0;
|
245
|
+
pset->pm_num = cells/2;
|
246
|
+
pset->mm_num = cells/2;
|
247
|
+
pset->pm = malloc(sizeof(CDFPROBE)*pset->pm_num);
|
248
|
+
pset->mm = malloc(sizeof(CDFPROBE)*pset->mm_num);
|
249
|
+
/*
|
250
|
+
// assuming even cells are PM and odd cells are MM
|
251
|
+
for (k=0; k < cells/2; k++) {
|
252
|
+
CDFPROBE *pm = &pset->pm[k];
|
253
|
+
CDFPROBE *mm = &pset->mm[k];
|
254
|
+
pm->x = my_cdf.units[i].blocks[j].probes[k*2].x;
|
255
|
+
pm->y = my_cdf.units[i].blocks[j].probes[k*2].y;
|
256
|
+
mm->x = my_cdf.units[i].blocks[j].probes[k*2+1].x;
|
257
|
+
mm->y = my_cdf.units[i].blocks[j].probes[k*2+1].y;
|
258
|
+
}
|
259
|
+
*/
|
260
|
+
// assuming PM always sits above an MM cel
|
261
|
+
for (k=0; k < cells/2; k++) {
|
262
|
+
CDFPROBE *pm = &pset->pm[k];
|
263
|
+
CDFPROBE *mm = &pset->mm[k];
|
264
|
+
pm->x = my_cdf.units[i].blocks[j].probes[k*2].x;
|
265
|
+
pm->y = my_cdf.units[i].blocks[j].probes[k*2].y;
|
266
|
+
mm->x = my_cdf.units[i].blocks[j].probes[k*2+1].x;
|
267
|
+
mm->y = my_cdf.units[i].blocks[j].probes[k*2+1].y;
|
268
|
+
if (pm->x!=mm->x) { error("PM cel does not sit above MM cel"); }
|
269
|
+
if (pm->y > mm->y) {
|
270
|
+
int swap = pm->y; pm->y = mm->y; mm->y = swap;
|
271
|
+
}
|
272
|
+
}
|
273
|
+
}
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
/*!
|
278
|
+
* Open a cdf file using the Affyio library and return a pointer to a CDFOBJECT,
|
279
|
+
* which maintains state keeping track of the opened CDF data. Unlike the Affyio
|
280
|
+
* internal representation the Biolib affyio CDF methods represent a unified
|
281
|
+
* layout for Affymetrix chips. Basically every probeset can return the name, probe
|
282
|
+
* values (PM, MM) and QC. Loading all other information is trivial, as Affyio
|
283
|
+
* makes it available - but not implemented here, at this point.
|
284
|
+
*
|
285
|
+
* @note FIXME: XDA format not tested
|
286
|
+
*
|
287
|
+
* @param cdffilename points to a valid Affy CDF file (textual or XDA binary)
|
288
|
+
*
|
289
|
+
* @return pointer to CDFOBJECT
|
290
|
+
*
|
291
|
+
* @see close_cdffile
|
292
|
+
*/
|
293
|
+
|
294
|
+
CDFOBJECT *open_cdffile(const char *cdffilename)
|
295
|
+
{
|
296
|
+
CDFOBJECT *cdfobject = (CDFOBJECT *)malloc(sizeof(CDFOBJECT));
|
297
|
+
if (cdfobject) {
|
298
|
+
memset(cdfobject,0,sizeof(CDFOBJECT));
|
299
|
+
strncpy(cdfobject->id,"[CDF]",6);
|
300
|
+
if (isTextCDFFile(cdffilename)) {
|
301
|
+
cdfobject->isText = 1;
|
302
|
+
if (!read_cdf_text(cdffilename, &cdfobject->text)) {
|
303
|
+
free(cdfobject);
|
304
|
+
cdfobject = NULL;
|
305
|
+
}
|
306
|
+
else {
|
307
|
+
cdfobject->cols = cdfobject->text.header.cols;
|
308
|
+
cdfobject->rows = cdfobject->text.header.rows;
|
309
|
+
cdf_alloc_probesets(cdfobject);
|
310
|
+
cdf_calc_text_probe_refs(cdfobject);
|
311
|
+
}
|
312
|
+
}
|
313
|
+
if (check_cdf_xda(cdffilename)) {
|
314
|
+
if (!read_cdf_xda(cdffilename,&cdfobject->xda)) {
|
315
|
+
free(cdfobject);
|
316
|
+
cdfobject = NULL;
|
317
|
+
}
|
318
|
+
else {
|
319
|
+
cdfobject->cols = cdfobject->xda.header.cols;
|
320
|
+
cdfobject->rows = cdfobject->xda.header.rows;
|
321
|
+
cdf_alloc_probesets(cdfobject);
|
322
|
+
}
|
323
|
+
}
|
324
|
+
}
|
325
|
+
return cdfobject;
|
326
|
+
}
|
327
|
+
|
328
|
+
|
329
|
+
/*!
 * Close a CDFOBJECT opened with open_cdffile and free its memory: the
 * probeset table, the format specific CDF data (text or XDA) and the
 * object itself, which was previously leaked.
 *
 * Passing NULL is safe.
 *
 * NOTE(review): the per-probeset pm/mm arrays allocated while loading a
 * text CDF are not released here - confirm that every table entry is
 * initialized before adding that.
 *
 * @param cdfobject the CDFOBJECT to release; must not be used afterwards
 */
void close_cdffile(CDFOBJECT *cdfobject)
{
  if (cdfobject == NULL) {
    return;
  }

  free(cdfobject->probeset);
  cdfobject->probeset = NULL;

  if (cdfobject->isText) {
    dealloc_cdf_text(&cdfobject->text);
  }
  else {
    dealloc_cdf_xda(&cdfobject->xda);
  }

  free(cdfobject);
}
|
341
|
+
|
342
|
+
/*! @return number of probesets (units*blocks) including QC units */
|
343
|
+
|
344
|
+
unsigned long cdf_num_probesets(CDFOBJECT *cdfobject)
|
345
|
+
{
|
346
|
+
unsigned long probesets = 0;
|
347
|
+
if (cdfobject->isText) {
|
348
|
+
probesets = cdfobject->text.header.NumQCUnits;
|
349
|
+
int i,j;
|
350
|
+
for (i=0; i < cdfobject->text.header.numberofunits; i++) {
|
351
|
+
probesets += cdfobject->text.units[i].numberblocks;
|
352
|
+
}
|
353
|
+
}
|
354
|
+
else {
|
355
|
+
probesets = cdfobject->xda.header.n_units +
|
356
|
+
cdfobject->xda.header.n_qc_units;
|
357
|
+
}
|
358
|
+
return probesets;
|
359
|
+
}
|
360
|
+
|
361
|
+
/*!
|
362
|
+
* @return CDFPROBESET related to probeset number
|
363
|
+
*/
|
364
|
+
CDFPROBESET *cdf_probeset_info(CDFOBJECT *cdfobject, unsigned int probeset)
|
365
|
+
{
|
366
|
+
return &cdfobject->probeset[probeset];
|
367
|
+
}
|
368
|
+
|
369
|
+
/*!
|
370
|
+
* @return CDFPROBE related to probe in probeset
|
371
|
+
*/
|
372
|
+
CDFPROBE *cdf_pmprobe_info(CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
|
373
|
+
{
|
374
|
+
return &cdfobject->probeset[probeset].pm[probe];
|
375
|
+
}
|
376
|
+
|
377
|
+
/*!
|
378
|
+
* @return CDFPROBE related to probe in probeset
|
379
|
+
*/
|
380
|
+
CDFPROBE *cdf_mmprobe_info(CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
|
381
|
+
{
|
382
|
+
return &cdfobject->probeset[probeset].mm[probe];
|
383
|
+
}
|
384
|
+
|
385
|
+
/*!
|
386
|
+
* \note FIXME: in progress...
|
387
|
+
*
|
388
|
+
* @return PM value (perfect match) of the probe number in probeset; -1 on error
|
389
|
+
*/
|
390
|
+
|
391
|
+
double cel_pm(CELOBJECT *celobject, CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
|
392
|
+
{
|
393
|
+
CDFPROBESET *pset = &cdfobject->probeset[probeset];
|
394
|
+
if (pset) {
|
395
|
+
CDFPROBE *p = &pset->pm[probe];
|
396
|
+
return cel_intensity_xy(celobject, p->x, p->y);
|
397
|
+
}
|
398
|
+
return -1;
|
399
|
+
}
|
400
|
+
|
401
|
+
/*!
|
402
|
+
* @return MM value (mismatch) of the probe number in probeset; -1 on error
|
403
|
+
*/
|
404
|
+
double cel_mm(CELOBJECT *celobject, CDFOBJECT *cdfobject, unsigned int probeset, unsigned int probe)
|
405
|
+
{
|
406
|
+
CDFPROBESET *pset = &cdfobject->probeset[probeset];
|
407
|
+
if (pset) {
|
408
|
+
CDFPROBE *p = &pset->mm[probe];
|
409
|
+
return cel_intensity_xy(celobject, p->x, p->y);
|
410
|
+
}
|
411
|
+
return -1;
|
412
|
+
}
|
413
|
+
|
414
|
+
|
415
|
+
|
416
|
+
/*@}*/
|