bio-affy 0.1.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/bin/bio-affy +80 -0
- data/bio-affy.gemspec +128 -0
- data/ext/DESCRIPTION +11 -0
- data/ext/HISTORY +3 -0
- data/ext/LICENSE +456 -0
- data/ext/NAMESPACE +2 -0
- data/ext/R/check.cdf.type.R +18 -0
- data/ext/R/read.cdffile.list.R +23 -0
- data/ext/R/read.celfile.R +11 -0
- data/ext/R/read.celfile.header.R +37 -0
- data/ext/R/read.probematrices.R +29 -0
- data/ext/README_BIOLIB +36 -0
- data/ext/aclocal.m4 +32 -0
- data/ext/configure +4898 -0
- data/ext/configure.in +51 -0
- data/ext/man/check.cdf.type.Rd +22 -0
- data/ext/man/read.cdffile.list.Rd +20 -0
- data/ext/man/read.celfile.Rd +23 -0
- data/ext/man/read.celfile.header.Rd +22 -0
- data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
- data/ext/src/CMakeLists.txt +39 -0
- data/ext/src/Makevars.in +3 -0
- data/ext/src/Makevars.win +2 -0
- data/ext/src/Rakefile +43 -0
- data/ext/src/biolib_affyio.c +416 -0
- data/ext/src/biolib_affyio.h +132 -0
- data/ext/src/biolib_affyio.o +0 -0
- data/ext/src/fread_functions.c +871 -0
- data/ext/src/fread_functions.h +60 -0
- data/ext/src/fread_functions.o +0 -0
- data/ext/src/libaffyext.so +0 -0
- data/ext/src/mkrf.log +11 -0
- data/ext/src/mkrf_conf.rb +6 -0
- data/ext/src/read_abatch.c +5484 -0
- data/ext/src/read_abatch.h +63 -0
- data/ext/src/read_abatch.o +0 -0
- data/ext/src/read_bpmap.c +888 -0
- data/ext/src/read_bpmap.o +0 -0
- data/ext/src/read_cdf.h +347 -0
- data/ext/src/read_cdf_xda.c +1342 -0
- data/ext/src/read_cdf_xda.o +0 -0
- data/ext/src/read_cdffile2.c +1576 -0
- data/ext/src/read_cdffile2.o +0 -0
- data/ext/src/read_celfile_generic.c +2061 -0
- data/ext/src/read_celfile_generic.h +33 -0
- data/ext/src/read_celfile_generic.o +0 -0
- data/ext/src/read_clf.c +870 -0
- data/ext/src/read_clf.o +0 -0
- data/ext/src/read_generic.c +1446 -0
- data/ext/src/read_generic.h +144 -0
- data/ext/src/read_generic.o +0 -0
- data/ext/src/read_pgf.c +1337 -0
- data/ext/src/read_pgf.o +0 -0
- data/lib/bio-affy.rb +5 -0
- data/lib/bio/affy.rb +7 -0
- data/lib/bio/affyext.rb +23 -0
- data/lib/bio/libaffyext.so +0 -0
- data/spec/bio-affy_spec.rb +22 -0
- data/spec/spec_helper.rb +13 -0
- data/test/data/affy/GSM103328.CEL.gz +0 -0
- data/test/data/affy/GSM103329.CEL.gz +0 -0
- data/test/data/affy/GSM103330.CEL.gz +0 -0
- data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
- metadata +190 -0
Binary file
|
data/ext/src/read_cdf.h
ADDED
@@ -0,0 +1,347 @@
|
|
1
|
+
/*****************************************************************
|
2
|
+
**
|
3
|
+
** This file contains the record definitions for the CDF files.
|
4
|
+
** They come in two formats (textual and binary). The first set
|
5
|
+
** represents the textual form (type names starting with cdf_text_...).
|
6
|
+
**
|
7
|
+
******************************************************************/
|
8
|
+
|
9
|
+
/*****************************************************************
|
10
|
+
** Textual CDF formats
|
11
|
+
**
|
12
|
+
** A structure for holding information in the
|
13
|
+
** "CDF" and "Chip" sections (basically header information)
|
14
|
+
**
|
15
|
+
******************************************************************/
|
16
|
+
|
17
|
+
typedef struct {
|
18
|
+
|
19
|
+
char *version;
|
20
|
+
char *name;
|
21
|
+
int rows,cols;
|
22
|
+
int numberofunits;
|
23
|
+
int maxunit;
|
24
|
+
int NumQCUnits;
|
25
|
+
char *chipreference;
|
26
|
+
} cdf_text_header;
|
27
|
+
|
28
|
+
/*****************************************************************
|
29
|
+
**
|
30
|
+
**
|
31
|
+
** A structure for holding QC probe information
|
32
|
+
** Note the "CYCLES" item is ignored and never parsed
|
33
|
+
**
|
34
|
+
******************************************************************/
|
35
|
+
|
36
|
+
|
37
|
+
typedef struct {
|
38
|
+
int x;
|
39
|
+
int y;
|
40
|
+
char *probe;
|
41
|
+
int plen;
|
42
|
+
int atom;
|
43
|
+
int index;
|
44
|
+
int match;
|
45
|
+
int bg;
|
46
|
+
} cdf_text_qc_probe;
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
/*******************************************************************
|
55
|
+
**
|
56
|
+
** A structure for holding QC units information. These are
|
57
|
+
** areas of the chip that contain probes that may or may not be useful
|
58
|
+
** for QC and other purposes.
|
59
|
+
**
|
60
|
+
**
|
61
|
+
*******************************************************************/
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
typedef struct{
|
66
|
+
int type;
|
67
|
+
unsigned int n_probes;
|
68
|
+
int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
|
69
|
+
cdf_text_qc_probe *qc_probes;
|
70
|
+
|
71
|
+
} cdf_text_qc_unit;
|
72
|
+
|
73
|
+
|
74
|
+
/*******************************************************************
|
75
|
+
**
|
76
|
+
** A structure for holding probe information for unit_blocks_probes
|
77
|
+
**
|
78
|
+
** probes are stored within blocks
|
79
|
+
**
|
80
|
+
*******************************************************************/
|
81
|
+
|
82
|
+
typedef struct{
|
83
|
+
int x;
|
84
|
+
int y;
|
85
|
+
char *probe;
|
86
|
+
char *feat;
|
87
|
+
char *qual;
|
88
|
+
int expos;
|
89
|
+
int pos;
|
90
|
+
char *cbase;
|
91
|
+
char *pbase;
|
92
|
+
char *tbase;
|
93
|
+
int atom;
|
94
|
+
int index;
|
95
|
+
int codonid;
|
96
|
+
int codon;
|
97
|
+
int regiontype;
|
98
|
+
char* region;
|
99
|
+
} cdf_text_unit_block_probe;
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
/*******************************************************************
|
105
|
+
**
|
106
|
+
** A structure holding Unit_blocks
|
107
|
+
**
|
108
|
+
** blocks are stored within units.
|
109
|
+
** blocks contain many probes
|
110
|
+
**
|
111
|
+
*******************************************************************/
|
112
|
+
|
113
|
+
typedef struct{
|
114
|
+
char *name;
|
115
|
+
int blocknumber;
|
116
|
+
int num_atoms;
|
117
|
+
int num_cells;
|
118
|
+
int start_position;
|
119
|
+
int stop_position;
|
120
|
+
int direction;
|
121
|
+
cdf_text_unit_block_probe *probes;
|
122
|
+
|
123
|
+
} cdf_text_unit_block;
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
/*******************************************************************
|
131
|
+
**
|
132
|
+
** A structure for holding "Units", also known as probesets
|
133
|
+
**
|
134
|
+
** Each unit contains one or more blocks. Each block contains one or
|
135
|
+
** more probes
|
136
|
+
**
|
137
|
+
*******************************************************************/
|
138
|
+
|
139
|
+
|
140
|
+
typedef struct{
|
141
|
+
char *name;
|
142
|
+
int direction;
|
143
|
+
int num_atoms;
|
144
|
+
int num_cells;
|
145
|
+
int unit_number;
|
146
|
+
int unit_type;
|
147
|
+
int numberblocks;
|
148
|
+
int MutationType;
|
149
|
+
cdf_text_unit_block *blocks;
|
150
|
+
} cdf_text_unit;
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
/*******************************************************************
|
155
|
+
**
|
156
|
+
** A structure for holding a text CDF file
|
157
|
+
**
|
158
|
+
** text cdf files consist of
|
159
|
+
** basic header information
|
160
|
+
** qcunits
|
161
|
+
** - qc probes
|
162
|
+
** units (aka probesets)
|
163
|
+
** - blocks
|
164
|
+
** - probes
|
165
|
+
**
|
166
|
+
**
|
167
|
+
*******************************************************************/
|
168
|
+
|
169
|
+
typedef struct{
|
170
|
+
cdf_text_header header;
|
171
|
+
cdf_text_qc_unit *qc_units;
|
172
|
+
cdf_text_unit *units;
|
173
|
+
} cdf_text;
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
/************************************************************************
|
178
|
+
**
|
179
|
+
** Structures for holding the CDF file information. Basically
|
180
|
+
** header/general information that appears at the start of the CDF file
|
181
|
+
**
|
182
|
+
************************************************************************/
|
183
|
+
|
184
|
+
typedef struct {
|
185
|
+
int magicnumber;
|
186
|
+
int version_number;
|
187
|
+
unsigned short rows,cols;
|
188
|
+
int n_units,n_qc_units;
|
189
|
+
int len_ref_seq;
|
190
|
+
int i;
|
191
|
+
char *ref_seq;
|
192
|
+
} cdf_xda_header;
|
193
|
+
|
194
|
+
|
195
|
+
/****************************************************************************
|
196
|
+
**
|
197
|
+
** The following two structures store QC units and QC unit probe information
|
198
|
+
**
|
199
|
+
** QC information, repeated for each QC unit:
|
200
|
+
** Type - unsigned short
|
201
|
+
** Number of probes - integer
|
202
|
+
**
|
203
|
+
** Probe information, repeated for each probe in the QC unit:
|
204
|
+
** X coordinate - unsigned short
|
205
|
+
** Y coordinate - unsigned short
|
206
|
+
** Probe length - unsigned char
|
207
|
+
** Perfect match flag - unsigned char
|
208
|
+
** Background probe flag - unsigned char
|
209
|
+
**
|
210
|
+
****************************************************************************/
|
211
|
+
|
212
|
+
|
213
|
+
typedef struct{
|
214
|
+
unsigned short x;
|
215
|
+
unsigned short y;
|
216
|
+
unsigned char probelength;
|
217
|
+
unsigned char pmflag;
|
218
|
+
unsigned char bgprobeflag;
|
219
|
+
|
220
|
+
} cdf_qc_probe;
|
221
|
+
|
222
|
+
typedef struct{
|
223
|
+
unsigned short type;
|
224
|
+
unsigned int n_probes;
|
225
|
+
|
226
|
+
cdf_qc_probe *qc_probes;
|
227
|
+
|
228
|
+
} cdf_qc_unit;
|
229
|
+
|
230
|
+
|
231
|
+
/****************************************************************************
|
232
|
+
**
|
233
|
+
** The following three structures store information for units (sometimes called
|
234
|
+
** probesets), blocks (of which there are one or more within a unit) and cells
|
235
|
+
** (sometimes called probes), of which there are one or more within each block
|
236
|
+
**
|
237
|
+
**
|
238
|
+
** Unit information, repeated for each unit:
|
239
|
+
**
|
240
|
+
** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
|
241
|
+
** Direction - unsigned char
|
242
|
+
** Number of atoms - integer
|
243
|
+
** Number of blocks - integer (always 1 for expression units)
|
244
|
+
** Number of cells - integer
|
245
|
+
** Unit number (probe set number) - integer
|
246
|
+
** Number of cells per atom - unsigned char
|
247
|
+
**
|
248
|
+
**
|
249
|
+
**
|
250
|
+
** Block information, repeated for each block in the unit:
|
251
|
+
**
|
252
|
+
** Number of atoms - integer
|
253
|
+
** Number of cells - integer
|
254
|
+
** Number of cells per atom - unsigned char
|
255
|
+
** Direction - unsigned char
|
256
|
+
** The position of the first atom - integer
|
257
|
+
** <unused integer value> - integer
|
258
|
+
** The block name - char[64]
|
259
|
+
**
|
260
|
+
**
|
261
|
+
**
|
262
|
+
** Cell information, repeated for each cell in the block:
|
263
|
+
**
|
264
|
+
** Atom number - integer
|
265
|
+
** X coordinate - unsigned short
|
266
|
+
** Y coordinate - unsigned short
|
267
|
+
** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
|
268
|
+
** Base of probe at substitution position - char
|
269
|
+
** Base of target at interrogation position - char
|
270
|
+
**
|
271
|
+
**
|
272
|
+
****************************************************************************/
|
273
|
+
|
274
|
+
|
275
|
+
typedef struct{
|
276
|
+
int atomnumber;
|
277
|
+
unsigned short x;
|
278
|
+
unsigned short y;
|
279
|
+
int indexpos;
|
280
|
+
char pbase;
|
281
|
+
char tbase;
|
282
|
+
} cdf_unit_cell;
|
283
|
+
|
284
|
+
|
285
|
+
typedef struct{
|
286
|
+
int natoms;
|
287
|
+
int ncells;
|
288
|
+
unsigned char ncellperatom;
|
289
|
+
unsigned char direction;
|
290
|
+
int firstatom;
|
291
|
+
int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
|
292
|
+
char blockname[64];
|
293
|
+
|
294
|
+
cdf_unit_cell *unit_cells;
|
295
|
+
|
296
|
+
} cdf_unit_block;
|
297
|
+
|
298
|
+
|
299
|
+
typedef struct{
|
300
|
+
unsigned short unittype;
|
301
|
+
unsigned char direction;
|
302
|
+
int natoms;
|
303
|
+
int nblocks;
|
304
|
+
int ncells;
|
305
|
+
int unitnumber;
|
306
|
+
unsigned char ncellperatom;
|
307
|
+
|
308
|
+
cdf_unit_block *unit_block;
|
309
|
+
|
310
|
+
} cdf_unit;
|
311
|
+
|
312
|
+
|
313
|
+
/****************************************************************************
|
314
|
+
**
|
315
|
+
** A data structure for holding CDF information read from a xda format cdf file
|
316
|
+
**
|
317
|
+
** note that this structure reads in everything including things that might not
|
318
|
+
** be of any subsequent use.
|
319
|
+
**
|
320
|
+
****************************************************************************/
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
typedef struct {
|
325
|
+
|
326
|
+
cdf_xda_header header; /* Header information */
|
327
|
+
char **probesetnames; /* Names of probesets */
|
328
|
+
|
329
|
+
int *qc_start; /* These are used for random access */
|
330
|
+
int *units_start;
|
331
|
+
|
332
|
+
cdf_qc_unit *qc_units;
|
333
|
+
cdf_unit *units;
|
334
|
+
|
335
|
+
|
336
|
+
} cdf_xda;
|
337
|
+
|
338
|
+
|
339
|
+
|
340
|
+
|
341
|
+
// int check_xda_file(const char *filename);
|
342
|
+
int isTextCDFFile(const char *filename);
|
343
|
+
int read_cdf_text(const char *filename, cdf_text *mycdf);
|
344
|
+
void dealloc_cdf_text(cdf_text *my_cdf);
|
345
|
+
int read_cdf_xda(const char *filename,cdf_xda *my_cdf);
|
346
|
+
void dealloc_cdf_xda(cdf_xda *my_cdf);
|
347
|
+
|
@@ -0,0 +1,1342 @@
|
|
1
|
+
/****************************************************************
|
2
|
+
**
|
3
|
+
** File: read_cdf_xda.c
|
4
|
+
**
|
5
|
+
** Implementation by: B. M. Bolstad <bmb@bmbolstad.com>
|
6
|
+
**
|
7
|
+
** A parser designed to read the binary format cdf files.
|
8
|
+
** Sometimes called the xda format.
|
9
|
+
**
|
10
|
+
** Implemented based on documentation available from Affymetrix
|
11
|
+
**
|
12
|
+
** Implementation begun 2005.
|
13
|
+
**
|
14
|
+
** Modification Dates
|
15
|
+
** Feb 4 - Initial version
|
16
|
+
** Feb 5 - A bunch of hacks for SNP chips.
|
17
|
+
** Apr 20
|
18
|
+
** Aug 16, 2005 - Fix potential big endian bug
|
19
|
+
** Sep 22, 2005 - Fix some signed/unsigned bugs
|
20
|
+
** Dec 1, 2005 - Comment cleaning
|
21
|
+
** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
|
22
|
+
** May 31, 2006 - fix some compiler warnings
|
23
|
+
** Aug 23, 2006 - fix a potential (but at current time non-existent) problem
|
24
|
+
** when there are 0 qcunits or 0 units
|
25
|
+
** Aug 25, 2007 - Move file reading functions to centralized location
|
26
|
+
** Oct 27, 2007 - When building a cdfenv set NON identified values to NA (mostly affects MM for PM only arrays)
|
27
|
+
** Nov 12, 2008 - Fix crash
|
28
|
+
** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
|
29
|
+
**
|
30
|
+
****************************************************************/
|
31
|
+
|
32
|
+
/** --- includes --- */
|
33
|
+
#include <R.h>
|
34
|
+
#include <Rdefines.h>
|
35
|
+
|
36
|
+
#include "stdlib.h"
|
37
|
+
#include "stdio.h"
|
38
|
+
#include "fread_functions.h"
|
39
|
+
#include <ctype.h>
|
40
|
+
|
41
|
+
/* #define READ_CDF_DEBUG */
|
42
|
+
/* #define READ_CDF_DEBUG_SNP */
|
43
|
+
#define READ_CDF_NOSNP
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
/************************************************************************
|
48
|
+
**
|
49
|
+
** Structures for holding the CDF file information. Basically
|
50
|
+
** header/general information that appears at the start of the CDF file
|
51
|
+
**
|
52
|
+
************************************************************************/
|
53
|
+
|
54
|
+
typedef struct {
|
55
|
+
int magicnumber;
|
56
|
+
int version_number;
|
57
|
+
unsigned short rows,cols;
|
58
|
+
int n_units,n_qc_units;
|
59
|
+
int len_ref_seq;
|
60
|
+
int i;
|
61
|
+
char *ref_seq;
|
62
|
+
} cdf_xda_header;
|
63
|
+
|
64
|
+
|
65
|
+
/****************************************************************************
|
66
|
+
**
|
67
|
+
** The following two structures store QC units and QC unit probe information
|
68
|
+
**
|
69
|
+
** QC information, repeated for each QC unit:
|
70
|
+
** Type - unsigned short
|
71
|
+
** Number of probes - integer
|
72
|
+
**
|
73
|
+
** Probe information, repeated for each probe in the QC unit:
|
74
|
+
** X coordinate - unsigned short
|
75
|
+
** Y coordinate - unsigned short
|
76
|
+
** Probe length - unsigned char
|
77
|
+
** Perfect match flag - unsigned char
|
78
|
+
** Background probe flag - unsigned char
|
79
|
+
**
|
80
|
+
****************************************************************************/
|
81
|
+
|
82
|
+
|
83
|
+
typedef struct{
|
84
|
+
unsigned short x;
|
85
|
+
unsigned short y;
|
86
|
+
unsigned char probelength;
|
87
|
+
unsigned char pmflag;
|
88
|
+
unsigned char bgprobeflag;
|
89
|
+
|
90
|
+
} cdf_qc_probe;
|
91
|
+
|
92
|
+
typedef struct{
|
93
|
+
unsigned short type;
|
94
|
+
unsigned int n_probes;
|
95
|
+
|
96
|
+
cdf_qc_probe *qc_probes;
|
97
|
+
|
98
|
+
} cdf_qc_unit;
|
99
|
+
|
100
|
+
|
101
|
+
/****************************************************************************
|
102
|
+
**
|
103
|
+
** The following three structures store information for units (sometimes called
|
104
|
+
** probesets), blocks (of which there are one or more within a unit) and cells
|
105
|
+
** (sometimes called probes), of which there are one or more within each block
|
106
|
+
**
|
107
|
+
**
|
108
|
+
** Unit information, repeated for each unit:
|
109
|
+
**
|
110
|
+
** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
|
111
|
+
** Direction - unsigned char
|
112
|
+
** Number of atoms - integer
|
113
|
+
** Number of blocks - integer (always 1 for expression units)
|
114
|
+
** Number of cells - integer
|
115
|
+
** Unit number (probe set number) - integer
|
116
|
+
** Number of cells per atom - unsigned char
|
117
|
+
**
|
118
|
+
**
|
119
|
+
**
|
120
|
+
** Block information, repeated for each block in the unit:
|
121
|
+
**
|
122
|
+
** Number of atoms - integer
|
123
|
+
** Number of cells - integer
|
124
|
+
** Number of cells per atom - unsigned char
|
125
|
+
** Direction - unsigned char
|
126
|
+
** The position of the first atom - integer
|
127
|
+
** <unused integer value> - integer
|
128
|
+
** The block name - char[64]
|
129
|
+
**
|
130
|
+
**
|
131
|
+
**
|
132
|
+
** Cell information, repeated for each cell in the block:
|
133
|
+
**
|
134
|
+
** Atom number - integer
|
135
|
+
** X coordinate - unsigned short
|
136
|
+
** Y coordinate - unsigned short
|
137
|
+
** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
|
138
|
+
** Base of probe at substitution position - char
|
139
|
+
** Base of target at interrogation position - char
|
140
|
+
**
|
141
|
+
**
|
142
|
+
****************************************************************************/
|
143
|
+
|
144
|
+
|
145
|
+
typedef struct{
|
146
|
+
int atomnumber;
|
147
|
+
unsigned short x;
|
148
|
+
unsigned short y;
|
149
|
+
int indexpos;
|
150
|
+
char pbase;
|
151
|
+
char tbase;
|
152
|
+
} cdf_unit_cell;
|
153
|
+
|
154
|
+
|
155
|
+
typedef struct{
|
156
|
+
int natoms;
|
157
|
+
int ncells;
|
158
|
+
unsigned char ncellperatom;
|
159
|
+
unsigned char direction;
|
160
|
+
int firstatom;
|
161
|
+
int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
|
162
|
+
char blockname[64];
|
163
|
+
|
164
|
+
cdf_unit_cell *unit_cells;
|
165
|
+
|
166
|
+
} cdf_unit_block;
|
167
|
+
|
168
|
+
|
169
|
+
typedef struct{
|
170
|
+
unsigned short unittype;
|
171
|
+
unsigned char direction;
|
172
|
+
int natoms;
|
173
|
+
int nblocks;
|
174
|
+
int ncells;
|
175
|
+
int unitnumber;
|
176
|
+
unsigned char ncellperatom;
|
177
|
+
|
178
|
+
cdf_unit_block *unit_block;
|
179
|
+
|
180
|
+
} cdf_unit;
|
181
|
+
|
182
|
+
|
183
|
+
/****************************************************************************
|
184
|
+
**
|
185
|
+
** A data structure for holding CDF information read from a xda format cdf file
|
186
|
+
**
|
187
|
+
** note that this structure reads in everything including things that might not
|
188
|
+
** be of any subsequent use.
|
189
|
+
**
|
190
|
+
****************************************************************************/
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
typedef struct {
|
195
|
+
|
196
|
+
cdf_xda_header header; /* Header information */
|
197
|
+
char **probesetnames; /* Names of probesets */
|
198
|
+
|
199
|
+
int *qc_start; /* These are used for random access */
|
200
|
+
int *units_start;
|
201
|
+
|
202
|
+
cdf_qc_unit *qc_units;
|
203
|
+
cdf_unit *units;
|
204
|
+
|
205
|
+
|
206
|
+
} cdf_xda;
|
207
|
+
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
|
218
|
+
|
219
|
+
/*************************************************************************
|
220
|
+
**
|
221
|
+
** int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream)
|
222
|
+
**
|
223
|
+
** cdf_qc_unit *my_unit - preallocated space to store qc unit information
|
224
|
+
** int filelocation - indexing/location information used to read information
|
225
|
+
** from file
|
226
|
+
** FILE *instream - a pre-opened file to read from
|
227
|
+
**
|
228
|
+
** reads a specified qc_unit from the file. Allocates space for the cdf_qc_probes
|
229
|
+
** and also reads them in
|
230
|
+
**
|
231
|
+
**
|
232
|
+
*************************************************************************/
|
233
|
+
|
234
|
+
int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream){

  /*
   * Seeks to filelocation in instream, reads the QC unit header (type and
   * probe count) into my_unit, allocates my_unit->qc_probes and fills in
   * one cdf_qc_probe per probe.
   *
   * Returns 1 on success and 0 if the seek or any read fails. On failure
   * the probe array allocated here is released again and
   * my_unit->qc_probes is left NULL.
   */

  unsigned int i;            /* unsigned to match my_unit->n_probes */
  cdf_qc_probe *probe;

  my_unit->qc_probes = NULL;

  if (fseek(instream,filelocation,SEEK_SET) != 0){
    return 0;
  }

  /* The fread_* helpers are used elsewhere in this file as
     "zero means the read failed" when one item is requested. */
  if (!fread_uint16(&(my_unit->type),1,instream) ||
      !fread_uint32(&(my_unit->n_probes),1,instream)){
    return 0;
  }

  my_unit->qc_probes = Calloc(my_unit->n_probes,cdf_qc_probe);

  for (i=0; i < my_unit->n_probes; i++){
    probe = &(my_unit->qc_probes[i]);

    /* || short-circuits, so the fields are still read in file order */
    if (!fread_uint16(&(probe->x),1,instream) ||
        !fread_uint16(&(probe->y),1,instream) ||
        !fread_uchar(&(probe->probelength),1,instream) ||
        !fread_uchar(&(probe->pmflag),1,instream) ||
        !fread_uchar(&(probe->bgprobeflag),1,instream)){
      /* truncated or unreadable probe record: undo the allocation */
      Free(my_unit->qc_probes);
      my_unit->qc_probes = NULL;
      return 0;
    }
  }

  return 1;
}
|
257
|
+
|
258
|
+
/*************************************************************************
|
259
|
+
**
|
260
|
+
** int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream)
|
261
|
+
**
|
262
|
+
** cdf_unit *my_unit - preallocated space to store unit (aka probeset) information
|
263
|
+
** int filelocation - indexing/location information used to read information
|
264
|
+
** from file
|
265
|
+
** FILE *instream - a pre-opened file to read from
|
266
|
+
**
|
267
|
+
** reads a specified probeset into the my_unit, including all blocks and all probes
|
268
|
+
** it is assumed that the unit itself is preallocated. Blocks and probes within
|
269
|
+
** the blocks are allocated by this function.
|
270
|
+
**
|
271
|
+
*************************************************************************/
|
272
|
+
|
273
|
+
int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream){

  /*
   * Seeks to filelocation in instream and reads one unit (probeset):
   * the unit header, then each block header followed by that block's
   * cells. The block array and each block's cell array are allocated
   * here; my_unit itself must already exist.
   *
   * Returns 1 on success. Returns 0 if the seek fails, any read fails,
   * or a block/cell count read from the file is negative. On failure
   * everything allocated by this call is released and
   * my_unit->unit_block is left NULL.
   */

  int i,j;
  cdf_unit_block *block;
  cdf_unit_cell *cell;

  my_unit->unit_block = NULL;

  if (fseek(instream,filelocation,SEEK_SET) != 0){
    return 0;
  }

  /* Unit header. || short-circuits, so fields are read in file order. */
  if (!fread_uint16(&(my_unit->unittype),1,instream) ||
      !fread_uchar(&(my_unit->direction),1,instream) ||
      !fread_int32(&(my_unit->natoms),1,instream) ||
      !fread_int32(&(my_unit->nblocks),1,instream) ||
      !fread_int32(&(my_unit->ncells),1,instream) ||
      !fread_int32(&(my_unit->unitnumber),1,instream) ||
      !fread_uchar(&(my_unit->ncellperatom),1,instream)){
    return 0;
  }

  /* A negative count can only come from a corrupt file; do not hand it
     to the allocator. */
  if (my_unit->nblocks < 0){
    return 0;
  }

  my_unit->unit_block = Calloc(my_unit->nblocks,cdf_unit_block);

  for (i=0; i < my_unit->nblocks; i++){
    block = &(my_unit->unit_block[i]);

    if (!fread_int32(&(block->natoms),1,instream) ||
        !fread_int32(&(block->ncells),1,instream) ||
        !fread_uchar(&(block->ncellperatom),1,instream) ||
        !fread_uchar(&(block->direction),1,instream) ||
        !fread_int32(&(block->firstatom),1,instream) ||
        !fread_int32(&(block->unused),1,instream) ||
        !fread_char(block->blockname,64,instream)){
      goto fail;
    }

    if (block->ncells < 0){
      goto fail;
    }

    block->unit_cells = Calloc(block->ncells,cdf_unit_cell);

    for (j=0; j < block->ncells; j++){
      cell = &(block->unit_cells[j]);

      if (!fread_int32(&(cell->atomnumber),1,instream) ||
          !fread_uint16(&(cell->x),1,instream) ||
          !fread_uint16(&(cell->y),1,instream) ||
          !fread_int32(&(cell->indexpos),1,instream) ||
          !fread_char(&(cell->pbase),1,instream) ||
          !fread_char(&(cell->tbase),1,instream)){
        goto fail;
      }
    }
  }

  return 1;

 fail:
  /* Blocks not reached yet still have the NULL unit_cells that Calloc
     gave them. NOTE(review): relies on R's Free() accepting NULL. */
  for (i=0; i < my_unit->nblocks; i++){
    Free(my_unit->unit_block[i].unit_cells);
  }
  Free(my_unit->unit_block);
  my_unit->unit_block = NULL;
  return 0;
}
|
318
|
+
|
319
|
+
/*************************************************************************
|
320
|
+
**
|
321
|
+
** void dealloc_cdf_xda(cdf_xda *my_cdf)
|
322
|
+
**
|
323
|
+
** Deallocates all the previously allocated memory.
|
324
|
+
**
|
325
|
+
*************************************************************************/
|
326
|
+
|
327
|
+
void dealloc_cdf_xda(cdf_xda *my_cdf){

  /*
   * Releases every allocation hanging off my_cdf: the probeset names,
   * the two file-offset tables, the QC units with their probes, the
   * units with their blocks and each block's cells, and the reference
   * sequence. The cdf_xda structure itself is not freed.
   *
   * Arrays that are still NULL (for example after a read that stopped
   * part way) are skipped rather than indexed.
   * NOTE(review): the plain Free(NULL) calls below rely on R's Free()
   * accepting NULL - confirm against the R version in use.
   */

  int i,j;

  if (my_cdf->probesetnames != NULL){
    for (i=0; i < my_cdf->header.n_units; i++){
      Free(my_cdf->probesetnames[i]);
    }
  }
  Free(my_cdf->probesetnames);

  Free(my_cdf->qc_start);
  Free(my_cdf->units_start);

  if (my_cdf->qc_units != NULL){
    for (i=0; i < my_cdf->header.n_qc_units; i++){
      Free(my_cdf->qc_units[i].qc_probes);
    }
  }
  Free(my_cdf->qc_units);

  if (my_cdf->units != NULL){
    for (i=0; i < my_cdf->header.n_units; i++){
      /* Each block owns its own cell array; release those before the
         block array itself, otherwise they are leaked. */
      if (my_cdf->units[i].unit_block != NULL){
        for (j=0; j < my_cdf->units[i].nblocks; j++){
          Free(my_cdf->units[i].unit_block[j].unit_cells);
        }
      }
      Free(my_cdf->units[i].unit_block);
    }
  }
  Free(my_cdf->units);

  Free(my_cdf->header.ref_seq);

}
|
353
|
+
|
354
|
+
|
355
|
+
|
356
|
+
/*************************************************************
|
357
|
+
**
|
358
|
+
** int read_cdf_xda(const char *filename, cdf_xda *my_cdf)
|
359
|
+
**
|
360
|
+
** filename - Name of the prospective binary cdf file
|
361
|
+
**
|
362
|
+
** Returns 1 if the file was completely successfully parsed
|
363
|
+
** otherwise 0 (and possibly prints a message to screen)
|
364
|
+
**
|
365
|
+
**
|
366
|
+
**
|
367
|
+
**
|
368
|
+
*************************************************************/
|
369
|
+
|
370
|
+
int read_cdf_xda(const char *filename,cdf_xda *my_cdf){

  /*
   * Opens filename and reads a complete binary (xda) CDF into my_cdf:
   * header, reference sequence, probeset names, the QC-unit and unit
   * offset tables, then every QC unit and every unit.
   *
   * Returns 1 on success and 0 on any failure (bad magic/version, a
   * negative count, or a failed read). The file is closed on every
   * path out of this function once it has been opened.
   *
   * On failure whatever was allocated so far stays attached to my_cdf;
   * pointers not yet reached are NULL and the counts not yet read are 0.
   * NOTE(review): whether callers run dealloc_cdf_xda() after a failed
   * read is not visible from here.
   */

  FILE *infile;

  int i;

  if ((infile = fopen(filename, "rb")) == NULL)
    {
      /* NOTE(review): R's error() is expected not to return; the
         return below is kept for non-R builds. */
      error("Unable to open the file %s",filename);
      return 0;
    }

  /* Start from a well defined empty state so a partial read leaves
     nothing dangling. */
  my_cdf->header.n_units = 0;
  my_cdf->header.n_qc_units = 0;
  my_cdf->header.len_ref_seq = 0;
  my_cdf->header.ref_seq = NULL;
  my_cdf->probesetnames = NULL;
  my_cdf->qc_start = NULL;
  my_cdf->units_start = NULL;
  my_cdf->qc_units = NULL;
  my_cdf->units = NULL;

  if (!fread_int32(&my_cdf->header.magicnumber,1,infile)){
    goto fail;
  }

  if (!fread_int32(&my_cdf->header.version_number,1,infile)){
    goto fail;
  }


  if (my_cdf->header.magicnumber != 67){
    Rprintf("Magic number is not 67. This is probably not a binary cdf file.\n");
    goto fail;
  }

  if (my_cdf->header.version_number != 1){
    Rprintf("Don't know if version %d binary cdf files can be handled.\n",my_cdf->header.version_number);
    goto fail;
  }

  /* cols is stored before rows in the file */
  if (!fread_uint16(&my_cdf->header.cols,1,infile)){
    goto fail;
  }
  if (!fread_uint16(&my_cdf->header.rows,1,infile)){
    goto fail;
  }

  if (!fread_int32(&my_cdf->header.n_units,1,infile)){
    goto fail;
  }

  if (!fread_int32(&my_cdf->header.n_qc_units,1,infile)){
    goto fail;
  }

  if (!fread_int32(&my_cdf->header.len_ref_seq,1,infile)){
    goto fail;
  }

  /* Negative sizes can only come from a corrupt file; refuse them
     before they reach the allocator. */
  if (my_cdf->header.n_units < 0 ||
      my_cdf->header.n_qc_units < 0 ||
      my_cdf->header.len_ref_seq < 0){
    goto fail;
  }

  my_cdf->header.ref_seq = Calloc(my_cdf->header.len_ref_seq,char);

  /* A zero-length read reports 0 items, which is not a failure. */
  if (!fread_char(my_cdf->header.ref_seq, my_cdf->header.len_ref_seq, infile)){
    if (my_cdf->header.len_ref_seq != 0){
      goto fail;
    }
  }

  my_cdf->probesetnames = Calloc(my_cdf->header.n_units,char *);

  for (i =0; i < my_cdf->header.n_units;i++){
    my_cdf->probesetnames[i] = Calloc(64,char);
    if (!fread_char(my_cdf->probesetnames[i], 64, infile)){
      goto fail;
    }
  }

  my_cdf->qc_start = Calloc(my_cdf->header.n_qc_units,int);
  my_cdf->units_start = Calloc(my_cdf->header.n_units,int);

  /* As above: reading 0 offsets reports 0 and must not count as an error. */
  if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)) {
    if(my_cdf->header.n_qc_units != 0) {
      goto fail;
    }
  }

  if(!fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)) {
    if(my_cdf->header.n_units != 0) {
      goto fail;
    }
  }

  /* We will read in all the QC and Standard Units, rather than
     random accessing what we need */
  my_cdf->qc_units = Calloc(my_cdf->header.n_qc_units,cdf_qc_unit);

  for (i =0; i < my_cdf->header.n_qc_units; i++){
    if (!read_cdf_qcunit(&my_cdf->qc_units[i],my_cdf->qc_start[i],infile)){
      goto fail;
    }
  }

  my_cdf->units = Calloc(my_cdf->header.n_units,cdf_unit);

  for (i=0; i < my_cdf->header.n_units; i++){
    if (!read_cdf_unit(&my_cdf->units[i],my_cdf->units_start[i],infile)){
      goto fail;
    }
  }


#ifdef READ_CDF_DEBUG
  Rprintf("%d %d %d %d %d\n",my_cdf->header.cols,my_cdf->header.rows,my_cdf->header.n_units,my_cdf->header.n_qc_units,my_cdf->header.len_ref_seq);
  for (i =0; i < my_cdf->header.n_units;i++){
    Rprintf("%s\n",my_cdf->probesetnames[i]);
  }

  for (i =0; i < my_cdf->header.n_qc_units;i++){
    Rprintf("%d\n",my_cdf->qc_start[i]);
  }

  /* units_start holds n_units entries, not n_qc_units */
  for (i =0; i < my_cdf->header.n_units;i++){
    Rprintf("%d\n",my_cdf->units_start[i]);
  }

  Rprintf("%d %d\n",my_cdf->qc_units[0].type,my_cdf->qc_units[0].n_probes);

  for (i=0; i < my_cdf->qc_units[0].n_probes; i++){
    Rprintf("%d %d %d %u %d\n",my_cdf->qc_units[0].qc_probes[i].x,my_cdf->qc_units[0].qc_probes[i].y,
	    my_cdf->qc_units[0].qc_probes[i].probelength,
	    my_cdf->qc_units[0].qc_probes[i].pmflag,
	    my_cdf->qc_units[0].qc_probes[i].bgprobeflag);

  }


  Rprintf("%u %u %d %d %d %d %u\n",my_cdf->units[0].unittype,my_cdf->units[0].direction,
	  my_cdf->units[0].natoms,
	  my_cdf->units[0].nblocks,
	  my_cdf->units[0].ncells,
	  my_cdf->units[0].unitnumber,
	  my_cdf->units[0].ncellperatom);

  Rprintf("%d %d %u %u %d %d %s\n",my_cdf->units[0].unit_block[0].natoms,my_cdf->units[0].unit_block[0].ncells,
	  my_cdf->units[0].unit_block[0].ncellperatom,
	  my_cdf->units[0].unit_block[0].direction,
	  my_cdf->units[0].unit_block[0].firstatom,
	  my_cdf->units[0].unit_block[0].unused,
	  my_cdf->units[0].unit_block[0].blockname);

  for (i=0; i <my_cdf->units[0].unit_block[0].ncells  ; i++){
    Rprintf("%d %u %u %d %c %c\n",
	    my_cdf->units[0].unit_block[0].unit_cells[i].atomnumber,
	    my_cdf->units[0].unit_block[0].unit_cells[i].x,
	    my_cdf->units[0].unit_block[0].unit_cells[i].y,
	    my_cdf->units[0].unit_block[0].unit_cells[i].indexpos,
	    my_cdf->units[0].unit_block[0].unit_cells[i].pbase,
	    my_cdf->units[0].unit_block[0].unit_cells[i].tbase);
  }
#endif

  fclose(infile);
  return 1;

 fail:
  /* single exit for every error after the file was opened */
  fclose(infile);
  return 0;
}
|
533
|
+
|
534
|
+
|
535
|
+
|
536
|
+
/*************************************************************
 **
 ** int check_cdf_xda(const char *filename)
 **
 ** const char *filename - path of the file to inspect
 **
 ** Opens the file given by filename and reads its first two
 ** 32-bit integers (magic number, version number).
 **
 ** Returns 1 if the magic number is 67 and the version number
 ** is 1, i.e. the file looks like a binary (aka xda format)
 ** CDF file. Returns 0 otherwise.
 **
 ** Raises an R error if the file cannot be opened or if either
 ** header integer cannot be read.
 **
 *************************************************************/

int check_cdf_xda(const char *filename){

  FILE *infile;
  int magicnumber,version_number;
  int header_ok;

  if ((infile = fopen(filename, "rb")) == NULL)
    {
      error("Unable to open the file %s",filename);
      return 0;
    }

  /* && short-circuits, so the version number is only read when the
     magic number was read successfully (same order as before). */
  header_ok = fread_int32(&magicnumber,1,infile)
    && fread_int32(&version_number,1,infile);

  /* Only the two header words are needed. Close the stream before any
     error() call: error() does not return, so a later close would be
     skipped and the handle leaked. */
  fclose(infile);

  if (!header_ok){
    error("File corrupt or truncated?");
    return 0;
  }

  /* Unexpected magic/version numbers are reported quietly via the
     return value rather than as an error. */
  if (magicnumber != 67){
    return 0;
  }

  if (version_number != 1){
    return 0;
  }

  return 1;

}
|
586
|
+
|
587
|
+
|
588
|
+
|
589
|
+
/*************************************************************
 **
 ** static int isPM(char pbase,char tbase)
 **
 ** char pbase - probe base at substitution position
 ** char tbase - target base at substitution position
 **
 ** this function works out whether a probe is a PM or MM.
 ** The comparison is case-insensitive.
 **
 ** Returns 0 (MM) when the two bases are identical, or when
 ** pbase is one of A/T/C/G and tbase is not its complement
 ** (A<->T, C<->G). Returns 1 (PM) in every other case, which
 ** includes a pbase outside A/T/C/G that differs from tbase.
 **
 *************************************************************/

static int isPM(char pbase,char tbase){

  /* toupper() is only defined for values representable as unsigned char
     (or EOF); plain char may be negative, so convert first. */
  pbase = (char)toupper((unsigned char)pbase);
  tbase = (char)toupper((unsigned char)tbase);

  if (pbase == tbase){
    return 0;
  }

  switch (pbase){
  case 'A':
    return tbase == 'T';
  case 'T':
    return tbase == 'A';
  case 'C':
    return tbase == 'G';
  case 'G':
    return tbase == 'C';
  default:
    /* unrecognised probe base that differs from the target base */
    return 1;
  }

}
|
631
|
+
|
632
|
+
|
633
|
+
|
634
|
+
/*************************************************************
|
635
|
+
**
|
636
|
+
** SEXP CheckCDFXDA(SEXP filename)
|
637
|
+
**
|
638
|
+
** Takes a given file name and returns 1 if it is a xda format CDF file
|
639
|
+
** otherwise it returns 0
|
640
|
+
**
|
641
|
+
*************************************************************/
|
642
|
+
|
643
|
+
|
644
|
+
|
645
|
+
SEXP CheckCDFXDA(SEXP filename){
|
646
|
+
SEXP tmp;
|
647
|
+
int good;
|
648
|
+
const char *cur_file_name;
|
649
|
+
|
650
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
651
|
+
|
652
|
+
good = check_cdf_xda(cur_file_name);
|
653
|
+
|
654
|
+
PROTECT(tmp= allocVector(INTSXP,1));
|
655
|
+
|
656
|
+
INTEGER(tmp)[0] = good;
|
657
|
+
|
658
|
+
UNPROTECT(1);
|
659
|
+
return tmp;
|
660
|
+
}
|
661
|
+
|
662
|
+
|
663
|
+
|
664
|
+
|
665
|
+
|
666
|
+
|
667
|
+
/* 1-based linear index of a probe cell, computed exactly as the inline
 * expression it replaces: x + y*rows + 1.  Kept as a macro so the
 * arithmetic is carried out in the same types as before. */
#define CDF_CELL_INDEX(cell, rows) ((cell)->x + (cell)->y*(rows) + 1)

/*************************************************************
 **
 ** SEXP ReadCDFFile(SEXP filename)
 **
 ** SEXP filename - character vector; only its first element
 **                 is used, as the path of the binary CDF file
 **
 ** Reads a binary (xda) CDF file and returns a list of length 2:
 **
 **   [[1]] numeric vector of length 2: header rows, header cols
 **   [[2]] named list of numeric matrices with columns "pm" and
 **         "mm" holding 1-based cell indices; names are the
 **         block names (for four-block genotyping units with a
 **         one-character block name, the probeset name with the
 **         block name appended)
 **
 ** Units of type 1 (expression) yield one list element per unit.
 ** Units of type 2 (genotyping) are handled only when they have
 ** 1 block (one element) or 4 blocks (two elements), and only
 ** when READ_CDF_NOSNP is not defined.
 **
 ** Raises an R error when the file cannot be read, contains no
 ** units, mixes expression and genotyping units, or contains a
 ** unit layout that is not handled. The in-memory CDF structure
 ** is released before those errors are raised because error()
 ** does not return.
 **
 *************************************************************/

SEXP ReadCDFFile(SEXP filename){

  SEXP CDFInfo;
  SEXP Dimensions;
  SEXP LocMap= R_NilValue,tempLocMap = R_NilValue;
  SEXP CurLocs;
  SEXP PSnames = R_NilValue,tempPSnames = R_NilValue;
  SEXP ColNames;
  SEXP dimnames;

  cdf_xda my_cdf;
  const char *cur_file_name;
#ifndef READ_CDF_NOSNP
  char *tmp_name;   /* scratch buffer for composed SNP probeset names */
#endif

  int i,j,k;
  int cur_blocks,cur_cells, cur_atoms;
  int which_psname=0;   /* next free slot in tempPSnames/tempLocMap */
  int first_unittype;   /* type of unit 0; decides which containers exist */

  cdf_unit_cell *current_cell;

  double *curlocs;


  cur_file_name = CHAR(STRING_ELT(filename,0));

  if (!read_cdf_xda(cur_file_name,&my_cdf)){
    error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
  }

  /* Everything below inspects units[0]; refuse a file without units
     instead of reading past the end of the units array. */
  if (my_cdf.header.n_units < 1){
    dealloc_cdf_xda(&my_cdf);
    error("Binary cdf file %s contains no units.\n",cur_file_name);
  }
  first_unittype = my_cdf.units[0].unittype;


  /* We output:
     nrows, ncols in an integer vector, plus a list of probesets PM MM locations (in the BioC style) */
  PROTECT(CDFInfo = allocVector(VECSXP,2));
  PROTECT(Dimensions = allocVector(REALSXP,2));

  if (first_unittype ==1){
    /* expression: exactly one output element per unit */
    PROTECT(LocMap = allocVector(VECSXP,my_cdf.header.n_units));
    PROTECT(PSnames = allocVector(STRSXP,my_cdf.header.n_units));
  } else {
    /* genotyping: a unit contributes one or two elements, so collect
       into oversized temporaries and trim once the count is known */
    PROTECT(tempLocMap = allocVector(VECSXP,2*my_cdf.header.n_units));
    PROTECT(tempPSnames = allocVector(STRSXP,2*my_cdf.header.n_units));
  }

  NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.rows;
  NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.cols;


  for (i=0; i < my_cdf.header.n_units; i++){
#ifdef READ_CDF_DEBUG
    Rprintf("%d\n",i);
#endif
    cur_blocks = my_cdf.units[i].nblocks;

#ifdef READ_CDF_DEBUG
    Rprintf("New Block: ");
#endif
    if (my_cdf.units[i].unittype ==1){
      /* Expression analysis */
      if (first_unittype != 1){
        /* LocMap/PSnames were not allocated for this file */
        dealloc_cdf_xda(&my_cdf);
        error("makecdfenv does not currently know how to handle cdf files of this type (mixed expression and genotyping units)");
      }
      for (j=0; j < cur_blocks; j++){

#ifdef READ_CDF_DEBUG
        Rprintf("%s ",my_cdf.units[i].unit_block[j].blockname);
#endif

        cur_cells = my_cdf.units[i].unit_block[j].ncells;
        cur_atoms = my_cdf.units[i].unit_block[j].natoms;

        SET_STRING_ELT(PSnames,i,mkChar(my_cdf.units[i].unit_block[j].blockname));

        PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        curlocs = NUMERIC_POINTER(CurLocs);

        /* atoms lacking a PM or MM cell stay NaN */
        for (k=0; k < cur_atoms*2; k++){
          curlocs[k] = R_NaN;
        }

        /* column-major matrix: PM in column 0, MM in column 1 */
        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[j].unit_cells[k]);

          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          } else {
            curlocs[current_cell->atomnumber+ cur_atoms] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        /* NOTE(review): indexed by unit, so with several blocks in one
           unit only the last block's name and matrix are kept. */
        SET_VECTOR_ELT(LocMap,i,CurLocs);
        UNPROTECT(3);
      }
    } else if (my_cdf.units[i].unittype == 2){
      /* Genotyping array */

#ifndef READ_CDF_NOSNP
      if (first_unittype == 1){
        /* tempLocMap/tempPSnames were not allocated for this file */
        dealloc_cdf_xda(&my_cdf);
        error("makecdfenv does not currently know how to handle cdf files of this type (mixed expression and genotyping units)");
      }
      if (cur_blocks == 1){

        cur_cells = my_cdf.units[i].unit_block[0].ncells;
        cur_atoms = my_cdf.units[i].unit_block[0].natoms;

        SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));

        PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        curlocs = NUMERIC_POINTER(CurLocs);

        /* allocMatrix does not initialise REALSXP storage; mark unused
           slots as NaN, as the expression branch does */
        for (k=0; k < cur_atoms*2; k++){
          curlocs[k] = R_NaN;
        }

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);

          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          } else {
            curlocs[current_cell->atomnumber+ cur_atoms] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
        UNPROTECT(3);
        which_psname++;

      } else if (cur_blocks == 4){
        for (j=0; j < cur_blocks; j++){
#ifdef READ_CDF_DEBUG_SNP
          Rprintf("%s %s\n",my_cdf.probesetnames[i],my_cdf.units[i].unit_block[j].blockname);
#endif
        }

        /* First output element: blocks 0 and 2 share one matrix with
           2*natoms(block 0) rows.
           NOTE(review): the index offsets below presumably rely on atom
           numbers continuing across the unit's blocks - confirm. */
        j = 0;
        cur_cells = my_cdf.units[i].unit_block[0].ncells;
        cur_atoms = my_cdf.units[i].unit_block[0].natoms;
        if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
          /* probeset name + one-character block name + terminating NUL */
          tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
          strcpy(tmp_name,my_cdf.probesetnames[i]);
          strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
          Free(tmp_name);
        } else {
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
        }

        PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        curlocs = NUMERIC_POINTER(CurLocs);

        /* mark every slot unused before the two blocks fill it */
        for (k=0; k < 4*cur_atoms; k++){
          curlocs[k] = R_NaN;
        }

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          } else {
            curlocs[current_cell->atomnumber+ 2*cur_atoms] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          }
        }

        j=2;
        cur_cells = my_cdf.units[i].unit_block[2].ncells;
        cur_atoms = my_cdf.units[i].unit_block[2].natoms;
        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[2].unit_cells[k]);
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber - (cur_atoms)] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          } else {
            curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
        UNPROTECT(3);
        which_psname++;


        /* Second output element: blocks 1 and 3 share one matrix with
           2*natoms(block 1) rows. */
        j = 1;
        cur_cells = my_cdf.units[i].unit_block[1].ncells;
        cur_atoms = my_cdf.units[i].unit_block[1].natoms;
        if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
          tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
          strcpy(tmp_name,my_cdf.probesetnames[i]);
          strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
          Free(tmp_name);
        } else {
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[1].blockname));
        }
        PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));
        curlocs = NUMERIC_POINTER(CurLocs);

        for (k=0; k < 4*cur_atoms; k++){
          curlocs[k] = R_NaN;
        }

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[1].unit_cells[k]);
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber - (cur_atoms)] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          } else {
            curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          }
        }

        j=3;
        cur_cells = my_cdf.units[i].unit_block[3].ncells;
        cur_atoms = my_cdf.units[i].unit_block[3].natoms;
        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[3].unit_cells[k]);
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber - (2*cur_atoms)] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          } else {
            curlocs[current_cell->atomnumber] = CDF_CELL_INDEX(current_cell,my_cdf.header.rows);
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
        UNPROTECT(3);
        which_psname++;

      } else {
        dealloc_cdf_xda(&my_cdf);
        error("makecdfenv does not currently know how to handle cdf files of this type (genotyping with blocks != 1 or 4.)");
      }
#else
      dealloc_cdf_xda(&my_cdf);
      error("makecdfenv does not currently know how to handle cdf files of this type (genotyping).");
#endif

    } else {
      dealloc_cdf_xda(&my_cdf);
      error("makecdfenv does not currently know how to handle cdf files of this type (ie not expression or genotyping)");
    }


#ifdef READ_CDF_DEBUG
    Rprintf("\n");
#endif
  }

  if (first_unittype ==2){
    /* trim the oversized temporaries down to the elements produced */
    PROTECT(PSnames = allocVector(STRSXP,which_psname));
    PROTECT(LocMap = allocVector(VECSXP,which_psname));
    for (i =0; i < which_psname; i++){
      SET_STRING_ELT(PSnames,i,mkChar(CHAR(STRING_ELT(tempPSnames,i))));
      SET_VECTOR_ELT(LocMap,i,VECTOR_ELT(tempLocMap,i));
    }

  }
#ifdef READ_CDF_DEBUG
  Rprintf("%d \n",which_psname);
#endif
  setAttrib(LocMap,R_NamesSymbol,PSnames);
  SET_VECTOR_ELT(CDFInfo,0,Dimensions);
  SET_VECTOR_ELT(CDFInfo,1,LocMap);
  if (first_unittype ==2){
    /* CDFInfo, Dimensions, two temporaries, PSnames, LocMap */
    UNPROTECT(6);
  } else {
    /* CDFInfo, Dimensions, LocMap, PSnames */
    UNPROTECT(4);
  }

  dealloc_cdf_xda(&my_cdf);
  return CDFInfo;

}

#undef CDF_CELL_INDEX
|
963
|
+
|
964
|
+
|
965
|
+
|
966
|
+
|
967
|
+
/* This function is for reading in the entire binary cdf file and then
|
968
|
+
* returning the structure in a complex list object.
|
969
|
+
* The fullstructure argument is expected to be a BOOLEAN. If TRUE the
|
970
|
+
* entire contents of the CDF file are returned.
|
971
|
+
* If False, a modified CDFENV style structure is returned
|
972
|
+
*/
|
973
|
+
|
974
|
+
|
975
|
+
|
976
|
+
SEXP ReadCDFFileIntoRList(SEXP filename,SEXP fullstructure){
|
977
|
+
|
978
|
+
SEXP CDFInfo = R_NilValue; /* this is the object that will be returned */
|
979
|
+
SEXP CDFInfoNames;
|
980
|
+
SEXP HEADER; /* Will store the header information */
|
981
|
+
SEXP HEADERNames;
|
982
|
+
SEXP Dimensions;
|
983
|
+
SEXP DimensionsNames;
|
984
|
+
SEXP REFSEQ; /* Resequencing reference sequence */
|
985
|
+
SEXP UNITNAMES;
|
986
|
+
|
987
|
+
SEXP FILEPOSITIONS;
|
988
|
+
SEXP FILEPOSITIONSQC;
|
989
|
+
SEXP FILEPOSITIONSUNITS;
|
990
|
+
SEXP FILEPOSITIONSNames;
|
991
|
+
|
992
|
+
SEXP QCUNITS;
|
993
|
+
SEXP QCUNITSsub;
|
994
|
+
SEXP QCUNITSsubNames;
|
995
|
+
SEXP QCHEADER;
|
996
|
+
SEXP QCHEADERNames;
|
997
|
+
SEXP QCUNITSProbeInfo;
|
998
|
+
SEXP QCUNITSProbeInfoX;
|
999
|
+
SEXP QCUNITSProbeInfoY;
|
1000
|
+
SEXP QCUNITSProbeInfoPL;
|
1001
|
+
SEXP QCUNITSProbeInfoPMFLAG;
|
1002
|
+
SEXP QCUNITSProbeInfoBGFLAG;
|
1003
|
+
SEXP QCUNITSProbeInfoNames;
|
1004
|
+
SEXP QCUNITSProbeInforow_names;
|
1005
|
+
|
1006
|
+
SEXP UNITS;
|
1007
|
+
SEXP tmpUNIT;
|
1008
|
+
SEXP tmpUNITNames;
|
1009
|
+
SEXP UNITSHeader;
|
1010
|
+
SEXP UNITSHeaderNames;
|
1011
|
+
SEXP tmpUNITSBlock;
|
1012
|
+
SEXP UNITSBlock;
|
1013
|
+
SEXP UNITSBlockNames;
|
1014
|
+
SEXP UNITSBlockHeader;
|
1015
|
+
SEXP UNITSBlockHeaderNames;
|
1016
|
+
SEXP UNITSBlockInfo;
|
1017
|
+
SEXP UNITSBlockInfoNames;
|
1018
|
+
SEXP UNITSBlockInforow_names;
|
1019
|
+
SEXP UNITSBlockAtom ;
|
1020
|
+
SEXP UNITSBlockX;
|
1021
|
+
SEXP UNITSBlockY;
|
1022
|
+
SEXP UNITSBlockIndexPos;
|
1023
|
+
SEXP UNITSBlockPbase;
|
1024
|
+
SEXP UNITSBlockTbase;
|
1025
|
+
|
1026
|
+
|
1027
|
+
|
1028
|
+
|
1029
|
+
|
1030
|
+
|
1031
|
+
|
1032
|
+
|
1033
|
+
char buf[10];
|
1034
|
+
int i,j,k;
|
1035
|
+
|
1036
|
+
cdf_xda my_cdf;
|
1037
|
+
const char *cur_file_name;
|
1038
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
1039
|
+
|
1040
|
+
/* Read in the xda style CDF file into memory */
|
1041
|
+
if (!read_cdf_xda(cur_file_name,&my_cdf)){
|
1042
|
+
error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
|
1046
|
+
if (asInteger(fullstructure)){
|
1047
|
+
/* return the full structure */
|
1048
|
+
PROTECT(CDFInfo = allocVector(VECSXP,5));
|
1049
|
+
|
1050
|
+
PROTECT(CDFInfoNames = allocVector(STRSXP,5));
|
1051
|
+
SET_STRING_ELT(CDFInfoNames,0,mkChar("Header"));
|
1052
|
+
SET_STRING_ELT(CDFInfoNames,1,mkChar("UnitNames"));
|
1053
|
+
SET_STRING_ELT(CDFInfoNames,2,mkChar("FilePositions"));
|
1054
|
+
SET_STRING_ELT(CDFInfoNames,3,mkChar("QCUnits"));
|
1055
|
+
SET_STRING_ELT(CDFInfoNames,4,mkChar("Units"));
|
1056
|
+
setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
|
1057
|
+
UNPROTECT(1);
|
1058
|
+
|
1059
|
+
PROTECT(HEADER = allocVector(VECSXP,2));
|
1060
|
+
PROTECT(HEADERNames = allocVector(STRSXP,2));
|
1061
|
+
SET_STRING_ELT(HEADERNames,0,mkChar("Dimensions"));
|
1062
|
+
SET_STRING_ELT(HEADERNames,1,mkChar("ReseqRefSeq"));
|
1063
|
+
setAttrib(HEADER,R_NamesSymbol,HEADERNames);
|
1064
|
+
UNPROTECT(1);
|
1065
|
+
|
1066
|
+
PROTECT(Dimensions = allocVector(REALSXP,7));
|
1067
|
+
NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.magicnumber;
|
1068
|
+
NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.version_number;
|
1069
|
+
NUMERIC_POINTER(Dimensions)[2] = (double)my_cdf.header.cols;
|
1070
|
+
NUMERIC_POINTER(Dimensions)[3] = (double)my_cdf.header.rows;
|
1071
|
+
NUMERIC_POINTER(Dimensions)[4] = (double)my_cdf.header.n_qc_units;
|
1072
|
+
NUMERIC_POINTER(Dimensions)[5] = (double)my_cdf.header.n_units;
|
1073
|
+
NUMERIC_POINTER(Dimensions)[6] = (double)my_cdf.header.len_ref_seq;
|
1074
|
+
|
1075
|
+
PROTECT(DimensionsNames = allocVector(STRSXP,7));
|
1076
|
+
SET_STRING_ELT(DimensionsNames,0,mkChar("MagicNumber"));
|
1077
|
+
SET_STRING_ELT(DimensionsNames,1,mkChar("VersionNumber"));
|
1078
|
+
SET_STRING_ELT(DimensionsNames,2,mkChar("Cols"));
|
1079
|
+
SET_STRING_ELT(DimensionsNames,3,mkChar("Rows"));
|
1080
|
+
SET_STRING_ELT(DimensionsNames,4,mkChar("n.QCunits"));
|
1081
|
+
SET_STRING_ELT(DimensionsNames,5,mkChar("n.units"));
|
1082
|
+
SET_STRING_ELT(DimensionsNames,6,mkChar("LenRefSeq"));
|
1083
|
+
setAttrib(Dimensions,R_NamesSymbol,DimensionsNames);
|
1084
|
+
SET_VECTOR_ELT(HEADER,0,Dimensions);
|
1085
|
+
UNPROTECT(2);
|
1086
|
+
|
1087
|
+
PROTECT(REFSEQ = allocVector(STRSXP,1));
|
1088
|
+
SET_STRING_ELT(REFSEQ,0,mkChar(my_cdf.header.ref_seq));
|
1089
|
+
SET_VECTOR_ELT(HEADER,1,REFSEQ);
|
1090
|
+
UNPROTECT(1);
|
1091
|
+
|
1092
|
+
SET_VECTOR_ELT(CDFInfo,0,HEADER);
|
1093
|
+
UNPROTECT(1);
|
1094
|
+
|
1095
|
+
PROTECT(UNITNAMES = allocVector(STRSXP,my_cdf.header.n_units));
|
1096
|
+
for (i =0; i < my_cdf.header.n_units; i++){
|
1097
|
+
SET_STRING_ELT(UNITNAMES,i,mkChar(my_cdf.probesetnames[i]));
|
1098
|
+
}
|
1099
|
+
SET_VECTOR_ELT(CDFInfo,1,UNITNAMES);
|
1100
|
+
UNPROTECT(1);
|
1101
|
+
|
1102
|
+
PROTECT(FILEPOSITIONS = allocVector(VECSXP,2));
|
1103
|
+
PROTECT(FILEPOSITIONSQC = allocVector(REALSXP,my_cdf.header.n_qc_units));
|
1104
|
+
PROTECT(FILEPOSITIONSUNITS = allocVector(REALSXP,my_cdf.header.n_units));
|
1105
|
+
for (i =0; i < my_cdf.header.n_qc_units; i++){
|
1106
|
+
NUMERIC_POINTER(FILEPOSITIONSQC)[i] = (double)my_cdf.qc_start[i];
|
1107
|
+
}
|
1108
|
+
for (i =0; i < my_cdf.header.n_units; i++){
|
1109
|
+
NUMERIC_POINTER(FILEPOSITIONSUNITS)[i] = (double)my_cdf.units_start[i];
|
1110
|
+
}
|
1111
|
+
SET_VECTOR_ELT(FILEPOSITIONS,0,FILEPOSITIONSQC);
|
1112
|
+
SET_VECTOR_ELT(FILEPOSITIONS,1,FILEPOSITIONSUNITS);
|
1113
|
+
PROTECT(FILEPOSITIONSNames = allocVector(STRSXP,2));
|
1114
|
+
SET_STRING_ELT(FILEPOSITIONSNames,0,mkChar("FilePosQC"));
|
1115
|
+
SET_STRING_ELT(FILEPOSITIONSNames,1,mkChar("FilePosUnits"));
|
1116
|
+
setAttrib(FILEPOSITIONS,R_NamesSymbol,FILEPOSITIONSNames);
|
1117
|
+
SET_VECTOR_ELT(CDFInfo,2,FILEPOSITIONS);
|
1118
|
+
UNPROTECT(4);
|
1119
|
+
|
1120
|
+
PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.n_qc_units));
|
1121
|
+
for (i =0; i < my_cdf.header.n_qc_units; i++){
|
1122
|
+
PROTECT(QCUNITSsub = allocVector(VECSXP,2));
|
1123
|
+
PROTECT(QCUNITSsubNames= allocVector(STRSXP,2));
|
1124
|
+
SET_STRING_ELT(QCUNITSsubNames,0,mkChar("QCUnitHeader"));
|
1125
|
+
SET_STRING_ELT(QCUNITSsubNames,1,mkChar("QCUnitInfo"));
|
1126
|
+
setAttrib(QCUNITSsub,R_NamesSymbol,QCUNITSsubNames);
|
1127
|
+
|
1128
|
+
PROTECT(QCHEADER = allocVector(REALSXP,2));
|
1129
|
+
NUMERIC_POINTER(QCHEADER)[0] = (double)my_cdf.qc_units[i].type;
|
1130
|
+
NUMERIC_POINTER(QCHEADER)[1] = (double)my_cdf.qc_units[i].n_probes;
|
1131
|
+
PROTECT(QCHEADERNames = allocVector(STRSXP,2));
|
1132
|
+
SET_STRING_ELT(QCHEADERNames,0,mkChar("Type"));
|
1133
|
+
SET_STRING_ELT(QCHEADERNames,1,mkChar("n.probes"));
|
1134
|
+
|
1135
|
+
setAttrib(QCHEADER,R_NamesSymbol,QCHEADERNames);
|
1136
|
+
SET_VECTOR_ELT(QCUNITSsub,0,QCHEADER);
|
1137
|
+
|
1138
|
+
|
1139
|
+
PROTECT(QCUNITSProbeInfo = allocVector(VECSXP,5));
|
1140
|
+
PROTECT(QCUNITSProbeInfoX = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1141
|
+
PROTECT(QCUNITSProbeInfoY = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1142
|
+
PROTECT(QCUNITSProbeInfoPL = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1143
|
+
PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1144
|
+
PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1145
|
+
|
1146
|
+
for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
|
1147
|
+
NUMERIC_POINTER(QCUNITSProbeInfoX)[j] = (double)my_cdf.qc_units[i].qc_probes[j].x;
|
1148
|
+
NUMERIC_POINTER(QCUNITSProbeInfoY)[j] = (double)my_cdf.qc_units[i].qc_probes[j].y;
|
1149
|
+
NUMERIC_POINTER(QCUNITSProbeInfoPL)[j] = (double)my_cdf.qc_units[i].qc_probes[j].probelength;
|
1150
|
+
NUMERIC_POINTER(QCUNITSProbeInfoPMFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].pmflag;
|
1151
|
+
NUMERIC_POINTER(QCUNITSProbeInfoBGFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].bgprobeflag;
|
1152
|
+
}
|
1153
|
+
|
1154
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,0,QCUNITSProbeInfoX);
|
1155
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,1,QCUNITSProbeInfoY);
|
1156
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,2,QCUNITSProbeInfoPL);
|
1157
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,3,QCUNITSProbeInfoPMFLAG);
|
1158
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,4,QCUNITSProbeInfoBGFLAG);
|
1159
|
+
|
1160
|
+
PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,5));
|
1161
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
|
1162
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
|
1163
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("ProbeLength"));
|
1164
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("PMFlag"));
|
1165
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("BGProbeFlag"));
|
1166
|
+
|
1167
|
+
setAttrib(QCUNITSProbeInfo,R_NamesSymbol,QCUNITSProbeInfoNames);
|
1168
|
+
|
1169
|
+
PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
|
1170
|
+
|
1171
|
+
for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
|
1172
|
+
sprintf(buf, "%d", j+1);
|
1173
|
+
SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
|
1174
|
+
}
|
1175
|
+
|
1176
|
+
|
1177
|
+
|
1178
|
+
setAttrib(QCUNITSProbeInfo, R_RowNamesSymbol, QCUNITSProbeInforow_names);
|
1179
|
+
|
1180
|
+
|
1181
|
+
setAttrib(QCUNITSProbeInfo,R_ClassSymbol,mkString("data.frame"));
|
1182
|
+
|
1183
|
+
SET_VECTOR_ELT(QCUNITSsub,1,QCUNITSProbeInfo);
|
1184
|
+
SET_VECTOR_ELT(QCUNITS,i,QCUNITSsub);
|
1185
|
+
UNPROTECT(12);
|
1186
|
+
}
|
1187
|
+
SET_VECTOR_ELT(CDFInfo,3,QCUNITS);
|
1188
|
+
UNPROTECT(1);
|
1189
|
+
|
1190
|
+
|
1191
|
+
PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.n_units));
|
1192
|
+
for (i =0; i < my_cdf.header.n_units; i++){
|
1193
|
+
PROTECT(tmpUNIT = allocVector(VECSXP,2));
|
1194
|
+
PROTECT(tmpUNITNames = allocVector(STRSXP,2));
|
1195
|
+
SET_STRING_ELT(tmpUNITNames,0,mkChar("UnitHeader"));
|
1196
|
+
SET_STRING_ELT(tmpUNITNames,1,mkChar("Block"));
|
1197
|
+
setAttrib(tmpUNIT,R_NamesSymbol,tmpUNITNames);
|
1198
|
+
|
1199
|
+
|
1200
|
+
PROTECT(UNITSHeader = allocVector(REALSXP,7));
|
1201
|
+
PROTECT(UNITSHeaderNames = allocVector(STRSXP,7));
|
1202
|
+
SET_STRING_ELT(UNITSHeaderNames,0,mkChar("UnitType"));
|
1203
|
+
SET_STRING_ELT(UNITSHeaderNames,1,mkChar("Direction"));
|
1204
|
+
SET_STRING_ELT(UNITSHeaderNames,2,mkChar("n.atoms"));
|
1205
|
+
SET_STRING_ELT(UNITSHeaderNames,3,mkChar("n.blocks"));
|
1206
|
+
SET_STRING_ELT(UNITSHeaderNames,4,mkChar("n.cells"));
|
1207
|
+
SET_STRING_ELT(UNITSHeaderNames,5,mkChar("UnitNumber"));
|
1208
|
+
SET_STRING_ELT(UNITSHeaderNames,6,mkChar("n.cellsperatom"));
|
1209
|
+
|
1210
|
+
setAttrib(UNITSHeader,R_NamesSymbol,UNITSHeaderNames);
|
1211
|
+
|
1212
|
+
NUMERIC_POINTER(UNITSHeader)[0] = (double)my_cdf.units[i].unittype;
|
1213
|
+
NUMERIC_POINTER(UNITSHeader)[1] = (double)my_cdf.units[i].direction;
|
1214
|
+
NUMERIC_POINTER(UNITSHeader)[2] = (double)my_cdf.units[i].natoms;
|
1215
|
+
NUMERIC_POINTER(UNITSHeader)[3] = (double)my_cdf.units[i].nblocks;
|
1216
|
+
NUMERIC_POINTER(UNITSHeader)[4] = (double)my_cdf.units[i].ncells;
|
1217
|
+
NUMERIC_POINTER(UNITSHeader)[5] = (double)my_cdf.units[i].unitnumber;
|
1218
|
+
NUMERIC_POINTER(UNITSHeader)[6] = (double)my_cdf.units[i].ncellperatom;
|
1219
|
+
|
1220
|
+
PROTECT(tmpUNITSBlock = allocVector(VECSXP,my_cdf.units[i].nblocks));
|
1221
|
+
for (j=0; j < my_cdf.units[i].nblocks; j++){
|
1222
|
+
PROTECT(UNITSBlock = allocVector(VECSXP,3));
|
1223
|
+
PROTECT(UNITSBlockNames = allocVector(STRSXP,3));
|
1224
|
+
SET_STRING_ELT(UNITSBlockNames,0,mkChar("Header"));
|
1225
|
+
SET_STRING_ELT(UNITSBlockNames,1,mkChar("Name"));
|
1226
|
+
SET_STRING_ELT(UNITSBlockNames,2,mkChar("UnitInfo"));
|
1227
|
+
setAttrib(UNITSBlock,R_NamesSymbol,UNITSBlockNames);
|
1228
|
+
|
1229
|
+
PROTECT(UNITSBlockHeader = allocVector(REALSXP,6));
|
1230
|
+
PROTECT(UNITSBlockHeaderNames= allocVector(VECSXP,6));
|
1231
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,0,mkChar("n.atoms"));
|
1232
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,1,mkChar("n.cells"));
|
1233
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,2,mkChar("n.cellsperatom"));
|
1234
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,3,mkChar("Direction"));
|
1235
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,4,mkChar("firstatom"));
|
1236
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,5,mkChar("unused"));
|
1237
|
+
|
1238
|
+
NUMERIC_POINTER(UNITSBlockHeader)[0] = (double)my_cdf.units[i].unit_block[j].natoms;
|
1239
|
+
NUMERIC_POINTER(UNITSBlockHeader)[1] = (double)my_cdf.units[i].unit_block[j].ncells;
|
1240
|
+
NUMERIC_POINTER(UNITSBlockHeader)[2] = (double)my_cdf.units[i].unit_block[j].ncellperatom;
|
1241
|
+
NUMERIC_POINTER(UNITSBlockHeader)[3] = (double)my_cdf.units[i].unit_block[j].direction;
|
1242
|
+
NUMERIC_POINTER(UNITSBlockHeader)[4] = (double)my_cdf.units[i].unit_block[j].firstatom;
|
1243
|
+
NUMERIC_POINTER(UNITSBlockHeader)[5] = (double)my_cdf.units[i].unit_block[j].unused;
|
1244
|
+
|
1245
|
+
|
1246
|
+
setAttrib(UNITSBlockHeader,R_NamesSymbol,UNITSBlockHeaderNames);
|
1247
|
+
|
1248
|
+
SET_VECTOR_ELT(UNITSBlock,0,UNITSBlockHeader);
|
1249
|
+
|
1250
|
+
SET_VECTOR_ELT(UNITSBlock,1,mkString(my_cdf.units[i].unit_block[j].blockname));
|
1251
|
+
|
1252
|
+
PROTECT(UNITSBlockInfo = allocVector(VECSXP,6));
|
1253
|
+
|
1254
|
+
PROTECT(UNITSBlockInfoNames = allocVector(STRSXP,6));
|
1255
|
+
SET_STRING_ELT(UNITSBlockInfoNames,0,mkChar("atom.number"));
|
1256
|
+
SET_STRING_ELT(UNITSBlockInfoNames,1,mkChar("x"));
|
1257
|
+
SET_STRING_ELT(UNITSBlockInfoNames,2,mkChar("y"));
|
1258
|
+
SET_STRING_ELT(UNITSBlockInfoNames,3,mkChar("index.position"));
|
1259
|
+
SET_STRING_ELT(UNITSBlockInfoNames,4,mkChar("pbase"));
|
1260
|
+
SET_STRING_ELT(UNITSBlockInfoNames,5,mkChar("tbase"));
|
1261
|
+
|
1262
|
+
setAttrib(UNITSBlockInfo,R_NamesSymbol,UNITSBlockInfoNames);
|
1263
|
+
|
1264
|
+
|
1265
|
+
PROTECT(UNITSBlockInforow_names = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
|
1266
|
+
|
1267
|
+
for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
|
1268
|
+
sprintf(buf, "%d", k+1);
|
1269
|
+
SET_STRING_ELT(UNITSBlockInforow_names,k,mkChar(buf));
|
1270
|
+
}
|
1271
|
+
|
1272
|
+
PROTECT(UNITSBlockAtom = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1273
|
+
PROTECT(UNITSBlockX = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1274
|
+
PROTECT(UNITSBlockY = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1275
|
+
PROTECT(UNITSBlockIndexPos = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1276
|
+
PROTECT(UNITSBlockPbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
|
1277
|
+
PROTECT(UNITSBlockTbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
|
1278
|
+
|
1279
|
+
for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
|
1280
|
+
/* Rprintf("%d %d %d\n",i,j,k);
|
1281
|
+
// NUMERIC_POINTER(UNITSBlockAtom)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
|
1282
|
+
// NUMERIC_POINTER(UNITSBlockX)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].x;
|
1283
|
+
// NUMERIC_POINTER(UNITSBlockY)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].y;
|
1284
|
+
// NUMERIC_POINTER(UNITSBlockIndexPos)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos; */
|
1285
|
+
INTEGER_POINTER(UNITSBlockAtom)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
|
1286
|
+
INTEGER_POINTER(UNITSBlockX)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].x;
|
1287
|
+
INTEGER_POINTER(UNITSBlockY)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].y;
|
1288
|
+
INTEGER_POINTER(UNITSBlockIndexPos)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos;
|
1289
|
+
sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].pbase);
|
1290
|
+
SET_STRING_ELT(UNITSBlockPbase,k,mkChar(buf));
|
1291
|
+
|
1292
|
+
sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].tbase);
|
1293
|
+
SET_STRING_ELT(UNITSBlockTbase,k,mkChar(buf));
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
SET_VECTOR_ELT(UNITSBlockInfo,0,UNITSBlockAtom);
|
1297
|
+
SET_VECTOR_ELT(UNITSBlockInfo,1,UNITSBlockX);
|
1298
|
+
SET_VECTOR_ELT(UNITSBlockInfo,2,UNITSBlockY);
|
1299
|
+
SET_VECTOR_ELT(UNITSBlockInfo,3,UNITSBlockIndexPos);
|
1300
|
+
SET_VECTOR_ELT(UNITSBlockInfo,4,UNITSBlockPbase);
|
1301
|
+
SET_VECTOR_ELT(UNITSBlockInfo,5,UNITSBlockTbase);
|
1302
|
+
UNPROTECT(6);
|
1303
|
+
|
1304
|
+
|
1305
|
+
|
1306
|
+
|
1307
|
+
setAttrib(UNITSBlockInfo, R_RowNamesSymbol, UNITSBlockInforow_names);
|
1308
|
+
setAttrib(UNITSBlockInfo,R_ClassSymbol,mkString("data.frame"));
|
1309
|
+
|
1310
|
+
SET_VECTOR_ELT(UNITSBlock,2,UNITSBlockInfo);
|
1311
|
+
|
1312
|
+
SET_VECTOR_ELT(tmpUNITSBlock,j,UNITSBlock);
|
1313
|
+
UNPROTECT(7);
|
1314
|
+
}
|
1315
|
+
|
1316
|
+
SET_VECTOR_ELT(tmpUNIT,0,UNITSHeader);
|
1317
|
+
SET_VECTOR_ELT(tmpUNIT,1,tmpUNITSBlock);
|
1318
|
+
|
1319
|
+
SET_VECTOR_ELT(UNITS,i,tmpUNIT);
|
1320
|
+
UNPROTECT(5);
|
1321
|
+
}
|
1322
|
+
SET_VECTOR_ELT(CDFInfo,4,UNITS);
|
1323
|
+
UNPROTECT(1);
|
1324
|
+
|
1325
|
+
|
1326
|
+
} else {
|
1327
|
+
/* return the abbreviated structure */
|
1328
|
+
error("Abbreviated structure not yet implemented.\n");
|
1329
|
+
|
1330
|
+
|
1331
|
+
}
|
1332
|
+
|
1333
|
+
|
1334
|
+
|
1335
|
+
|
1336
|
+
|
1337
|
+
dealloc_cdf_xda(&my_cdf);
|
1338
|
+
UNPROTECT(1);
|
1339
|
+
return CDFInfo;
|
1340
|
+
|
1341
|
+
|
1342
|
+
}
|