bio-affy 0.1.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/bin/bio-affy +80 -0
- data/bio-affy.gemspec +128 -0
- data/ext/DESCRIPTION +11 -0
- data/ext/HISTORY +3 -0
- data/ext/LICENSE +456 -0
- data/ext/NAMESPACE +2 -0
- data/ext/R/check.cdf.type.R +18 -0
- data/ext/R/read.cdffile.list.R +23 -0
- data/ext/R/read.celfile.R +11 -0
- data/ext/R/read.celfile.header.R +37 -0
- data/ext/R/read.probematrices.R +29 -0
- data/ext/README_BIOLIB +36 -0
- data/ext/aclocal.m4 +32 -0
- data/ext/configure +4898 -0
- data/ext/configure.in +51 -0
- data/ext/man/check.cdf.type.Rd +22 -0
- data/ext/man/read.cdffile.list.Rd +20 -0
- data/ext/man/read.celfile.Rd +23 -0
- data/ext/man/read.celfile.header.Rd +22 -0
- data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
- data/ext/src/CMakeLists.txt +39 -0
- data/ext/src/Makevars.in +3 -0
- data/ext/src/Makevars.win +2 -0
- data/ext/src/Rakefile +43 -0
- data/ext/src/biolib_affyio.c +416 -0
- data/ext/src/biolib_affyio.h +132 -0
- data/ext/src/biolib_affyio.o +0 -0
- data/ext/src/fread_functions.c +871 -0
- data/ext/src/fread_functions.h +60 -0
- data/ext/src/fread_functions.o +0 -0
- data/ext/src/libaffyext.so +0 -0
- data/ext/src/mkrf.log +11 -0
- data/ext/src/mkrf_conf.rb +6 -0
- data/ext/src/read_abatch.c +5484 -0
- data/ext/src/read_abatch.h +63 -0
- data/ext/src/read_abatch.o +0 -0
- data/ext/src/read_bpmap.c +888 -0
- data/ext/src/read_bpmap.o +0 -0
- data/ext/src/read_cdf.h +347 -0
- data/ext/src/read_cdf_xda.c +1342 -0
- data/ext/src/read_cdf_xda.o +0 -0
- data/ext/src/read_cdffile2.c +1576 -0
- data/ext/src/read_cdffile2.o +0 -0
- data/ext/src/read_celfile_generic.c +2061 -0
- data/ext/src/read_celfile_generic.h +33 -0
- data/ext/src/read_celfile_generic.o +0 -0
- data/ext/src/read_clf.c +870 -0
- data/ext/src/read_clf.o +0 -0
- data/ext/src/read_generic.c +1446 -0
- data/ext/src/read_generic.h +144 -0
- data/ext/src/read_generic.o +0 -0
- data/ext/src/read_pgf.c +1337 -0
- data/ext/src/read_pgf.o +0 -0
- data/lib/bio-affy.rb +5 -0
- data/lib/bio/affy.rb +7 -0
- data/lib/bio/affyext.rb +23 -0
- data/lib/bio/libaffyext.so +0 -0
- data/spec/bio-affy_spec.rb +22 -0
- data/spec/spec_helper.rb +13 -0
- data/test/data/affy/GSM103328.CEL.gz +0 -0
- data/test/data/affy/GSM103329.CEL.gz +0 -0
- data/test/data/affy/GSM103330.CEL.gz +0 -0
- data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
- metadata +190 -0
Binary file
|
data/ext/src/read_cdf.h
ADDED
@@ -0,0 +1,347 @@
|
|
1
|
+
/*****************************************************************
|
2
|
+
**
|
3
|
+
** This file contains the record definitions for the CDF files.
|
4
|
+
** They come in two formats (textual and binary). The first set
|
5
|
+
** represents the textual form (names starting with cdf_text_...).
|
6
|
+
**
|
7
|
+
******************************************************************/
|
8
|
+
|
9
|
+
/*****************************************************************
|
10
|
+
** Textual CDF formats
|
11
|
+
**
|
12
|
+
** A structure for holding information in the
|
13
|
+
** "CDF" and "Chip" sections (basically header information)
|
14
|
+
**
|
15
|
+
******************************************************************/
|
16
|
+
|
17
|
+
/*
 * Header record of a text-format CDF file (values from the "CDF" and
 * "Chip" sections).  Field meanings below are inferred from the member
 * names -- confirm against the text CDF parser.
 */
typedef struct {
  char *version;        /* version string */
  char *name;           /* chip name */
  int rows;             /* chip dimensions */
  int cols;
  int numberofunits;
  int maxunit;
  int NumQCUnits;
  char *chipreference;
} cdf_text_header;
|
27
|
+
|
28
|
+
/*****************************************************************
|
29
|
+
**
|
30
|
+
**
|
31
|
+
** A structure for holding QC probe information
|
32
|
+
** Note the "CYCLES" item is ignored and never parsed
|
33
|
+
**
|
34
|
+
******************************************************************/
|
35
|
+
|
36
|
+
|
37
|
+
/*
 * One QC probe of a text-format CDF file.  The "CYCLES" item of the file
 * is not represented here.  Member names presumably follow the column
 * names used in the text CDF -- confirm against the parser.
 */
typedef struct {
  int x;
  int y;
  char *probe;
  int plen;
  int atom;
  int index;
  int match;
  int bg;
} cdf_text_qc_probe;
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
/*******************************************************************
|
55
|
+
**
|
56
|
+
** A structure for holding QC units information. These are
|
57
|
+
** areas of the chip that contain probes that may or may not be useful
|
58
|
+
** for QC and other purposes.
|
59
|
+
**
|
60
|
+
**
|
61
|
+
*******************************************************************/
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
typedef struct{
|
66
|
+
int type;
|
67
|
+
unsigned int n_probes;
|
68
|
+
int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
|
69
|
+
cdf_text_qc_probe *qc_probes;
|
70
|
+
|
71
|
+
} cdf_text_qc_unit;
|
72
|
+
|
73
|
+
|
74
|
+
/*******************************************************************
|
75
|
+
**
|
76
|
+
** A structure for holding probe information for unit_blocks_probes
|
77
|
+
**
|
78
|
+
** probes are stored within blocks
|
79
|
+
**
|
80
|
+
*******************************************************************/
|
81
|
+
|
82
|
+
/*
 * One probe stored inside a block of a text-format CDF unit.  Member names
 * presumably follow the column names of the text CDF -- confirm against
 * the parser.
 */
typedef struct {
  int x;
  int y;
  char *probe;
  char *feat;
  char *qual;
  int expos;
  int pos;
  char *cbase;
  char *pbase;
  char *tbase;
  int atom;
  int index;
  int codonid;
  int codon;
  int regiontype;
  char *region;
} cdf_text_unit_block_probe;
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
/*******************************************************************
|
105
|
+
**
|
106
|
+
** A structure holding Unit_blocks
|
107
|
+
**
|
108
|
+
** blocks are stored within units.
|
109
|
+
** blocks contain many probes
|
110
|
+
**
|
111
|
+
*******************************************************************/
|
112
|
+
|
113
|
+
typedef struct{
|
114
|
+
char *name;
|
115
|
+
int blocknumber;
|
116
|
+
int num_atoms;
|
117
|
+
int num_cells;
|
118
|
+
int start_position;
|
119
|
+
int stop_position;
|
120
|
+
int direction;
|
121
|
+
cdf_text_unit_block_probe *probes;
|
122
|
+
|
123
|
+
} cdf_text_unit_block;
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
/*******************************************************************
|
131
|
+
**
|
132
|
+
** A structure for holding "Units", also known as probesets
|
133
|
+
**
|
134
|
+
** Each unit contains one or more blocks. Each block contains one or
|
135
|
+
** more probes
|
136
|
+
**
|
137
|
+
*******************************************************************/
|
138
|
+
|
139
|
+
|
140
|
+
typedef struct{
|
141
|
+
char *name;
|
142
|
+
int direction;
|
143
|
+
int num_atoms;
|
144
|
+
int num_cells;
|
145
|
+
int unit_number;
|
146
|
+
int unit_type;
|
147
|
+
int numberblocks;
|
148
|
+
int MutationType;
|
149
|
+
cdf_text_unit_block *blocks;
|
150
|
+
} cdf_text_unit;
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
/*******************************************************************
|
155
|
+
**
|
156
|
+
** A structure for holding a text CDF file
|
157
|
+
**
|
158
|
+
** text cdf files consist of
|
159
|
+
** basic header information
|
160
|
+
** qcunits
|
161
|
+
** - qc probes
|
162
|
+
** units (aka probesets)
|
163
|
+
** - blocks
|
164
|
+
** - probes
|
165
|
+
**
|
166
|
+
**
|
167
|
+
*******************************************************************/
|
168
|
+
|
169
|
+
typedef struct{
|
170
|
+
cdf_text_header header;
|
171
|
+
cdf_text_qc_unit *qc_units;
|
172
|
+
cdf_text_unit *units;
|
173
|
+
} cdf_text;
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
/************************************************************************
|
178
|
+
**
|
179
|
+
** Structures for holding the CDF file information. Basically
|
180
|
+
** header/general information that appears at the start of the CDF file
|
181
|
+
**
|
182
|
+
************************************************************************/
|
183
|
+
|
184
|
+
/*
 * General information found at the start of a binary (xda) CDF file.
 */
typedef struct {
  int magicnumber;
  int version_number;
  unsigned short rows;
  unsigned short cols;
  int n_units;
  int n_qc_units;
  int len_ref_seq;   /* presumably the length of ref_seq -- confirm in reader */
  int i;             /* purpose not evident from this header */
  char *ref_seq;
} cdf_xda_header;
|
193
|
+
|
194
|
+
|
195
|
+
/****************************************************************************
|
196
|
+
**
|
197
|
+
** The following two structures store QC units and QC unit probe information
|
198
|
+
**
|
199
|
+
** QC information, repeated for each QC unit:
|
200
|
+
** Type - unsigned short
|
201
|
+
** Number of probes - integer
|
202
|
+
**
|
203
|
+
** Probe information, repeated for each probe in the QC unit:
|
204
|
+
** X coordinate - unsigned short
|
205
|
+
** Y coordinate - unsigned short
|
206
|
+
** Probe length - unsigned char
|
207
|
+
** Perfect match flag - unsigned char
|
208
|
+
** Background probe flag - unsigned char
|
209
|
+
**
|
210
|
+
****************************************************************************/
|
211
|
+
|
212
|
+
|
213
|
+
/* One probe of a QC unit in a binary (xda) CDF file. */
typedef struct {
  unsigned short x;            /* X coordinate */
  unsigned short y;            /* Y coordinate */
  unsigned char probelength;   /* probe length */
  unsigned char pmflag;        /* perfect match flag */
  unsigned char bgprobeflag;   /* background probe flag */
} cdf_qc_probe;
|
221
|
+
|
222
|
+
typedef struct{
|
223
|
+
unsigned short type;
|
224
|
+
unsigned int n_probes;
|
225
|
+
|
226
|
+
cdf_qc_probe *qc_probes;
|
227
|
+
|
228
|
+
} cdf_qc_unit;
|
229
|
+
|
230
|
+
|
231
|
+
/****************************************************************************
|
232
|
+
**
|
233
|
+
** The following three structures store information for units (sometimes called
|
234
|
+
** probesets), blocks (of which there are one or more within a unit) and cells
|
235
|
+
** sometimes called probe of which there are one or more within each block
|
236
|
+
**
|
237
|
+
**
|
238
|
+
** Unit information, repeated for each unit:
|
239
|
+
**
|
240
|
+
** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
|
241
|
+
** Direction - unsigned char
|
242
|
+
** Number of atoms - integer
|
243
|
+
** Number of blocks - integer (always 1 for expression units)
|
244
|
+
** Number of cells - integer
|
245
|
+
** Unit number (probe set number) - integer
|
246
|
+
** Number of cells per atom - unsigned char
|
247
|
+
**
|
248
|
+
**
|
249
|
+
**
|
250
|
+
** Block information, repeated for each block in the unit:
|
251
|
+
**
|
252
|
+
** Number of atoms - integer
|
253
|
+
** Number of cells - integer
|
254
|
+
** Number of cells per atom - unsigned char
|
255
|
+
** Direction - unsigned char
|
256
|
+
** The position of the first atom - integer
|
257
|
+
** <unused integer value> - integer
|
258
|
+
** The block name - char[64]
|
259
|
+
**
|
260
|
+
**
|
261
|
+
**
|
262
|
+
** Cell information, repeated for each cell in the block:
|
263
|
+
**
|
264
|
+
** Atom number - integer
|
265
|
+
** X coordinate - unsigned short
|
266
|
+
** Y coordinate - unsigned short
|
267
|
+
** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
|
268
|
+
** Base of probe at substitution position - char
|
269
|
+
** Base of target at interrogation position - char
|
270
|
+
**
|
271
|
+
**
|
272
|
+
****************************************************************************/
|
273
|
+
|
274
|
+
|
275
|
+
/* One cell (probe) inside a block of a binary (xda) CDF unit. */
typedef struct {
  int atomnumber;     /* atom number */
  unsigned short x;   /* X coordinate */
  unsigned short y;   /* Y coordinate */
  int indexpos;       /* index position */
  char pbase;         /* base of probe at substitution position */
  char tbase;         /* base of target at interrogation position */
} cdf_unit_cell;
|
283
|
+
|
284
|
+
|
285
|
+
typedef struct{
|
286
|
+
int natoms;
|
287
|
+
int ncells;
|
288
|
+
unsigned char ncellperatom;
|
289
|
+
unsigned char direction;
|
290
|
+
int firstatom;
|
291
|
+
int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
|
292
|
+
char blockname[64];
|
293
|
+
|
294
|
+
cdf_unit_cell *unit_cells;
|
295
|
+
|
296
|
+
} cdf_unit_block;
|
297
|
+
|
298
|
+
|
299
|
+
typedef struct{
|
300
|
+
unsigned short unittype;
|
301
|
+
unsigned char direction;
|
302
|
+
int natoms;
|
303
|
+
int nblocks;
|
304
|
+
int ncells;
|
305
|
+
int unitnumber;
|
306
|
+
unsigned char ncellperatom;
|
307
|
+
|
308
|
+
cdf_unit_block *unit_block;
|
309
|
+
|
310
|
+
} cdf_unit;
|
311
|
+
|
312
|
+
|
313
|
+
/****************************************************************************
|
314
|
+
**
|
315
|
+
** A data structure for holding CDF information read from a xda format cdf file
|
316
|
+
**
|
317
|
+
** note that this structure reads in everything including things that might not
|
318
|
+
** be of any subsequent use.
|
319
|
+
**
|
320
|
+
****************************************************************************/
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
typedef struct {
|
325
|
+
|
326
|
+
cdf_xda_header header; /* Header information */
|
327
|
+
char **probesetnames; /* Names of probesets */
|
328
|
+
|
329
|
+
int *qc_start; /* These are used for random access */
|
330
|
+
int *units_start;
|
331
|
+
|
332
|
+
cdf_qc_unit *qc_units;
|
333
|
+
cdf_unit *units;
|
334
|
+
|
335
|
+
|
336
|
+
} cdf_xda;
|
337
|
+
|
338
|
+
|
339
|
+
|
340
|
+
|
341
|
+
// int check_xda_file(const char *filename);
|
342
|
+
int isTextCDFFile(const char *filename);
|
343
|
+
int read_cdf_text(const char *filename, cdf_text *mycdf);
|
344
|
+
void dealloc_cdf_text(cdf_text *my_cdf);
|
345
|
+
int read_cdf_xda(const char *filename,cdf_xda *my_cdf);
|
346
|
+
void dealloc_cdf_xda(cdf_xda *my_cdf);
|
347
|
+
|
@@ -0,0 +1,1342 @@
|
|
1
|
+
/****************************************************************
|
2
|
+
**
|
3
|
+
** File: read_cdf_xda.c
|
4
|
+
**
|
5
|
+
** Implementation by: B. M. Bolstad <bmb@bmbolstad.com>
|
6
|
+
**
|
7
|
+
** A parser designed to read the binary format cdf files.
|
8
|
+
** Sometimes called the xda format.
|
9
|
+
**
|
10
|
+
** Implemented based on documentation available from Affymetrix
|
11
|
+
**
|
12
|
+
** Implementation begun 2005.
|
13
|
+
**
|
14
|
+
** Modification Dates
|
15
|
+
** Feb 4 - Initial version
|
16
|
+
** Feb 5 - A bunch of hacks for SNP chips.
|
17
|
+
** Apr 20
|
18
|
+
** Aug 16, 2005 - Fix potential big endian bug
|
19
|
+
** Sep 22, 2005 - Fix some signed/unsigned bugs
|
20
|
+
** Dec 1, 2005 - Comment cleaning
|
21
|
+
** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
|
22
|
+
** May 31, 2006 - fix some compiler warnings
|
23
|
+
** Aug 23, 2006 - fix a potential (but at current time non-existent) problem
|
24
|
+
** when there are 0 qcunits or 0 units
|
25
|
+
** Aug 25, 2007 - Move file reading functions to centralized location
|
26
|
+
** Oct 27, 2007 - When building a cdfenv set NON identified values to NA (mostly affects MM for PM only arrays)
|
27
|
+
** Nov 12, 2008 - Fix crash
|
28
|
+
** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
|
29
|
+
**
|
30
|
+
****************************************************************/
|
31
|
+
|
32
|
+
/** --- includes --- */
|
33
|
+
#include <R.h>
|
34
|
+
#include <Rdefines.h>
|
35
|
+
|
36
|
+
#include "stdlib.h"
|
37
|
+
#include "stdio.h"
|
38
|
+
#include "fread_functions.h"
|
39
|
+
#include <ctype.h>
|
40
|
+
|
41
|
+
/* #define READ_CDF_DEBUG */
|
42
|
+
/* #define READ_CDF_DEBUG_SNP */
|
43
|
+
#define READ_CDF_NOSNP
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
/************************************************************************
|
48
|
+
**
|
49
|
+
** Structures for holding the CDF file information. Basically
|
50
|
+
** header/general information that appears at the start of the CDF file
|
51
|
+
**
|
52
|
+
************************************************************************/
|
53
|
+
|
54
|
+
/*
 * General information found at the start of a binary (xda) CDF file.
 */
typedef struct {
  int magicnumber;
  int version_number;
  unsigned short rows;
  unsigned short cols;
  int n_units;
  int n_qc_units;
  int len_ref_seq;   /* number of chars stored in ref_seq */
  int i;             /* purpose not evident from this definition */
  char *ref_seq;
} cdf_xda_header;
|
63
|
+
|
64
|
+
|
65
|
+
/****************************************************************************
|
66
|
+
**
|
67
|
+
** The following two structures store QC units and QC unit probe information
|
68
|
+
**
|
69
|
+
** QC information, repeated for each QC unit:
|
70
|
+
** Type - unsigned short
|
71
|
+
** Number of probes - integer
|
72
|
+
**
|
73
|
+
** Probe information, repeated for each probe in the QC unit:
|
74
|
+
** X coordinate - unsigned short
|
75
|
+
** Y coordinate - unsigned short
|
76
|
+
** Probe length - unsigned char
|
77
|
+
** Perfect match flag - unsigned char
|
78
|
+
** Background probe flag - unsigned char
|
79
|
+
**
|
80
|
+
****************************************************************************/
|
81
|
+
|
82
|
+
|
83
|
+
/* One probe of a QC unit in a binary (xda) CDF file. */
typedef struct {
  unsigned short x;            /* X coordinate */
  unsigned short y;            /* Y coordinate */
  unsigned char probelength;   /* probe length */
  unsigned char pmflag;        /* perfect match flag */
  unsigned char bgprobeflag;   /* background probe flag */
} cdf_qc_probe;
|
91
|
+
|
92
|
+
typedef struct{
|
93
|
+
unsigned short type;
|
94
|
+
unsigned int n_probes;
|
95
|
+
|
96
|
+
cdf_qc_probe *qc_probes;
|
97
|
+
|
98
|
+
} cdf_qc_unit;
|
99
|
+
|
100
|
+
|
101
|
+
/****************************************************************************
|
102
|
+
**
|
103
|
+
** The following three structures store information for units (sometimes called
|
104
|
+
** probesets), blocks (of which there are one or more within a unit) and cells
|
105
|
+
** sometimes called probe of which there are one or more within each block
|
106
|
+
**
|
107
|
+
**
|
108
|
+
** Unit information, repeated for each unit:
|
109
|
+
**
|
110
|
+
** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
|
111
|
+
** Direction - unsigned char
|
112
|
+
** Number of atoms - integer
|
113
|
+
** Number of blocks - integer (always 1 for expression units)
|
114
|
+
** Number of cells - integer
|
115
|
+
** Unit number (probe set number) - integer
|
116
|
+
** Number of cells per atom - unsigned char
|
117
|
+
**
|
118
|
+
**
|
119
|
+
**
|
120
|
+
** Block information, repeated for each block in the unit:
|
121
|
+
**
|
122
|
+
** Number of atoms - integer
|
123
|
+
** Number of cells - integer
|
124
|
+
** Number of cells per atom - unsigned char
|
125
|
+
** Direction - unsigned char
|
126
|
+
** The position of the first atom - integer
|
127
|
+
** <unused integer value> - integer
|
128
|
+
** The block name - char[64]
|
129
|
+
**
|
130
|
+
**
|
131
|
+
**
|
132
|
+
** Cell information, repeated for each cell in the block:
|
133
|
+
**
|
134
|
+
** Atom number - integer
|
135
|
+
** X coordinate - unsigned short
|
136
|
+
** Y coordinate - unsigned short
|
137
|
+
** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
|
138
|
+
** Base of probe at substitution position - char
|
139
|
+
** Base of target at interrogation position - char
|
140
|
+
**
|
141
|
+
**
|
142
|
+
****************************************************************************/
|
143
|
+
|
144
|
+
|
145
|
+
/* One cell (probe) inside a block of a binary (xda) CDF unit. */
typedef struct {
  int atomnumber;     /* atom number */
  unsigned short x;   /* X coordinate */
  unsigned short y;   /* Y coordinate */
  int indexpos;       /* index position */
  char pbase;         /* base of probe at substitution position */
  char tbase;         /* base of target at interrogation position */
} cdf_unit_cell;
|
153
|
+
|
154
|
+
|
155
|
+
typedef struct{
|
156
|
+
int natoms;
|
157
|
+
int ncells;
|
158
|
+
unsigned char ncellperatom;
|
159
|
+
unsigned char direction;
|
160
|
+
int firstatom;
|
161
|
+
int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
|
162
|
+
char blockname[64];
|
163
|
+
|
164
|
+
cdf_unit_cell *unit_cells;
|
165
|
+
|
166
|
+
} cdf_unit_block;
|
167
|
+
|
168
|
+
|
169
|
+
typedef struct{
|
170
|
+
unsigned short unittype;
|
171
|
+
unsigned char direction;
|
172
|
+
int natoms;
|
173
|
+
int nblocks;
|
174
|
+
int ncells;
|
175
|
+
int unitnumber;
|
176
|
+
unsigned char ncellperatom;
|
177
|
+
|
178
|
+
cdf_unit_block *unit_block;
|
179
|
+
|
180
|
+
} cdf_unit;
|
181
|
+
|
182
|
+
|
183
|
+
/****************************************************************************
|
184
|
+
**
|
185
|
+
** A data structure for holding CDF information read from a xda format cdf file
|
186
|
+
**
|
187
|
+
** note that this structure reads in everything including things that might not
|
188
|
+
** be of any subsequent use.
|
189
|
+
**
|
190
|
+
****************************************************************************/
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
typedef struct {
|
195
|
+
|
196
|
+
cdf_xda_header header; /* Header information */
|
197
|
+
char **probesetnames; /* Names of probesets */
|
198
|
+
|
199
|
+
int *qc_start; /* These are used for random access */
|
200
|
+
int *units_start;
|
201
|
+
|
202
|
+
cdf_qc_unit *qc_units;
|
203
|
+
cdf_unit *units;
|
204
|
+
|
205
|
+
|
206
|
+
} cdf_xda;
|
207
|
+
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
|
218
|
+
|
219
|
+
/*************************************************************************
|
220
|
+
**
|
221
|
+
** int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream)
|
222
|
+
**
|
223
|
+
** cdf_qc_unit *my_unit - preallocated space to store qc unit information
|
224
|
+
** int filelocation - indexing/location information used to read information
|
225
|
+
** from file
|
226
|
+
** FILE *instream - a pre-opened file to read from
|
227
|
+
**
|
228
|
+
** reads a specified qc_unit from the file. Allocates space for the cdf_qc_probes
|
229
|
+
** and also reads them in
|
230
|
+
**
|
231
|
+
**
|
232
|
+
*************************************************************************/
|
233
|
+
|
234
|
+
/*
 * Read the QC unit stored at byte offset `filelocation` of `instream`
 * into the preallocated `my_unit`, allocating my_unit->qc_probes and
 * filling in one entry per probe.
 *
 * Returns 1 on success.  Returns 0 when the seek or any read fails; in
 * that case a probe array allocated by this call is released again and
 * my_unit->qc_probes is left NULL.
 */
int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream){

  unsigned int i;   /* unsigned, to match the type of n_probes */

  if (fseek(instream,filelocation,SEEK_SET) != 0){
    return 0;
  }

  if (!fread_uint16(&(my_unit->type),1,instream)
      || !fread_uint32(&(my_unit->n_probes),1,instream)){
    return 0;
  }

  my_unit->qc_probes = Calloc(my_unit->n_probes,cdf_qc_probe);

  for (i=0; i < my_unit->n_probes; i++){
    cdf_qc_probe *probe = &(my_unit->qc_probes[i]);

    if (!fread_uint16(&(probe->x),1,instream)
        || !fread_uint16(&(probe->y),1,instream)
        || !fread_uchar(&(probe->probelength),1,instream)
        || !fread_uchar(&(probe->pmflag),1,instream)
        || !fread_uchar(&(probe->bgprobeflag),1,instream)){
      /* Truncated or unreadable probe record: do not hand back a
         half-filled array.  Free() also resets the pointer to NULL. */
      Free(my_unit->qc_probes);
      return 0;
    }
  }
  return 1;
}
|
257
|
+
|
258
|
+
/*************************************************************************
|
259
|
+
**
|
260
|
+
** int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream)
|
261
|
+
**
|
262
|
+
** cdf_unit *my_unit - preallocated space to store unit (aka probeset) information
|
263
|
+
** int filelocation - indexing/location information used to read information
|
264
|
+
** from file
|
265
|
+
** FILE *instream - a pre-opened file to read from
|
266
|
+
**
|
267
|
+
** reads a specified probeset into the my_unit, including all blocks and all probes
|
268
|
+
** it is assumed that the unit itself is preallocated. Blocks and probes within
|
269
|
+
** the blocks are allocated by this function.
|
270
|
+
**
|
271
|
+
*************************************************************************/
|
272
|
+
|
273
|
+
/*
 * Read the unit (probeset) stored at byte offset `filelocation` of
 * `instream` into the preallocated `my_unit`.  The block array and the
 * cell array of every block are allocated here.
 *
 * Returns 1 on success.  Returns 0 when the seek or any read fails, or
 * when the file supplies a negative block/cell count; everything this
 * call allocated is then released, my_unit->unit_block is left NULL and
 * my_unit->nblocks is reset to 0.
 */
int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream){

  int i,j;

  if (fseek(instream,filelocation,SEEK_SET) != 0){
    return 0;
  }

  /* Unit header */
  if (!fread_uint16(&(my_unit->unittype),1,instream)
      || !fread_uchar(&(my_unit->direction),1,instream)
      || !fread_int32(&(my_unit->natoms),1,instream)
      || !fread_int32(&(my_unit->nblocks),1,instream)
      || !fread_int32(&(my_unit->ncells),1,instream)
      || !fread_int32(&(my_unit->unitnumber),1,instream)
      || !fread_uchar(&(my_unit->ncellperatom),1,instream)){
    return 0;
  }

  /* A negative count would be turned into a huge allocation request. */
  if (my_unit->nblocks < 0){
    return 0;
  }

  my_unit->unit_block = Calloc(my_unit->nblocks,cdf_unit_block);

  for (i=0; i < my_unit->nblocks; i++){
    cdf_unit_block *block = &(my_unit->unit_block[i]);

    /* Block header */
    if (!fread_int32(&(block->natoms),1,instream)
        || !fread_int32(&(block->ncells),1,instream)
        || !fread_uchar(&(block->ncellperatom),1,instream)
        || !fread_uchar(&(block->direction),1,instream)
        || !fread_int32(&(block->firstatom),1,instream)
        || !fread_int32(&(block->unused),1,instream)
        || !fread_char(block->blockname,64,instream)){
      goto fail;
    }

    if (block->ncells < 0){
      goto fail;
    }

    block->unit_cells = Calloc(block->ncells,cdf_unit_cell);

    for (j=0; j < block->ncells; j++){
      cdf_unit_cell *cell = &(block->unit_cells[j]);

      if (!fread_int32(&(cell->atomnumber),1,instream)
          || !fread_uint16(&(cell->x),1,instream)
          || !fread_uint16(&(cell->y),1,instream)
          || !fread_int32(&(cell->indexpos),1,instream)
          || !fread_char(&(cell->pbase),1,instream)
          || !fread_char(&(cell->tbase),1,instream)){
        goto fail;
      }
    }
  }

  return 1;

 fail:
  /* The block array was zero-filled on allocation, so blocks that were
     never reached still carry a NULL unit_cells pointer. */
  for (i=0; i < my_unit->nblocks; i++){
    Free(my_unit->unit_block[i].unit_cells);
  }
  Free(my_unit->unit_block);
  my_unit->nblocks = 0;
  return 0;
}
|
318
|
+
|
319
|
+
/*************************************************************************
|
320
|
+
**
|
321
|
+
** void dealloc_cdf_xda(cdf_xda *my_cdf)
|
322
|
+
**
|
323
|
+
** Deallocates all the previously allocated memory.
|
324
|
+
**
|
325
|
+
*************************************************************************/
|
326
|
+
|
327
|
+
/*
 * Release every allocation hanging off `my_cdf`: the probeset names, the
 * two offset tables, the QC units with their probes, the units with their
 * blocks and the cells of every block, and the reference sequence.
 * `my_cdf` itself is not freed.
 *
 * Arrays that are NULL are skipped, so a structure whose pointers were
 * never filled in (but are NULL) can be passed safely.
 */
void dealloc_cdf_xda(cdf_xda *my_cdf){

  int i,j;

  if (my_cdf->probesetnames != NULL){
    for (i=0; i < my_cdf->header.n_units; i++){
      Free(my_cdf->probesetnames[i]);
    }
  }
  Free(my_cdf->probesetnames);

  Free(my_cdf->qc_start);
  Free(my_cdf->units_start);

  if (my_cdf->qc_units != NULL){
    for (i=0; i < my_cdf->header.n_qc_units; i++){
      Free(my_cdf->qc_units[i].qc_probes);
    }
  }
  Free(my_cdf->qc_units);

  if (my_cdf->units != NULL){
    for (i=0; i < my_cdf->header.n_units; i++){
      cdf_unit *unit = &(my_cdf->units[i]);

      if (unit->unit_block != NULL){
        /* Each block owns its own cell array; freeing only the block
           array would leak all of them. */
        for (j=0; j < unit->nblocks; j++){
          Free(unit->unit_block[j].unit_cells);
        }
      }
      Free(unit->unit_block);
    }
  }
  Free(my_cdf->units);
  Free(my_cdf->header.ref_seq);

}
|
353
|
+
|
354
|
+
|
355
|
+
|
356
|
+
/*************************************************************
|
357
|
+
**
|
358
|
+
** int read_cdf_xda(const char *filename, cdf_xda *my_cdf)
|
359
|
+
**
|
360
|
+
** filename - Name of the prospective binary cdf file
|
361
|
+
**
|
362
|
+
** Returns 1 if the file was completely successfully parsed
|
363
|
+
** otherwise 0 (and possibly prints a message to screen)
|
364
|
+
**
|
365
|
+
**
|
366
|
+
**
|
367
|
+
**
|
368
|
+
*************************************************************/
|
369
|
+
|
370
|
+
/*
 * Parse the binary (xda) CDF file `filename` into the caller-provided
 * `my_cdf`: header, probeset names, the QC/unit offset tables, then every
 * QC unit and every unit (nothing is random-accessed later).
 *
 * Returns 1 when the whole file was parsed and 0 otherwise (possibly after
 * printing a message).  The file is closed on every returning path.  On
 * failure, whatever was allocated so far stays attached to `my_cdf`;
 * pointers that were never reached are NULL.  This function does not
 * release them itself.
 */
int read_cdf_xda(const char *filename,cdf_xda *my_cdf){

  FILE *infile;
  int i;
  int status = 0;   /* set to 1 only once the whole file has been parsed */

  if ((infile = fopen(filename, "rb")) == NULL)
    {
      /* error() is provided through R.h and is not expected to return */
      error("Unable to open the file %s",filename);
      return 0;
    }

  /* Start from a well-defined empty state so that a failed parse never
     leaves uninitialized pointers or counts behind. */
  my_cdf->header.n_units = 0;
  my_cdf->header.n_qc_units = 0;
  my_cdf->header.len_ref_seq = 0;
  my_cdf->header.ref_seq = NULL;
  my_cdf->probesetnames = NULL;
  my_cdf->qc_start = NULL;
  my_cdf->units_start = NULL;
  my_cdf->qc_units = NULL;
  my_cdf->units = NULL;

  if (!fread_int32(&my_cdf->header.magicnumber,1,infile)){
    goto done;
  }

  if (!fread_int32(&my_cdf->header.version_number,1,infile)){
    goto done;
  }

  if (my_cdf->header.magicnumber != 67){
    Rprintf("Magic number is not 67. This is probably not a binary cdf file.\n");
    goto done;
  }

  if (my_cdf->header.version_number != 1){
    Rprintf("Don't know if version %d binary cdf files can be handled.\n",my_cdf->header.version_number);
    goto done;
  }

  /* The file stores the column count before the row count. */
  if (!fread_uint16(&my_cdf->header.cols,1,infile)){
    goto done;
  }
  if (!fread_uint16(&my_cdf->header.rows,1,infile)){
    goto done;
  }

  if (!fread_int32(&my_cdf->header.n_units,1,infile)){
    goto done;
  }

  if (!fread_int32(&my_cdf->header.n_qc_units,1,infile)){
    goto done;
  }

  if (!fread_int32(&my_cdf->header.len_ref_seq,1,infile)){
    goto done;
  }

  /* Negative counts can only come from a corrupt file and would be
     turned into enormous allocation requests below. */
  if (my_cdf->header.n_units < 0 || my_cdf->header.n_qc_units < 0
      || my_cdf->header.len_ref_seq < 0){
    goto done;
  }

  my_cdf->header.ref_seq = Calloc(my_cdf->header.len_ref_seq,char);

  /* A zero-length reference sequence legitimately reads nothing. */
  if (my_cdf->header.len_ref_seq > 0
      && !fread_char(my_cdf->header.ref_seq, my_cdf->header.len_ref_seq, infile)){
    goto done;
  }

  my_cdf->probesetnames = Calloc(my_cdf->header.n_units,char *);

  for (i =0; i < my_cdf->header.n_units;i++){
    my_cdf->probesetnames[i] = Calloc(64,char);
    if (!fread_char(my_cdf->probesetnames[i], 64, infile)){
      goto done;
    }
  }

  my_cdf->qc_start = Calloc(my_cdf->header.n_qc_units,int);
  my_cdf->units_start = Calloc(my_cdf->header.n_units,int);

  /* Reading zero items yields 0, so a zero result is only a failure when
     something was actually requested. */
  if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)) {
    if(my_cdf->header.n_qc_units != 0) {
      goto done;
    }
  }

  if(!fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)) {
    if(my_cdf->header.n_units != 0) {
      goto done;
    }
  }

  /* We will read in all the QC and Standard Units, rather than
     random accessing what we need */
  my_cdf->qc_units = Calloc(my_cdf->header.n_qc_units,cdf_qc_unit);

  for (i =0; i < my_cdf->header.n_qc_units; i++){
    if (!read_cdf_qcunit(&my_cdf->qc_units[i],my_cdf->qc_start[i],infile)){
      goto done;
    }
  }

  my_cdf->units = Calloc(my_cdf->header.n_units,cdf_unit);

  for (i=0; i < my_cdf->header.n_units; i++){
    if (!read_cdf_unit(&my_cdf->units[i],my_cdf->units_start[i],infile)){
      goto done;
    }
  }

#ifdef READ_CDF_DEBUG
  Rprintf("%d %d %d %d %d\n",my_cdf->header.cols,my_cdf->header.rows,my_cdf->header.n_units,my_cdf->header.n_qc_units,my_cdf->header.len_ref_seq);
  for (i =0; i < my_cdf->header.n_units;i++){
    Rprintf("%s\n",my_cdf->probesetnames[i]);
  }

  for (i =0; i < my_cdf->header.n_qc_units;i++){
    Rprintf("%d\n",my_cdf->qc_start[i]);
  }

  /* units_start holds n_units entries, not n_qc_units */
  for (i =0; i < my_cdf->header.n_units;i++){
    Rprintf("%d\n",my_cdf->units_start[i]);
  }

  if (my_cdf->header.n_qc_units > 0){
    Rprintf("%d %d\n",my_cdf->qc_units[0].type,my_cdf->qc_units[0].n_probes);

    for (i=0; (unsigned int)i < my_cdf->qc_units[0].n_probes; i++){
      Rprintf("%d %d %d %u %d\n",my_cdf->qc_units[0].qc_probes[i].x,my_cdf->qc_units[0].qc_probes[i].y,
              my_cdf->qc_units[0].qc_probes[i].probelength,
              my_cdf->qc_units[0].qc_probes[i].pmflag,
              my_cdf->qc_units[0].qc_probes[i].bgprobeflag);
    }
  }

  if (my_cdf->header.n_units > 0){
    Rprintf("%u %u %d %d %d %d %u\n",my_cdf->units[0].unittype,my_cdf->units[0].direction,
            my_cdf->units[0].natoms,
            my_cdf->units[0].nblocks,
            my_cdf->units[0].ncells,
            my_cdf->units[0].unitnumber,
            my_cdf->units[0].ncellperatom);

    if (my_cdf->units[0].nblocks > 0){
      Rprintf("%d %d %u %u %d %d %s\n",my_cdf->units[0].unit_block[0].natoms,my_cdf->units[0].unit_block[0].ncells,
              my_cdf->units[0].unit_block[0].ncellperatom,
              my_cdf->units[0].unit_block[0].direction,
              my_cdf->units[0].unit_block[0].firstatom,
              my_cdf->units[0].unit_block[0].unused,
              my_cdf->units[0].unit_block[0].blockname);

      for (i=0; i <my_cdf->units[0].unit_block[0].ncells ; i++){
        Rprintf("%d %u %u %d %c %c\n",
                my_cdf->units[0].unit_block[0].unit_cells[i].atomnumber,
                my_cdf->units[0].unit_block[0].unit_cells[i].x,
                my_cdf->units[0].unit_block[0].unit_cells[i].y,
                my_cdf->units[0].unit_block[0].unit_cells[i].indexpos,
                my_cdf->units[0].unit_block[0].unit_cells[i].pbase,
                my_cdf->units[0].unit_block[0].unit_cells[i].tbase);
      }
    }
  }
#endif

  status = 1;

 done:
  /* Single exit point: the stream is closed whether or not parsing
     succeeded. */
  fclose(infile);
  return status;
}
|
533
|
+
|
534
|
+
|
535
|
+
|
536
|
+
/*************************************************************
 **
 ** int check_cdf_xda(const char *filename)
 **
 ** const char *filename - path of the file to inspect
 **
 ** Opens the file given by filename and reads its first two
 ** 32-bit integers (the magic number followed by the version
 ** number). Returns 1 if the magic number is 67 and the version
 ** number is 1, ie the file looks like a binary (aka xda format)
 ** CDF file. Returns 0 otherwise.
 **
 ** error() is raised if the file can not be opened or if the
 ** two integers can not be read. The file is always closed
 ** before this function raises an error or returns.
 **
 *************************************************************/

int check_cdf_xda(const char *filename){

  FILE *infile;
  int magicnumber, version_number;
  int header_read;

  if ((infile = fopen(filename, "rb")) == NULL)
    {
      error("Unable to open the file %s",filename);
      return 0;   /* only reached if error() returns */
    }

  /* Both values are needed, and nothing else is read from the file,
     so read them and release the handle before deciding anything.
     The && short-circuits, so the version is not read when reading
     the magic number already failed. */
  header_read = fread_int32(&magicnumber,1,infile) &&
                fread_int32(&version_number,1,infile);
  fclose(infile);

  if (!header_read){
    error("File corrupt or truncated?");
    return 0;   /* only reached if error() returns */
  }

  /* A mismatch is not an error: it just means "not an xda CDF file" */
  if (magicnumber != 67){
    return 0;
  }

  if (version_number != 1){
    return 0;
  }

  return 1;

}
|
586
|
+
|
587
|
+
|
588
|
+
|
589
|
+
/*************************************************************
 **
 ** static int isPM(char pbase,char tbase)
 **
 ** char pbase - probe base at substitution position
 ** char tbase - target base at substitution position
 **
 ** this function works out whether a probe is a PM or MM
 **
 ** Both bases are compared case-insensitively. Returns 0 (MM) if
 **   - the two bases are identical, or
 **   - pbase is one of A, T, C, G and tbase is not its
 **     complement (A<->T, C<->G).
 ** Returns 1 (PM) in every other case. Note that this includes a
 ** pbase outside A/T/C/G paired with any different tbase.
 **
 *************************************************************/

static int isPM(char pbase,char tbase){

  /* <ctype.h> functions require a value representable as unsigned char
     (or EOF); passing a negative plain char is undefined behaviour. */
  const int p = toupper((unsigned char)pbase);
  const int t = toupper((unsigned char)tbase);

  if (p == t){
    return 0;
  }

  switch (p){
  case 'A':
    return t == 'T';
  case 'T':
    return t == 'A';
  case 'C':
    return t == 'G';
  case 'G':
    return t == 'C';
  default:
    /* probe base is not A/T/C/G and differs from the target base */
    return 1;
  }

}
|
631
|
+
|
632
|
+
|
633
|
+
|
634
|
+
/*************************************************************
|
635
|
+
**
|
636
|
+
** SEXP CheckCDFXDA(SEXP filename)
|
637
|
+
**
|
638
|
+
** Takes a given file name and returns 1 if it is a xda format CDF file
|
639
|
+
** otherwise it returns 0
|
640
|
+
**
|
641
|
+
*************************************************************/
|
642
|
+
|
643
|
+
|
644
|
+
|
645
|
+
SEXP CheckCDFXDA(SEXP filename){
|
646
|
+
SEXP tmp;
|
647
|
+
int good;
|
648
|
+
const char *cur_file_name;
|
649
|
+
|
650
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
651
|
+
|
652
|
+
good = check_cdf_xda(cur_file_name);
|
653
|
+
|
654
|
+
PROTECT(tmp= allocVector(INTSXP,1));
|
655
|
+
|
656
|
+
INTEGER(tmp)[0] = good;
|
657
|
+
|
658
|
+
UNPROTECT(1);
|
659
|
+
return tmp;
|
660
|
+
}
|
661
|
+
|
662
|
+
|
663
|
+
|
664
|
+
|
665
|
+
|
666
|
+
|
667
|
+
SEXP ReadCDFFile(SEXP filename){
|
668
|
+
|
669
|
+
SEXP CDFInfo;
|
670
|
+
SEXP Dimensions;
|
671
|
+
SEXP LocMap= R_NilValue,tempLocMap;
|
672
|
+
SEXP CurLocs;
|
673
|
+
SEXP PSnames = R_NilValue,tempPSnames;
|
674
|
+
SEXP ColNames;
|
675
|
+
SEXP dimnames;
|
676
|
+
|
677
|
+
cdf_xda my_cdf;
|
678
|
+
const char *cur_file_name;
|
679
|
+
/* char *tmp_name; */
|
680
|
+
|
681
|
+
int i,j,k;
|
682
|
+
int cur_blocks,cur_cells, cur_atoms;
|
683
|
+
/* int which_probetype; */
|
684
|
+
int which_psname=0;
|
685
|
+
|
686
|
+
cdf_unit_cell *current_cell;
|
687
|
+
|
688
|
+
double *curlocs;
|
689
|
+
|
690
|
+
/* int nrows, ncols; */
|
691
|
+
|
692
|
+
|
693
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
694
|
+
|
695
|
+
if (!read_cdf_xda(cur_file_name,&my_cdf)){
|
696
|
+
error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
|
697
|
+
}
|
698
|
+
|
699
|
+
|
700
|
+
/* We output:
|
701
|
+
nrows, ncols in an integer vector, plus a list of probesets PM MM locations (in the BioC style) */
|
702
|
+
PROTECT(CDFInfo = allocVector(VECSXP,2));
|
703
|
+
PROTECT(Dimensions = allocVector(REALSXP,2));
|
704
|
+
|
705
|
+
if (my_cdf.units[0].unittype ==1){
|
706
|
+
PROTECT(LocMap = allocVector(VECSXP,my_cdf.header.n_units));
|
707
|
+
PROTECT(PSnames = allocVector(STRSXP,my_cdf.header.n_units));
|
708
|
+
} else {
|
709
|
+
PROTECT(tempLocMap = allocVector(VECSXP,2*my_cdf.header.n_units));
|
710
|
+
PROTECT(tempPSnames = allocVector(STRSXP,2*my_cdf.header.n_units));
|
711
|
+
}
|
712
|
+
|
713
|
+
NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.rows;
|
714
|
+
NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.cols;
|
715
|
+
|
716
|
+
|
717
|
+
for (i=0; i < my_cdf.header.n_units; i++){
|
718
|
+
#ifdef READ_CDF_DEBUG
|
719
|
+
printf("%d\n",i);
|
720
|
+
#endif
|
721
|
+
cur_blocks = my_cdf.units[i].nblocks;
|
722
|
+
|
723
|
+
#ifdef READ_CDF_DEBUG
|
724
|
+
Rprintf("New Block: ");
|
725
|
+
#endif
|
726
|
+
if (my_cdf.units[i].unittype ==1){
|
727
|
+
/* Expression analysis */
|
728
|
+
for (j=0; j < cur_blocks; j++){
|
729
|
+
|
730
|
+
#ifdef READ_CDF_DEBUG
|
731
|
+
Rprintf("%s ",my_cdf.units[i].unit_block[j].blockname);
|
732
|
+
#endif
|
733
|
+
|
734
|
+
cur_cells = my_cdf.units[i].unit_block[j].ncells;
|
735
|
+
cur_atoms = my_cdf.units[i].unit_block[j].natoms;
|
736
|
+
|
737
|
+
SET_STRING_ELT(PSnames,i,mkChar(my_cdf.units[i].unit_block[j].blockname));
|
738
|
+
|
739
|
+
PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
|
740
|
+
PROTECT(ColNames = allocVector(STRSXP,2));
|
741
|
+
PROTECT(dimnames = allocVector(VECSXP,2));
|
742
|
+
SET_STRING_ELT(ColNames,0,mkChar("pm"));
|
743
|
+
SET_STRING_ELT(ColNames,1,mkChar("mm"));
|
744
|
+
|
745
|
+
curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
|
746
|
+
|
747
|
+
for (k=0; k < cur_atoms*2; k++){
|
748
|
+
curlocs[k] = R_NaN;
|
749
|
+
}
|
750
|
+
|
751
|
+
for (k=0; k < cur_cells; k++){
|
752
|
+
current_cell = &(my_cdf.units[i].unit_block[j].unit_cells[k]);
|
753
|
+
|
754
|
+
if(isPM(current_cell->pbase,current_cell->tbase)){
|
755
|
+
curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
|
756
|
+
} else {
|
757
|
+
curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
|
758
|
+
}
|
759
|
+
}
|
760
|
+
|
761
|
+
|
762
|
+
|
763
|
+
SET_VECTOR_ELT(dimnames,1,ColNames);
|
764
|
+
setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
|
765
|
+
SET_VECTOR_ELT(LocMap,i,CurLocs);
|
766
|
+
UNPROTECT(3);
|
767
|
+
}
|
768
|
+
} else if (my_cdf.units[i].unittype == 2){
|
769
|
+
/* Genotyping array */
|
770
|
+
|
771
|
+
#ifndef READ_CDF_NOSNP
|
772
|
+
if (cur_blocks == 1){
|
773
|
+
|
774
|
+
cur_cells = my_cdf.units[i].unit_block[0].ncells;
|
775
|
+
cur_atoms = my_cdf.units[i].unit_block[0].natoms;
|
776
|
+
|
777
|
+
SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
|
778
|
+
|
779
|
+
PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
|
780
|
+
PROTECT(ColNames = allocVector(STRSXP,2));
|
781
|
+
PROTECT(dimnames = allocVector(VECSXP,2));
|
782
|
+
SET_STRING_ELT(ColNames,0,mkChar("pm"));
|
783
|
+
SET_STRING_ELT(ColNames,1,mkChar("mm"));
|
784
|
+
|
785
|
+
curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
|
786
|
+
|
787
|
+
for (k=0; k < cur_cells; k++){
|
788
|
+
current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
|
789
|
+
|
790
|
+
if(isPM(current_cell->pbase,current_cell->tbase)){
|
791
|
+
curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
|
792
|
+
} else {
|
793
|
+
curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
|
794
|
+
}
|
795
|
+
}
|
796
|
+
|
797
|
+
|
798
|
+
|
799
|
+
SET_VECTOR_ELT(dimnames,1,ColNames);
|
800
|
+
setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
|
801
|
+
SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
|
802
|
+
UNPROTECT(3);
|
803
|
+
which_psname++;
|
804
|
+
|
805
|
+
} else if (cur_blocks == 4){
|
806
|
+
for (j=0; j < cur_blocks; j++){
|
807
|
+
#ifdef READ_CDF_DEBUG_SNP
|
808
|
+
Rprintf("%s %s\n",my_cdf.probesetnames[i],my_cdf.units[i].unit_block[j].blockname);
|
809
|
+
#endif
|
810
|
+
}
|
811
|
+
|
812
|
+
j = 0;
|
813
|
+
cur_cells = my_cdf.units[i].unit_block[0].ncells;
|
814
|
+
cur_atoms = my_cdf.units[i].unit_block[0].natoms;
|
815
|
+
if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
|
816
|
+
tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
|
817
|
+
tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
|
818
|
+
tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
|
819
|
+
SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
|
820
|
+
Free(tmp_name);
|
821
|
+
} else {
|
822
|
+
SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
|
823
|
+
}
|
824
|
+
|
825
|
+
PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
|
826
|
+
PROTECT(ColNames = allocVector(STRSXP,2));
|
827
|
+
PROTECT(dimnames = allocVector(VECSXP,2));
|
828
|
+
SET_STRING_ELT(ColNames,0,mkChar("pm"));
|
829
|
+
SET_STRING_ELT(ColNames,1,mkChar("mm"));
|
830
|
+
|
831
|
+
curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
|
832
|
+
|
833
|
+
|
834
|
+
for (k=0; k < cur_cells; k++){
|
835
|
+
current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
|
836
|
+
/* Rprintf("%d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
|
837
|
+
if(isPM(current_cell->pbase,current_cell->tbase)){
|
838
|
+
curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
|
839
|
+
} else {
|
840
|
+
curlocs[current_cell->atomnumber+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
|
841
|
+
}
|
842
|
+
if (current_cell->x + current_cell->y*(my_cdf.header.rows) + 1 == 370737){
|
843
|
+
Rprintf("%d %c %c",isPM(current_cell->pbase,current_cell->tbase),current_cell->pbase,current_cell->tbase);
|
844
|
+
}
|
845
|
+
}
|
846
|
+
|
847
|
+
j=2;
|
848
|
+
cur_cells = my_cdf.units[i].unit_block[2].ncells;
|
849
|
+
cur_atoms = my_cdf.units[i].unit_block[2].natoms;
|
850
|
+
for (k=0; k < cur_cells; k++){
|
851
|
+
current_cell = &(my_cdf.units[i].unit_block[2].unit_cells[k]);
|
852
|
+
/* Rprintf("half : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
|
853
|
+
if(isPM(current_cell->pbase,current_cell->tbase)){
|
854
|
+
curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
|
855
|
+
} else {
|
856
|
+
curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
|
857
|
+
}
|
858
|
+
}
|
859
|
+
|
860
|
+
SET_VECTOR_ELT(dimnames,1,ColNames);
|
861
|
+
setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
|
862
|
+
SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
|
863
|
+
UNPROTECT(3);
|
864
|
+
which_psname++;
|
865
|
+
|
866
|
+
|
867
|
+
|
868
|
+
|
869
|
+
|
870
|
+
j = 1;
|
871
|
+
cur_cells = my_cdf.units[i].unit_block[1].ncells;
|
872
|
+
cur_atoms = my_cdf.units[i].unit_block[1].natoms;
|
873
|
+
if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
|
874
|
+
tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
|
875
|
+
tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
|
876
|
+
tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
|
877
|
+
SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
|
878
|
+
Free(tmp_name);
|
879
|
+
} else {
|
880
|
+
SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[1].blockname));
|
881
|
+
}
|
882
|
+
PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
|
883
|
+
PROTECT(ColNames = allocVector(STRSXP,2));
|
884
|
+
PROTECT(dimnames = allocVector(VECSXP,2));
|
885
|
+
SET_STRING_ELT(ColNames,0,mkChar("pm"));
|
886
|
+
SET_STRING_ELT(ColNames,1,mkChar("mm"));
|
887
|
+
curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
|
888
|
+
|
889
|
+
for (k=0; k < cur_cells; k++){
|
890
|
+
current_cell = &(my_cdf.units[i].unit_block[1].unit_cells[k]);
|
891
|
+
/* Rprintf("Dual : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
|
892
|
+
if(isPM(current_cell->pbase,current_cell->tbase)){
|
893
|
+
curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
|
894
|
+
} else {
|
895
|
+
curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
|
896
|
+
}
|
897
|
+
}
|
898
|
+
|
899
|
+
j=3;
|
900
|
+
cur_cells = my_cdf.units[i].unit_block[3].ncells;
|
901
|
+
cur_atoms = my_cdf.units[i].unit_block[3].natoms;
|
902
|
+
for (k=0; k < cur_cells; k++){
|
903
|
+
current_cell = &(my_cdf.units[i].unit_block[3].unit_cells[k]);
|
904
|
+
/* Rprintf("half deux : %d %d %d %u %u \n",cur_cells, current_cell->atomnumber, cur_atoms,current_cell->x,current_cell->y); */
|
905
|
+
if(isPM(current_cell->pbase,current_cell->tbase)){
|
906
|
+
curlocs[current_cell->atomnumber - (2*cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
|
907
|
+
} else {
|
908
|
+
curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
|
909
|
+
}
|
910
|
+
}
|
911
|
+
|
912
|
+
SET_VECTOR_ELT(dimnames,1,ColNames);
|
913
|
+
setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
|
914
|
+
SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
|
915
|
+
UNPROTECT(3);
|
916
|
+
which_psname++;
|
917
|
+
|
918
|
+
} else {
|
919
|
+
error("makecdfenv does not currently know how to handle cdf files of this type (genotyping with blocks != 1 or 4.)");
|
920
|
+
}
|
921
|
+
#else
|
922
|
+
error("makecdfenv does not currently know how to handle cdf files of this type (genotyping).");
|
923
|
+
#endif
|
924
|
+
|
925
|
+
|
926
|
+
|
927
|
+
|
928
|
+
} else {
|
929
|
+
error("makecdfenv does not currently know how to handle cdf files of this type (ie not expression or genotyping)");
|
930
|
+
}
|
931
|
+
|
932
|
+
|
933
|
+
#ifdef READ_CDF_DEBUG
|
934
|
+
Rprintf("\n");
|
935
|
+
#endif
|
936
|
+
}
|
937
|
+
|
938
|
+
if (my_cdf.units[0].unittype ==2){
|
939
|
+
PROTECT(PSnames = allocVector(STRSXP,which_psname));
|
940
|
+
PROTECT(LocMap = allocVector(VECSXP,which_psname));
|
941
|
+
for (i =0; i < which_psname; i++){
|
942
|
+
SET_STRING_ELT(PSnames,i,mkChar(CHAR(STRING_ELT(tempPSnames,i))));
|
943
|
+
SET_VECTOR_ELT(LocMap,i,VECTOR_ELT(tempLocMap,i));
|
944
|
+
}
|
945
|
+
|
946
|
+
}
|
947
|
+
#ifdef READ_CDF_DEBUG
|
948
|
+
Rprintf("%d \n",which_psname);
|
949
|
+
#endif
|
950
|
+
setAttrib(LocMap,R_NamesSymbol,PSnames);
|
951
|
+
SET_VECTOR_ELT(CDFInfo,0,Dimensions);
|
952
|
+
SET_VECTOR_ELT(CDFInfo,1,LocMap);
|
953
|
+
if (my_cdf.units[0].unittype ==2){
|
954
|
+
UNPROTECT(6);
|
955
|
+
} else {
|
956
|
+
UNPROTECT(4);
|
957
|
+
}
|
958
|
+
|
959
|
+
dealloc_cdf_xda(&my_cdf);
|
960
|
+
return CDFInfo;
|
961
|
+
|
962
|
+
}
|
963
|
+
|
964
|
+
|
965
|
+
|
966
|
+
|
967
|
+
/* This function is for reading in the entire binary cdf file and then
|
968
|
+
 * returning the structure in a complex list object.
|
969
|
+
* The fullstructure argument is expected to be a BOOLEAN. If TRUE the
|
970
|
+
* entire contents of the CDF file are returned.
|
971
|
+
* If False, a modified CDFENV style structure is returned
|
972
|
+
*/
|
973
|
+
|
974
|
+
|
975
|
+
|
976
|
+
SEXP ReadCDFFileIntoRList(SEXP filename,SEXP fullstructure){
|
977
|
+
|
978
|
+
SEXP CDFInfo = R_NilValue; /* this is the object that will be returned */
|
979
|
+
SEXP CDFInfoNames;
|
980
|
+
SEXP HEADER; /* Will store the header information */
|
981
|
+
SEXP HEADERNames;
|
982
|
+
SEXP Dimensions;
|
983
|
+
SEXP DimensionsNames;
|
984
|
+
SEXP REFSEQ; /* Resequencing reference sequence */
|
985
|
+
SEXP UNITNAMES;
|
986
|
+
|
987
|
+
SEXP FILEPOSITIONS;
|
988
|
+
SEXP FILEPOSITIONSQC;
|
989
|
+
SEXP FILEPOSITIONSUNITS;
|
990
|
+
SEXP FILEPOSITIONSNames;
|
991
|
+
|
992
|
+
SEXP QCUNITS;
|
993
|
+
SEXP QCUNITSsub;
|
994
|
+
SEXP QCUNITSsubNames;
|
995
|
+
SEXP QCHEADER;
|
996
|
+
SEXP QCHEADERNames;
|
997
|
+
SEXP QCUNITSProbeInfo;
|
998
|
+
SEXP QCUNITSProbeInfoX;
|
999
|
+
SEXP QCUNITSProbeInfoY;
|
1000
|
+
SEXP QCUNITSProbeInfoPL;
|
1001
|
+
SEXP QCUNITSProbeInfoPMFLAG;
|
1002
|
+
SEXP QCUNITSProbeInfoBGFLAG;
|
1003
|
+
SEXP QCUNITSProbeInfoNames;
|
1004
|
+
SEXP QCUNITSProbeInforow_names;
|
1005
|
+
|
1006
|
+
SEXP UNITS;
|
1007
|
+
SEXP tmpUNIT;
|
1008
|
+
SEXP tmpUNITNames;
|
1009
|
+
SEXP UNITSHeader;
|
1010
|
+
SEXP UNITSHeaderNames;
|
1011
|
+
SEXP tmpUNITSBlock;
|
1012
|
+
SEXP UNITSBlock;
|
1013
|
+
SEXP UNITSBlockNames;
|
1014
|
+
SEXP UNITSBlockHeader;
|
1015
|
+
SEXP UNITSBlockHeaderNames;
|
1016
|
+
SEXP UNITSBlockInfo;
|
1017
|
+
SEXP UNITSBlockInfoNames;
|
1018
|
+
SEXP UNITSBlockInforow_names;
|
1019
|
+
SEXP UNITSBlockAtom ;
|
1020
|
+
SEXP UNITSBlockX;
|
1021
|
+
SEXP UNITSBlockY;
|
1022
|
+
SEXP UNITSBlockIndexPos;
|
1023
|
+
SEXP UNITSBlockPbase;
|
1024
|
+
SEXP UNITSBlockTbase;
|
1025
|
+
|
1026
|
+
|
1027
|
+
|
1028
|
+
|
1029
|
+
|
1030
|
+
|
1031
|
+
|
1032
|
+
|
1033
|
+
char buf[10];
|
1034
|
+
int i,j,k;
|
1035
|
+
|
1036
|
+
cdf_xda my_cdf;
|
1037
|
+
const char *cur_file_name;
|
1038
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
1039
|
+
|
1040
|
+
/* Read in the xda style CDF file into memory */
|
1041
|
+
if (!read_cdf_xda(cur_file_name,&my_cdf)){
|
1042
|
+
error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
|
1046
|
+
if (asInteger(fullstructure)){
|
1047
|
+
/* return the full structure */
|
1048
|
+
PROTECT(CDFInfo = allocVector(VECSXP,5));
|
1049
|
+
|
1050
|
+
PROTECT(CDFInfoNames = allocVector(STRSXP,5));
|
1051
|
+
SET_STRING_ELT(CDFInfoNames,0,mkChar("Header"));
|
1052
|
+
SET_STRING_ELT(CDFInfoNames,1,mkChar("UnitNames"));
|
1053
|
+
SET_STRING_ELT(CDFInfoNames,2,mkChar("FilePositions"));
|
1054
|
+
SET_STRING_ELT(CDFInfoNames,3,mkChar("QCUnits"));
|
1055
|
+
SET_STRING_ELT(CDFInfoNames,4,mkChar("Units"));
|
1056
|
+
setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
|
1057
|
+
UNPROTECT(1);
|
1058
|
+
|
1059
|
+
PROTECT(HEADER = allocVector(VECSXP,2));
|
1060
|
+
PROTECT(HEADERNames = allocVector(STRSXP,2));
|
1061
|
+
SET_STRING_ELT(HEADERNames,0,mkChar("Dimensions"));
|
1062
|
+
SET_STRING_ELT(HEADERNames,1,mkChar("ReseqRefSeq"));
|
1063
|
+
setAttrib(HEADER,R_NamesSymbol,HEADERNames);
|
1064
|
+
UNPROTECT(1);
|
1065
|
+
|
1066
|
+
PROTECT(Dimensions = allocVector(REALSXP,7));
|
1067
|
+
NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.magicnumber;
|
1068
|
+
NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.version_number;
|
1069
|
+
NUMERIC_POINTER(Dimensions)[2] = (double)my_cdf.header.cols;
|
1070
|
+
NUMERIC_POINTER(Dimensions)[3] = (double)my_cdf.header.rows;
|
1071
|
+
NUMERIC_POINTER(Dimensions)[4] = (double)my_cdf.header.n_qc_units;
|
1072
|
+
NUMERIC_POINTER(Dimensions)[5] = (double)my_cdf.header.n_units;
|
1073
|
+
NUMERIC_POINTER(Dimensions)[6] = (double)my_cdf.header.len_ref_seq;
|
1074
|
+
|
1075
|
+
PROTECT(DimensionsNames = allocVector(STRSXP,7));
|
1076
|
+
SET_STRING_ELT(DimensionsNames,0,mkChar("MagicNumber"));
|
1077
|
+
SET_STRING_ELT(DimensionsNames,1,mkChar("VersionNumber"));
|
1078
|
+
SET_STRING_ELT(DimensionsNames,2,mkChar("Cols"));
|
1079
|
+
SET_STRING_ELT(DimensionsNames,3,mkChar("Rows"));
|
1080
|
+
SET_STRING_ELT(DimensionsNames,4,mkChar("n.QCunits"));
|
1081
|
+
SET_STRING_ELT(DimensionsNames,5,mkChar("n.units"));
|
1082
|
+
SET_STRING_ELT(DimensionsNames,6,mkChar("LenRefSeq"));
|
1083
|
+
setAttrib(Dimensions,R_NamesSymbol,DimensionsNames);
|
1084
|
+
SET_VECTOR_ELT(HEADER,0,Dimensions);
|
1085
|
+
UNPROTECT(2);
|
1086
|
+
|
1087
|
+
PROTECT(REFSEQ = allocVector(STRSXP,1));
|
1088
|
+
SET_STRING_ELT(REFSEQ,0,mkChar(my_cdf.header.ref_seq));
|
1089
|
+
SET_VECTOR_ELT(HEADER,1,REFSEQ);
|
1090
|
+
UNPROTECT(1);
|
1091
|
+
|
1092
|
+
SET_VECTOR_ELT(CDFInfo,0,HEADER);
|
1093
|
+
UNPROTECT(1);
|
1094
|
+
|
1095
|
+
PROTECT(UNITNAMES = allocVector(STRSXP,my_cdf.header.n_units));
|
1096
|
+
for (i =0; i < my_cdf.header.n_units; i++){
|
1097
|
+
SET_STRING_ELT(UNITNAMES,i,mkChar(my_cdf.probesetnames[i]));
|
1098
|
+
}
|
1099
|
+
SET_VECTOR_ELT(CDFInfo,1,UNITNAMES);
|
1100
|
+
UNPROTECT(1);
|
1101
|
+
|
1102
|
+
PROTECT(FILEPOSITIONS = allocVector(VECSXP,2));
|
1103
|
+
PROTECT(FILEPOSITIONSQC = allocVector(REALSXP,my_cdf.header.n_qc_units));
|
1104
|
+
PROTECT(FILEPOSITIONSUNITS = allocVector(REALSXP,my_cdf.header.n_units));
|
1105
|
+
for (i =0; i < my_cdf.header.n_qc_units; i++){
|
1106
|
+
NUMERIC_POINTER(FILEPOSITIONSQC)[i] = (double)my_cdf.qc_start[i];
|
1107
|
+
}
|
1108
|
+
for (i =0; i < my_cdf.header.n_units; i++){
|
1109
|
+
NUMERIC_POINTER(FILEPOSITIONSUNITS)[i] = (double)my_cdf.units_start[i];
|
1110
|
+
}
|
1111
|
+
SET_VECTOR_ELT(FILEPOSITIONS,0,FILEPOSITIONSQC);
|
1112
|
+
SET_VECTOR_ELT(FILEPOSITIONS,1,FILEPOSITIONSUNITS);
|
1113
|
+
PROTECT(FILEPOSITIONSNames = allocVector(STRSXP,2));
|
1114
|
+
SET_STRING_ELT(FILEPOSITIONSNames,0,mkChar("FilePosQC"));
|
1115
|
+
SET_STRING_ELT(FILEPOSITIONSNames,1,mkChar("FilePosUnits"));
|
1116
|
+
setAttrib(FILEPOSITIONS,R_NamesSymbol,FILEPOSITIONSNames);
|
1117
|
+
SET_VECTOR_ELT(CDFInfo,2,FILEPOSITIONS);
|
1118
|
+
UNPROTECT(4);
|
1119
|
+
|
1120
|
+
PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.n_qc_units));
|
1121
|
+
for (i =0; i < my_cdf.header.n_qc_units; i++){
|
1122
|
+
PROTECT(QCUNITSsub = allocVector(VECSXP,2));
|
1123
|
+
PROTECT(QCUNITSsubNames= allocVector(STRSXP,2));
|
1124
|
+
SET_STRING_ELT(QCUNITSsubNames,0,mkChar("QCUnitHeader"));
|
1125
|
+
SET_STRING_ELT(QCUNITSsubNames,1,mkChar("QCUnitInfo"));
|
1126
|
+
setAttrib(QCUNITSsub,R_NamesSymbol,QCUNITSsubNames);
|
1127
|
+
|
1128
|
+
PROTECT(QCHEADER = allocVector(REALSXP,2));
|
1129
|
+
NUMERIC_POINTER(QCHEADER)[0] = (double)my_cdf.qc_units[i].type;
|
1130
|
+
NUMERIC_POINTER(QCHEADER)[1] = (double)my_cdf.qc_units[i].n_probes;
|
1131
|
+
PROTECT(QCHEADERNames = allocVector(STRSXP,2));
|
1132
|
+
SET_STRING_ELT(QCHEADERNames,0,mkChar("Type"));
|
1133
|
+
SET_STRING_ELT(QCHEADERNames,1,mkChar("n.probes"));
|
1134
|
+
|
1135
|
+
setAttrib(QCHEADER,R_NamesSymbol,QCHEADERNames);
|
1136
|
+
SET_VECTOR_ELT(QCUNITSsub,0,QCHEADER);
|
1137
|
+
|
1138
|
+
|
1139
|
+
PROTECT(QCUNITSProbeInfo = allocVector(VECSXP,5));
|
1140
|
+
PROTECT(QCUNITSProbeInfoX = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1141
|
+
PROTECT(QCUNITSProbeInfoY = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1142
|
+
PROTECT(QCUNITSProbeInfoPL = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1143
|
+
PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1144
|
+
PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
|
1145
|
+
|
1146
|
+
for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
|
1147
|
+
NUMERIC_POINTER(QCUNITSProbeInfoX)[j] = (double)my_cdf.qc_units[i].qc_probes[j].x;
|
1148
|
+
NUMERIC_POINTER(QCUNITSProbeInfoY)[j] = (double)my_cdf.qc_units[i].qc_probes[j].y;
|
1149
|
+
NUMERIC_POINTER(QCUNITSProbeInfoPL)[j] = (double)my_cdf.qc_units[i].qc_probes[j].probelength;
|
1150
|
+
NUMERIC_POINTER(QCUNITSProbeInfoPMFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].pmflag;
|
1151
|
+
NUMERIC_POINTER(QCUNITSProbeInfoBGFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].bgprobeflag;
|
1152
|
+
}
|
1153
|
+
|
1154
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,0,QCUNITSProbeInfoX);
|
1155
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,1,QCUNITSProbeInfoY);
|
1156
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,2,QCUNITSProbeInfoPL);
|
1157
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,3,QCUNITSProbeInfoPMFLAG);
|
1158
|
+
SET_VECTOR_ELT(QCUNITSProbeInfo,4,QCUNITSProbeInfoBGFLAG);
|
1159
|
+
|
1160
|
+
PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,5));
|
1161
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
|
1162
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
|
1163
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("ProbeLength"));
|
1164
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("PMFlag"));
|
1165
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("BGProbeFlag"));
|
1166
|
+
|
1167
|
+
setAttrib(QCUNITSProbeInfo,R_NamesSymbol,QCUNITSProbeInfoNames);
|
1168
|
+
|
1169
|
+
PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
|
1170
|
+
|
1171
|
+
for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
|
1172
|
+
sprintf(buf, "%d", j+1);
|
1173
|
+
SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
|
1174
|
+
}
|
1175
|
+
|
1176
|
+
|
1177
|
+
|
1178
|
+
setAttrib(QCUNITSProbeInfo, R_RowNamesSymbol, QCUNITSProbeInforow_names);
|
1179
|
+
|
1180
|
+
|
1181
|
+
setAttrib(QCUNITSProbeInfo,R_ClassSymbol,mkString("data.frame"));
|
1182
|
+
|
1183
|
+
SET_VECTOR_ELT(QCUNITSsub,1,QCUNITSProbeInfo);
|
1184
|
+
SET_VECTOR_ELT(QCUNITS,i,QCUNITSsub);
|
1185
|
+
UNPROTECT(12);
|
1186
|
+
}
|
1187
|
+
SET_VECTOR_ELT(CDFInfo,3,QCUNITS);
|
1188
|
+
UNPROTECT(1);
|
1189
|
+
|
1190
|
+
|
1191
|
+
PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.n_units));
|
1192
|
+
for (i =0; i < my_cdf.header.n_units; i++){
|
1193
|
+
PROTECT(tmpUNIT = allocVector(VECSXP,2));
|
1194
|
+
PROTECT(tmpUNITNames = allocVector(STRSXP,2));
|
1195
|
+
SET_STRING_ELT(tmpUNITNames,0,mkChar("UnitHeader"));
|
1196
|
+
SET_STRING_ELT(tmpUNITNames,1,mkChar("Block"));
|
1197
|
+
setAttrib(tmpUNIT,R_NamesSymbol,tmpUNITNames);
|
1198
|
+
|
1199
|
+
|
1200
|
+
PROTECT(UNITSHeader = allocVector(REALSXP,7));
|
1201
|
+
PROTECT(UNITSHeaderNames = allocVector(STRSXP,7));
|
1202
|
+
SET_STRING_ELT(UNITSHeaderNames,0,mkChar("UnitType"));
|
1203
|
+
SET_STRING_ELT(UNITSHeaderNames,1,mkChar("Direction"));
|
1204
|
+
SET_STRING_ELT(UNITSHeaderNames,2,mkChar("n.atoms"));
|
1205
|
+
SET_STRING_ELT(UNITSHeaderNames,3,mkChar("n.blocks"));
|
1206
|
+
SET_STRING_ELT(UNITSHeaderNames,4,mkChar("n.cells"));
|
1207
|
+
SET_STRING_ELT(UNITSHeaderNames,5,mkChar("UnitNumber"));
|
1208
|
+
SET_STRING_ELT(UNITSHeaderNames,6,mkChar("n.cellsperatom"));
|
1209
|
+
|
1210
|
+
setAttrib(UNITSHeader,R_NamesSymbol,UNITSHeaderNames);
|
1211
|
+
|
1212
|
+
NUMERIC_POINTER(UNITSHeader)[0] = (double)my_cdf.units[i].unittype;
|
1213
|
+
NUMERIC_POINTER(UNITSHeader)[1] = (double)my_cdf.units[i].direction;
|
1214
|
+
NUMERIC_POINTER(UNITSHeader)[2] = (double)my_cdf.units[i].natoms;
|
1215
|
+
NUMERIC_POINTER(UNITSHeader)[3] = (double)my_cdf.units[i].nblocks;
|
1216
|
+
NUMERIC_POINTER(UNITSHeader)[4] = (double)my_cdf.units[i].ncells;
|
1217
|
+
NUMERIC_POINTER(UNITSHeader)[5] = (double)my_cdf.units[i].unitnumber;
|
1218
|
+
NUMERIC_POINTER(UNITSHeader)[6] = (double)my_cdf.units[i].ncellperatom;
|
1219
|
+
|
1220
|
+
PROTECT(tmpUNITSBlock = allocVector(VECSXP,my_cdf.units[i].nblocks));
|
1221
|
+
for (j=0; j < my_cdf.units[i].nblocks; j++){
|
1222
|
+
PROTECT(UNITSBlock = allocVector(VECSXP,3));
|
1223
|
+
PROTECT(UNITSBlockNames = allocVector(STRSXP,3));
|
1224
|
+
SET_STRING_ELT(UNITSBlockNames,0,mkChar("Header"));
|
1225
|
+
SET_STRING_ELT(UNITSBlockNames,1,mkChar("Name"));
|
1226
|
+
SET_STRING_ELT(UNITSBlockNames,2,mkChar("UnitInfo"));
|
1227
|
+
setAttrib(UNITSBlock,R_NamesSymbol,UNITSBlockNames);
|
1228
|
+
|
1229
|
+
PROTECT(UNITSBlockHeader = allocVector(REALSXP,6));
|
1230
|
+
PROTECT(UNITSBlockHeaderNames= allocVector(VECSXP,6));
|
1231
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,0,mkChar("n.atoms"));
|
1232
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,1,mkChar("n.cells"));
|
1233
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,2,mkChar("n.cellsperatom"));
|
1234
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,3,mkChar("Direction"));
|
1235
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,4,mkChar("firstatom"));
|
1236
|
+
SET_VECTOR_ELT(UNITSBlockHeaderNames,5,mkChar("unused"));
|
1237
|
+
|
1238
|
+
NUMERIC_POINTER(UNITSBlockHeader)[0] = (double)my_cdf.units[i].unit_block[j].natoms;
|
1239
|
+
NUMERIC_POINTER(UNITSBlockHeader)[1] = (double)my_cdf.units[i].unit_block[j].ncells;
|
1240
|
+
NUMERIC_POINTER(UNITSBlockHeader)[2] = (double)my_cdf.units[i].unit_block[j].ncellperatom;
|
1241
|
+
NUMERIC_POINTER(UNITSBlockHeader)[3] = (double)my_cdf.units[i].unit_block[j].direction;
|
1242
|
+
NUMERIC_POINTER(UNITSBlockHeader)[4] = (double)my_cdf.units[i].unit_block[j].firstatom;
|
1243
|
+
NUMERIC_POINTER(UNITSBlockHeader)[5] = (double)my_cdf.units[i].unit_block[j].unused;
|
1244
|
+
|
1245
|
+
|
1246
|
+
setAttrib(UNITSBlockHeader,R_NamesSymbol,UNITSBlockHeaderNames);
|
1247
|
+
|
1248
|
+
SET_VECTOR_ELT(UNITSBlock,0,UNITSBlockHeader);
|
1249
|
+
|
1250
|
+
SET_VECTOR_ELT(UNITSBlock,1,mkString(my_cdf.units[i].unit_block[j].blockname));
|
1251
|
+
|
1252
|
+
PROTECT(UNITSBlockInfo = allocVector(VECSXP,6));
|
1253
|
+
|
1254
|
+
PROTECT(UNITSBlockInfoNames = allocVector(STRSXP,6));
|
1255
|
+
SET_STRING_ELT(UNITSBlockInfoNames,0,mkChar("atom.number"));
|
1256
|
+
SET_STRING_ELT(UNITSBlockInfoNames,1,mkChar("x"));
|
1257
|
+
SET_STRING_ELT(UNITSBlockInfoNames,2,mkChar("y"));
|
1258
|
+
SET_STRING_ELT(UNITSBlockInfoNames,3,mkChar("index.position"));
|
1259
|
+
SET_STRING_ELT(UNITSBlockInfoNames,4,mkChar("pbase"));
|
1260
|
+
SET_STRING_ELT(UNITSBlockInfoNames,5,mkChar("tbase"));
|
1261
|
+
|
1262
|
+
setAttrib(UNITSBlockInfo,R_NamesSymbol,UNITSBlockInfoNames);
|
1263
|
+
|
1264
|
+
|
1265
|
+
PROTECT(UNITSBlockInforow_names = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
|
1266
|
+
|
1267
|
+
for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
|
1268
|
+
sprintf(buf, "%d", k+1);
|
1269
|
+
SET_STRING_ELT(UNITSBlockInforow_names,k,mkChar(buf));
|
1270
|
+
}
|
1271
|
+
|
1272
|
+
PROTECT(UNITSBlockAtom = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1273
|
+
PROTECT(UNITSBlockX = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1274
|
+
PROTECT(UNITSBlockY = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1275
|
+
PROTECT(UNITSBlockIndexPos = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
|
1276
|
+
PROTECT(UNITSBlockPbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
|
1277
|
+
PROTECT(UNITSBlockTbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
|
1278
|
+
|
1279
|
+
for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
|
1280
|
+
/* Rprintf("%d %d %d\n",i,j,k);
|
1281
|
+
// NUMERIC_POINTER(UNITSBlockAtom)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
|
1282
|
+
// NUMERIC_POINTER(UNITSBlockX)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].x;
|
1283
|
+
// NUMERIC_POINTER(UNITSBlockY)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].y;
|
1284
|
+
// NUMERIC_POINTER(UNITSBlockIndexPos)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos; */
|
1285
|
+
INTEGER_POINTER(UNITSBlockAtom)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
|
1286
|
+
INTEGER_POINTER(UNITSBlockX)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].x;
|
1287
|
+
INTEGER_POINTER(UNITSBlockY)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].y;
|
1288
|
+
INTEGER_POINTER(UNITSBlockIndexPos)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos;
|
1289
|
+
sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].pbase);
|
1290
|
+
SET_STRING_ELT(UNITSBlockPbase,k,mkChar(buf));
|
1291
|
+
|
1292
|
+
sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].tbase);
|
1293
|
+
SET_STRING_ELT(UNITSBlockTbase,k,mkChar(buf));
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
SET_VECTOR_ELT(UNITSBlockInfo,0,UNITSBlockAtom);
|
1297
|
+
SET_VECTOR_ELT(UNITSBlockInfo,1,UNITSBlockX);
|
1298
|
+
SET_VECTOR_ELT(UNITSBlockInfo,2,UNITSBlockY);
|
1299
|
+
SET_VECTOR_ELT(UNITSBlockInfo,3,UNITSBlockIndexPos);
|
1300
|
+
SET_VECTOR_ELT(UNITSBlockInfo,4,UNITSBlockPbase);
|
1301
|
+
SET_VECTOR_ELT(UNITSBlockInfo,5,UNITSBlockTbase);
|
1302
|
+
UNPROTECT(6);
|
1303
|
+
|
1304
|
+
|
1305
|
+
|
1306
|
+
|
1307
|
+
setAttrib(UNITSBlockInfo, R_RowNamesSymbol, UNITSBlockInforow_names);
|
1308
|
+
setAttrib(UNITSBlockInfo,R_ClassSymbol,mkString("data.frame"));
|
1309
|
+
|
1310
|
+
SET_VECTOR_ELT(UNITSBlock,2,UNITSBlockInfo);
|
1311
|
+
|
1312
|
+
SET_VECTOR_ELT(tmpUNITSBlock,j,UNITSBlock);
|
1313
|
+
UNPROTECT(7);
|
1314
|
+
}
|
1315
|
+
|
1316
|
+
SET_VECTOR_ELT(tmpUNIT,0,UNITSHeader);
|
1317
|
+
SET_VECTOR_ELT(tmpUNIT,1,tmpUNITSBlock);
|
1318
|
+
|
1319
|
+
SET_VECTOR_ELT(UNITS,i,tmpUNIT);
|
1320
|
+
UNPROTECT(5);
|
1321
|
+
}
|
1322
|
+
SET_VECTOR_ELT(CDFInfo,4,UNITS);
|
1323
|
+
UNPROTECT(1);
|
1324
|
+
|
1325
|
+
|
1326
|
+
} else {
|
1327
|
+
/* return the abbreviated structure */
|
1328
|
+
error("Abbreviated structure not yet implemented.\n");
|
1329
|
+
|
1330
|
+
|
1331
|
+
}
|
1332
|
+
|
1333
|
+
|
1334
|
+
|
1335
|
+
|
1336
|
+
|
1337
|
+
dealloc_cdf_xda(&my_cdf);
|
1338
|
+
UNPROTECT(1);
|
1339
|
+
return CDFInfo;
|
1340
|
+
|
1341
|
+
|
1342
|
+
}
|