bio-affy 0.1.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
Binary file
@@ -0,0 +1,347 @@
1
+ /*****************************************************************
2
+ **
3
+ ** This file contains the record definitions for the CDF files.
4
+ ** They come in two formats (textual and binary). The first set
5
+ ** represents the textual form (names starting with cdf_text_); the second set represents the binary (xda) form.
6
+ **
7
+ ******************************************************************/
8
+
9
+ /*****************************************************************
10
+ ** Textual CDF formats
11
+ **
12
+ ** A structure for holding information in the
13
+ ** "CDF" and "Chip" sections (basically header information)
14
+ **
15
+ ******************************************************************/
16
+
17
+ typedef struct {
18
+
19
+ char *version;
20
+ char *name;
21
+ int rows,cols;
22
+ int numberofunits;
23
+ int maxunit;
24
+ int NumQCUnits;
25
+ char *chipreference;
26
+ } cdf_text_header;
27
+
28
+ /*****************************************************************
29
+ **
30
+ **
31
+ ** A structure for holding QC probe information
32
+ ** Note the "CYCLES" item is ignored and never parsed
33
+ **
34
+ ******************************************************************/
35
+
36
+
37
+ typedef struct {
38
+ int x;
39
+ int y;
40
+ char *probe;
41
+ int plen;
42
+ int atom;
43
+ int index;
44
+ int match;
45
+ int bg;
46
+ } cdf_text_qc_probe;
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+ /*******************************************************************
55
+ **
56
+ ** A structure for holding QC units information. These are
57
+ ** areas of the chip that contain probes that may or may not be useful
58
+ ** for QC and other purposes.
59
+ **
60
+ **
61
+ *******************************************************************/
62
+
63
+
64
+
65
+ typedef struct{
66
+ int type;
67
+ unsigned int n_probes;
68
+ int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
69
+ cdf_text_qc_probe *qc_probes;
70
+
71
+ } cdf_text_qc_unit;
72
+
73
+
74
+ /*******************************************************************
75
+ **
76
+ ** A structure for holding probe information for unit_blocks_probes
77
+ **
78
+ ** probes are stored within blocks
79
+ **
80
+ *******************************************************************/
81
+
82
+ typedef struct{
83
+ int x;
84
+ int y;
85
+ char *probe;
86
+ char *feat;
87
+ char *qual;
88
+ int expos;
89
+ int pos;
90
+ char *cbase;
91
+ char *pbase;
92
+ char *tbase;
93
+ int atom;
94
+ int index;
95
+ int codonid;
96
+ int codon;
97
+ int regiontype;
98
+ char* region;
99
+ } cdf_text_unit_block_probe;
100
+
101
+
102
+
103
+
104
+ /*******************************************************************
105
+ **
106
+ ** A structure holding Unit_blocks
107
+ **
108
+ ** blocks are stored within units.
109
+ ** blocks contain many probes
110
+ **
111
+ *******************************************************************/
112
+
113
+ typedef struct{
114
+ char *name;
115
+ int blocknumber;
116
+ int num_atoms;
117
+ int num_cells;
118
+ int start_position;
119
+ int stop_position;
120
+ int direction;
121
+ cdf_text_unit_block_probe *probes;
122
+
123
+ } cdf_text_unit_block;
124
+
125
+
126
+
127
+
128
+
129
+
130
+ /*******************************************************************
131
+ **
132
+ ** A structure for holding "Units", also known as probesets
133
+ **
134
+ ** Each unit contains one or more blocks. Each block contains one or
135
+ ** more probes
136
+ **
137
+ *******************************************************************/
138
+
139
+
140
+ typedef struct{
141
+ char *name;
142
+ int direction;
143
+ int num_atoms;
144
+ int num_cells;
145
+ int unit_number;
146
+ int unit_type;
147
+ int numberblocks;
148
+ int MutationType;
149
+ cdf_text_unit_block *blocks;
150
+ } cdf_text_unit;
151
+
152
+
153
+
154
+ /*******************************************************************
155
+ **
156
+ ** A structure for holding a text CDF file
157
+ **
158
+ ** text cdf files consist of
159
+ ** basic header information
160
+ ** qcunits
161
+ ** - qc probes
162
+ ** units (aka probesets)
163
+ ** - blocks
164
+ ** - probes
165
+ **
166
+ **
167
+ *******************************************************************/
168
+
169
+ typedef struct{
170
+ cdf_text_header header;
171
+ cdf_text_qc_unit *qc_units;
172
+ cdf_text_unit *units;
173
+ } cdf_text;
174
+
175
+
176
+
177
+ /************************************************************************
178
+ **
179
+ ** Structures for holding the CDF file information. Basically
180
+ ** header/general information that appears at the start of the CDF file
181
+ **
182
+ ************************************************************************/
183
+
184
+ typedef struct {
185
+ int magicnumber;
186
+ int version_number;
187
+ unsigned short rows,cols;
188
+ int n_units,n_qc_units;
189
+ int len_ref_seq;
190
+ int i;
191
+ char *ref_seq;
192
+ } cdf_xda_header;
193
+
194
+
195
+ /****************************************************************************
196
+ **
197
+ ** The following two structures store QC units and QC unit probe information
198
+ **
199
+ ** QC information, repeated for each QC unit:
200
+ ** Type - unsigned short
201
+ ** Number of probes - integer
202
+ **
203
+ ** Probe information, repeated for each probe in the QC unit:
204
+ ** X coordinate - unsigned short
205
+ ** Y coordinate - unsigned short
206
+ ** Probe length - unsigned char
207
+ ** Perfect match flag - unsigned char
208
+ ** Background probe flag - unsigned char
209
+ **
210
+ ****************************************************************************/
211
+
212
+
213
+ typedef struct{
214
+ unsigned short x;
215
+ unsigned short y;
216
+ unsigned char probelength;
217
+ unsigned char pmflag;
218
+ unsigned char bgprobeflag;
219
+
220
+ } cdf_qc_probe;
221
+
222
+ typedef struct{
223
+ unsigned short type;
224
+ unsigned int n_probes;
225
+
226
+ cdf_qc_probe *qc_probes;
227
+
228
+ } cdf_qc_unit;
229
+
230
+
231
+ /****************************************************************************
232
+ **
233
+ ** The following three structures store information for units (sometimes called
234
+ ** probesets), blocks (of which there are one or more within a unit) and cells
235
+ ** sometimes called probe of which there are one or more within each block
236
+ **
237
+ **
238
+ ** Unit information, repeated for each unit:
239
+ **
240
+ ** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
241
+ ** Direction - unsigned char
242
+ ** Number of atoms - integer
243
+ ** Number of blocks - integer (always 1 for expression units)
244
+ ** Number of cells - integer
245
+ ** Unit number (probe set number) - integer
246
+ ** Number of cells per atom - unsigned char
247
+ **
248
+ **
249
+ **
250
+ ** Block information, repeated for each block in the unit:
251
+ **
252
+ ** Number of atoms - integer
253
+ ** Number of cells - integer
254
+ ** Number of cells per atom - unsigned char
255
+ ** Direction - unsigned char
256
+ ** The position of the first atom - integer
257
+ ** <unused integer value> - integer
258
+ ** The block name - char[64]
259
+ **
260
+ **
261
+ **
262
+ ** Cell information, repeated for each cell in the block:
263
+ **
264
+ ** Atom number - integer
265
+ ** X coordinate - unsigned short
266
+ ** Y coordinate - unsigned short
267
+ ** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
268
+ ** Base of probe at substitution position - char
269
+ ** Base of target at interrogation position - char
270
+ **
271
+ **
272
+ ****************************************************************************/
273
+
274
+
275
+ typedef struct{
276
+ int atomnumber;
277
+ unsigned short x;
278
+ unsigned short y;
279
+ int indexpos;
280
+ char pbase;
281
+ char tbase;
282
+ } cdf_unit_cell;
283
+
284
+
285
+ typedef struct{
286
+ int natoms;
287
+ int ncells;
288
+ unsigned char ncellperatom;
289
+ unsigned char direction;
290
+ int firstatom;
291
+ int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
292
+ char blockname[64];
293
+
294
+ cdf_unit_cell *unit_cells;
295
+
296
+ } cdf_unit_block;
297
+
298
+
299
+ typedef struct{
300
+ unsigned short unittype;
301
+ unsigned char direction;
302
+ int natoms;
303
+ int nblocks;
304
+ int ncells;
305
+ int unitnumber;
306
+ unsigned char ncellperatom;
307
+
308
+ cdf_unit_block *unit_block;
309
+
310
+ } cdf_unit;
311
+
312
+
313
+ /****************************************************************************
314
+ **
315
+ ** A data structure for holding CDF information read from a xda format cdf file
316
+ **
317
+ ** note that this structure reads in everything including things that might not
318
+ ** be of any subsequent use.
319
+ **
320
+ ****************************************************************************/
321
+
322
+
323
+
324
+ typedef struct {
325
+
326
+ cdf_xda_header header; /* Header information */
327
+ char **probesetnames; /* Names of probesets */
328
+
329
+ int *qc_start; /* These are used for random access */
330
+ int *units_start;
331
+
332
+ cdf_qc_unit *qc_units;
333
+ cdf_unit *units;
334
+
335
+
336
+ } cdf_xda;
337
+
338
+
339
+
340
+
341
+ // int check_xda_file(const char *filename);
342
+ int isTextCDFFile(const char *filename);
343
+ int read_cdf_text(const char *filename, cdf_text *mycdf);
344
+ void dealloc_cdf_text(cdf_text *my_cdf);
345
+ int read_cdf_xda(const char *filename,cdf_xda *my_cdf);
346
+ void dealloc_cdf_xda(cdf_xda *my_cdf);
347
+
@@ -0,0 +1,1342 @@
1
+ /****************************************************************
2
+ **
3
+ ** File: read_cdf_xda.c
4
+ **
5
+ ** Implementation by: B. M. Bolstad <bmb@bmbolstad.com>
6
+ **
7
+ ** A parser designed to read the binary format cdf files.
8
+ ** Sometimes called the xda format.
9
+ **
10
+ ** Implemented based on documentation available from Affymetrix
11
+ **
12
+ ** Implementation begun 2005.
13
+ **
14
+ ** Modification Dates
15
+ ** Feb 4 - Initial version
16
+ ** Feb 5 - A bunch of hacks for SNP chips.
17
+ ** Apr 20
18
+ ** Aug 16, 2005 - Fix potential big endian bug
19
+ ** Sep 22, 2005 - Fix some signed/unsigned bugs
20
+ ** Dec 1, 2005 - Comment cleaning
21
+ ** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
22
+ ** May 31, 2006 - fix some compiler warnings
23
+ ** Aug 23, 2006 - fix a potential (but at current time non-existent) problem
24
+ ** when there are 0 qcunits or 0 units
25
+ ** Aug 25, 2007 - Move file reading functions to centralized location
26
+ ** Oct 27, 2007 - When building a cdfenv set NON identified values to NA (mostly affects MM for PM only arrays)
27
+ ** Nov 12, 2008 - Fix crash
28
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
29
+ **
30
+ ****************************************************************/
31
+
32
+ /** --- includes --- */
33
+ #include <R.h>
34
+ #include <Rdefines.h>
35
+
36
+ #include "stdlib.h"
37
+ #include "stdio.h"
38
+ #include "fread_functions.h"
39
+ #include <ctype.h>
40
+
41
+ /* #define READ_CDF_DEBUG */
42
+ /* #define READ_CDF_DEBUG_SNP */
43
+ #define READ_CDF_NOSNP
44
+
45
+
46
+
47
+ /************************************************************************
48
+ **
49
+ ** Structures for holding the CDF file information. Basically
50
+ ** header/general information that appears at the start of the CDF file
51
+ **
52
+ ************************************************************************/
53
+
54
+ typedef struct {
55
+ int magicnumber;
56
+ int version_number;
57
+ unsigned short rows,cols;
58
+ int n_units,n_qc_units;
59
+ int len_ref_seq;
60
+ int i;
61
+ char *ref_seq;
62
+ } cdf_xda_header;
63
+
64
+
65
+ /****************************************************************************
66
+ **
67
+ ** The following two structures store QC units and QC unit probe information
68
+ **
69
+ ** QC information, repeated for each QC unit:
70
+ ** Type - unsigned short
71
+ ** Number of probes - integer
72
+ **
73
+ ** Probe information, repeated for each probe in the QC unit:
74
+ ** X coordinate - unsigned short
75
+ ** Y coordinate - unsigned short
76
+ ** Probe length - unsigned char
77
+ ** Perfect match flag - unsigned char
78
+ ** Background probe flag - unsigned char
79
+ **
80
+ ****************************************************************************/
81
+
82
+
83
+ typedef struct{
84
+ unsigned short x;
85
+ unsigned short y;
86
+ unsigned char probelength;
87
+ unsigned char pmflag;
88
+ unsigned char bgprobeflag;
89
+
90
+ } cdf_qc_probe;
91
+
92
+ typedef struct{
93
+ unsigned short type;
94
+ unsigned int n_probes;
95
+
96
+ cdf_qc_probe *qc_probes;
97
+
98
+ } cdf_qc_unit;
99
+
100
+
101
+ /****************************************************************************
102
+ **
103
+ ** The following three structures store information for units (sometimes called
104
+ ** probesets), blocks (of which there are one or more within a unit) and cells
105
+ ** sometimes called probe of which there are one or more within each block
106
+ **
107
+ **
108
+ ** Unit information, repeated for each unit:
109
+ **
110
+ ** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
111
+ ** Direction - unsigned char
112
+ ** Number of atoms - integer
113
+ ** Number of blocks - integer (always 1 for expression units)
114
+ ** Number of cells - integer
115
+ ** Unit number (probe set number) - integer
116
+ ** Number of cells per atom - unsigned char
117
+ **
118
+ **
119
+ **
120
+ ** Block information, repeated for each block in the unit:
121
+ **
122
+ ** Number of atoms - integer
123
+ ** Number of cells - integer
124
+ ** Number of cells per atom - unsigned char
125
+ ** Direction - unsigned char
126
+ ** The position of the first atom - integer
127
+ ** <unused integer value> - integer
128
+ ** The block name - char[64]
129
+ **
130
+ **
131
+ **
132
+ ** Cell information, repeated for each cell in the block:
133
+ **
134
+ ** Atom number - integer
135
+ ** X coordinate - unsigned short
136
+ ** Y coordinate - unsigned short
137
+ ** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
138
+ ** Base of probe at substitution position - char
139
+ ** Base of target at interrogation position - char
140
+ **
141
+ **
142
+ ****************************************************************************/
143
+
144
+
145
+ typedef struct{
146
+ int atomnumber;
147
+ unsigned short x;
148
+ unsigned short y;
149
+ int indexpos;
150
+ char pbase;
151
+ char tbase;
152
+ } cdf_unit_cell;
153
+
154
+
155
+ typedef struct{
156
+ int natoms;
157
+ int ncells;
158
+ unsigned char ncellperatom;
159
+ unsigned char direction;
160
+ int firstatom;
161
+ int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
162
+ char blockname[64];
163
+
164
+ cdf_unit_cell *unit_cells;
165
+
166
+ } cdf_unit_block;
167
+
168
+
169
+ typedef struct{
170
+ unsigned short unittype;
171
+ unsigned char direction;
172
+ int natoms;
173
+ int nblocks;
174
+ int ncells;
175
+ int unitnumber;
176
+ unsigned char ncellperatom;
177
+
178
+ cdf_unit_block *unit_block;
179
+
180
+ } cdf_unit;
181
+
182
+
183
+ /****************************************************************************
184
+ **
185
+ ** A data structure for holding CDF information read from a xda format cdf file
186
+ **
187
+ ** note that this structure reads in everything including things that might not
188
+ ** be of any subsequent use.
189
+ **
190
+ ****************************************************************************/
191
+
192
+
193
+
194
+ typedef struct {
195
+
196
+ cdf_xda_header header; /* Header information */
197
+ char **probesetnames; /* Names of probesets */
198
+
199
+ int *qc_start; /* These are used for random access */
200
+ int *units_start;
201
+
202
+ cdf_qc_unit *qc_units;
203
+ cdf_unit *units;
204
+
205
+
206
+ } cdf_xda;
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+ /*************************************************************************
220
+ **
221
+ ** int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream)
222
+ **
223
+ ** cdf_qc_unit *my_unit - preallocated space to store qc unit information
224
+ ** int filelocation - indexing/location information used to read information
225
+ ** from file
226
+ ** FILE *instream - a pre-opened file to read from
227
+ **
228
+ ** reads a specified qc_unit from the file. Allocates space for the cdf_qc_probes
229
+ ** and also reads them in
230
+ **
231
+ **
232
+ *************************************************************************/
233
+
234
+ int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream){
235
+
236
+ int i;
237
+
238
+
239
+ fseek(instream,filelocation,SEEK_SET);
240
+
241
+ fread_uint16(&(my_unit->type),1,instream);
242
+ fread_uint32(&(my_unit->n_probes),1,instream);
243
+
244
+
245
+ my_unit->qc_probes = Calloc(my_unit->n_probes,cdf_qc_probe);
246
+
247
+ for (i=0; i < my_unit->n_probes; i++){
248
+ fread_uint16(&(my_unit->qc_probes[i].x),1,instream);
249
+ fread_uint16(&(my_unit->qc_probes[i].y),1,instream);
250
+ fread_uchar(&(my_unit->qc_probes[i].probelength),1,instream);
251
+ fread_uchar(&(my_unit->qc_probes[i].pmflag),1,instream);
252
+ fread_uchar(&(my_unit->qc_probes[i].bgprobeflag),1,instream);
253
+
254
+ }
255
+ return 1;
256
+ }
257
+
258
+ /*************************************************************************
259
+ **
260
+ ** int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream)
261
+ **
262
+ ** cdf_unit *my_unit - preallocated space to store unit (aka probeset) information
263
+ ** int filelocation - indexing/location information used to read information
264
+ ** from file
265
+ ** FILE *instream - a pre-opened file to read from
266
+ **
267
+ ** reads a specified probeset into the my_unit, including all blocks and all probes
268
+ ** it is assumed that the unit itself is preallocated. Blocks and probes within
269
+ ** the blocks are allocated by this function.
270
+ **
271
+ *************************************************************************/
272
+
273
+ int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream){
274
+
275
+ int i,j;
276
+
277
+ fseek(instream,filelocation,SEEK_SET);
278
+
279
+ fread_uint16(&(my_unit->unittype),1,instream);
280
+ fread_uchar(&(my_unit->direction),1,instream);
281
+
282
+
283
+ fread_int32(&(my_unit->natoms),1,instream);
284
+ fread_int32(&(my_unit->nblocks),1,instream);
285
+ fread_int32(&(my_unit->ncells),1,instream);
286
+ fread_int32(&(my_unit->unitnumber),1,instream);
287
+ fread_uchar(&(my_unit->ncellperatom),1,instream);
288
+
289
+ my_unit->unit_block = Calloc(my_unit->nblocks,cdf_unit_block);
290
+
291
+ for (i=0; i < my_unit->nblocks; i++){
292
+ fread_int32(&(my_unit->unit_block[i].natoms),1,instream);
293
+ fread_int32(&(my_unit->unit_block[i].ncells),1,instream);
294
+ fread_uchar(&(my_unit->unit_block[i].ncellperatom),1,instream);
295
+ fread_uchar(&(my_unit->unit_block[i].direction),1,instream);
296
+ fread_int32(&(my_unit->unit_block[i].firstatom),1,instream);
297
+ fread_int32(&(my_unit->unit_block[i].unused),1,instream);
298
+ fread_char(my_unit->unit_block[i].blockname,64,instream);
299
+
300
+ my_unit->unit_block[i].unit_cells = Calloc(my_unit->unit_block[i].ncells,cdf_unit_cell);
301
+
302
+ for (j=0; j < my_unit->unit_block[i].ncells; j++){
303
+ fread_int32(&(my_unit->unit_block[i].unit_cells[j].atomnumber),1,instream);
304
+ fread_uint16(&(my_unit->unit_block[i].unit_cells[j].x),1,instream);
305
+ fread_uint16(&(my_unit->unit_block[i].unit_cells[j].y),1,instream);
306
+ fread_int32(&(my_unit->unit_block[i].unit_cells[j].indexpos),1,instream);
307
+ fread_char(&(my_unit->unit_block[i].unit_cells[j].pbase),1,instream);
308
+ fread_char(&(my_unit->unit_block[i].unit_cells[j].tbase),1,instream);
309
+ }
310
+
311
+
312
+ }
313
+
314
+
315
+ return 1;
316
+
317
+ }
318
+
319
+ /*************************************************************************
320
+ **
321
+ ** static void dealloc_cdf_xda(cdf_xda *my_cdf)
322
+ **
323
+ ** Deallocates all the previously allocated memory.
324
+ **
325
+ *************************************************************************/
326
+
327
+ void dealloc_cdf_xda(cdf_xda *my_cdf){
328
+
329
+ int i;
330
+
331
+ for (i=0; i < my_cdf->header.n_units; i++){
332
+ Free(my_cdf->probesetnames[i]);
333
+ }
334
+ Free(my_cdf->probesetnames);
335
+
336
+ Free(my_cdf->qc_start);
337
+ Free(my_cdf->units_start);
338
+
339
+ for (i=0; i < my_cdf->header.n_qc_units; i++){
340
+ Free(my_cdf->qc_units[i].qc_probes);
341
+ }
342
+
343
+ Free(my_cdf->qc_units);
344
+
345
+
346
+ for (i=0; i < my_cdf->header.n_units; i++){
347
+ Free(my_cdf->units[i].unit_block);
348
+ }
349
+ Free(my_cdf->units);
350
+ Free(my_cdf->header.ref_seq);
351
+
352
+ }
353
+
354
+
355
+
356
+ /*************************************************************
357
+ **
358
+ ** int read_cdf_xda(const char *filename)
359
+ **
360
+ ** filename - Name of the prospective binary cdf file
361
+ **
362
+ ** Returns 1 if the file was completely successfully parsed
363
+ ** otherwise 0 (and possibly prints a message to screen)
364
+ **
365
+ **
366
+ **
367
+ **
368
+ *************************************************************/
369
+
370
+ int read_cdf_xda(const char *filename,cdf_xda *my_cdf){
371
+
372
+ FILE *infile;
373
+
374
+ int i;
375
+
376
+ if ((infile = fopen(filename, "rb")) == NULL)
377
+ {
378
+ error("Unable to open the file %s",filename);
379
+ return 0;
380
+ }
381
+
382
+ if (!fread_int32(&my_cdf->header.magicnumber,1,infile)){
383
+ return 0;
384
+ }
385
+
386
+ if (!fread_int32(&my_cdf->header.version_number,1,infile)){
387
+ return 0;
388
+ }
389
+
390
+
391
+ if (my_cdf->header.magicnumber != 67){
392
+ Rprintf("Magic number is not 67. This is probably not a binary cdf file.\n");
393
+ return 0;
394
+ }
395
+
396
+ if (my_cdf->header.version_number != 1){
397
+ Rprintf("Don't know if version %d binary cdf files can be handled.\n",my_cdf->header.version_number);
398
+ return 0;
399
+ }
400
+ if (!fread_uint16(&my_cdf->header.cols,1,infile)){
401
+ return 0;
402
+ }
403
+ if (!fread_uint16(&my_cdf->header.rows,1,infile)){
404
+ return 0;
405
+ }
406
+
407
+ if (!fread_int32(&my_cdf->header.n_units,1,infile)){
408
+ return 0;
409
+ }
410
+
411
+ if (!fread_int32(&my_cdf->header.n_qc_units,1,infile)){
412
+ return 0;
413
+ }
414
+
415
+
416
+ if (!fread_int32(&my_cdf->header.len_ref_seq,1,infile)){
417
+ return 0;
418
+ }
419
+
420
+ my_cdf->header.ref_seq = Calloc(my_cdf->header.len_ref_seq,char);
421
+
422
+ fread_char(my_cdf->header.ref_seq, my_cdf->header.len_ref_seq, infile);
423
+ my_cdf->probesetnames = Calloc(my_cdf->header.n_units,char *);
424
+
425
+
426
+ for (i =0; i < my_cdf->header.n_units;i++){
427
+ my_cdf->probesetnames[i] = Calloc(64,char);
428
+ if (!fread_char(my_cdf->probesetnames[i], 64, infile)){
429
+ return 0;
430
+ }
431
+ }
432
+
433
+
434
+
435
+ my_cdf->qc_start = Calloc(my_cdf->header.n_qc_units,int);
436
+ my_cdf->units_start = Calloc(my_cdf->header.n_units,int);
437
+
438
+ /*** Old code that might fail if there is 0 QCunits or 0 Units
439
+ if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)
440
+ || !fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)){
441
+ return 0;
442
+ }
443
+ ***/
444
+
445
+ if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)) {
446
+ if(my_cdf->header.n_qc_units != 0) {
447
+ return 0;
448
+ }
449
+ }
450
+
451
+ if(!fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)) {
452
+ if(my_cdf->header.n_units != 0) {
453
+ return 0;
454
+ }
455
+ }
456
+
457
+ /* We will read in all the QC and Standard Units, rather than
458
+ random accessing what we need */
459
+ my_cdf->qc_units = Calloc(my_cdf->header.n_qc_units,cdf_qc_unit);
460
+
461
+
462
+ for (i =0; i < my_cdf->header.n_qc_units; i++){
463
+ if (!read_cdf_qcunit(&my_cdf->qc_units[i],my_cdf->qc_start[i],infile)){
464
+ return 0;
465
+ }
466
+ }
467
+
468
+ my_cdf->units = Calloc(my_cdf->header.n_units,cdf_unit);
469
+
470
+
471
+ for (i=0; i < my_cdf->header.n_units; i++){
472
+ if (!read_cdf_unit(&my_cdf->units[i],my_cdf->units_start[i],infile)){
473
+ return 0;
474
+ }
475
+ }
476
+
477
+
478
+ #ifdef READ_CDF_DEBUG
479
+ Rprintf("%d %d %d %d %d\n",my_cdf->header.cols,my_cdf->header.rows,my_cdf->header.n_units,my_cdf->header.n_qc_units,my_cdf->header.len_ref_seq);
480
+ for (i =0; i < my_cdf->header.n_units;i++){
481
+ Rprintf("%s\n",my_cdf->probesetnames[i]);
482
+ }
483
+
484
+ for (i =0; i < my_cdf->header.n_qc_units;i++){
485
+ Rprintf("%d\n",my_cdf->qc_start[i]);
486
+ }
487
+
488
+ for (i =0; i < my_cdf->header.n_qc_units;i++){
489
+ Rprintf("%d\n",my_cdf->units_start[i]);
490
+ }
491
+
492
+ Rprintf("%d %d\n",my_cdf->qc_units[0].type,my_cdf->qc_units[0].n_probes);
493
+
494
+ for (i=0; i < my_cdf->qc_units[0].n_probes; i++){
495
+ Rprintf("%d %d %d %u %d\n",my_cdf->qc_units[0].qc_probes[i].x,my_cdf->qc_units[0].qc_probes[i].y,
496
+ my_cdf->qc_units[0].qc_probes[i].probelength,
497
+ my_cdf->qc_units[0].qc_probes[i].pmflag,
498
+ my_cdf->qc_units[0].qc_probes[i].bgprobeflag);
499
+
500
+ }
501
+
502
+
503
+ Rprintf("%u %u %d %d %d %d %u\n",my_cdf->units[0].unittype,my_cdf->units[0].direction,
504
+ my_cdf->units[0].natoms,
505
+ my_cdf->units[0].nblocks,
506
+ my_cdf->units[0].ncells,
507
+ my_cdf->units[0].unitnumber,
508
+ my_cdf->units[0].ncellperatom);
509
+
510
+ Rprintf("%d %d %u %u %d %d %s\n",my_cdf->units[0].unit_block[0].natoms,my_cdf->units[0].unit_block[0].ncells,
511
+ my_cdf->units[0].unit_block[0].ncellperatom,
512
+ my_cdf->units[0].unit_block[0].direction,
513
+ my_cdf->units[0].unit_block[0].firstatom,
514
+ my_cdf->units[0].unit_block[0].unused,
515
+ my_cdf->units[0].unit_block[0].blockname);
516
+
517
+ for (i=0; i <my_cdf->units[0].unit_block[0].ncells ; i++){
518
+ Rprintf("%d %u %u %d %c %c\n",
519
+ my_cdf->units[0].unit_block[0].unit_cells[i].atomnumber,
520
+ my_cdf->units[0].unit_block[0].unit_cells[i].x,
521
+ my_cdf->units[0].unit_block[0].unit_cells[i].y,
522
+ my_cdf->units[0].unit_block[0].unit_cells[i].indexpos,
523
+ my_cdf->units[0].unit_block[0].unit_cells[i].pbase,
524
+ my_cdf->units[0].unit_block[0].unit_cells[i].tbase);
525
+ }
526
+ #endif
527
+
528
+ fclose(infile);
529
+ return 1;
530
+
531
+ /* fseek() */
532
+ }
533
+
534
+
535
+
536
+ /*************************************************************
537
+ **
538
+ ** static int check_cdf_xda(const char *filename)
539
+ **
540
+ ** Opens the file given by filename and checks it to see if
541
+ ** it looks like a binary CDF file. returns 0 if
542
+ ** the file looks like it is not a binary CDF aka xda format
543
+ ** cdf file
544
+ **
545
+ **
546
+ *************************************************************/
547
+
548
+
549
/*
** Opens filename and inspects the first two 32-bit integers to decide
** whether it looks like a binary (xda) CDF file.
**
** Returns 1 when the magic number is 67 and the version number is 1,
** otherwise 0. Raises error() if the file cannot be opened or is too short
** to contain both integers.
**
** The file is closed before any return or error(), so no stream is leaked.
*/
int check_cdf_xda(const char *filename){

  FILE *infile;
  int magicnumber,version_number;
  int header_read;

  if ((infile = fopen(filename, "rb")) == NULL)
    {
      error("Unable to open the file %s",filename);
      return 0;
    }

  /* && short-circuits, so the version is only read after the magic number */
  header_read = fread_int32(&magicnumber,1,infile)
    && fread_int32(&version_number,1,infile);

  /* Nothing further is read; close now so every path below is leak-free */
  fclose(infile);

  if (!header_read){
    error("File corrupt or truncated?");
    return 0;
  }

  if (magicnumber != 67){
    return 0;
  }

  if (version_number != 1){
    return 0;
  }

  return 1;
}
586
+
587
+
588
+
589
+ /*************************************************************
590
+ **
591
+ ** static int isPM(char pbase,char tbase)
592
+ **
593
+ ** char pbase - probe base at substitution position
594
+ ** char tbase - target base at substitution position
595
+ **
596
+ ** this function works out whether a probe is a PM or MM
597
+ **
598
+ **
599
+ *************************************************************/
600
+
601
+
602
/*
** Works out whether a probe is a perfect match (PM) or a mismatch (MM).
**
** pbase - probe base at the substitution position
** tbase - target base at the substitution position
**
** Comparison is case-insensitive. Returns 0 when the two bases are equal,
** or when the probe base is A/T/C/G and the target base is not its
** complement (A<->T, C<->G). Returns 1 in every other case.
*/
static int isPM(char pbase,char tbase){

  /* toupper() requires a value representable as unsigned char (or EOF);
     a plain char may be negative, so convert before the call. */
  int p = toupper((unsigned char)pbase);
  int t = toupper((unsigned char)tbase);

  if (p == t){
    return 0;
  }
  if (((p == 'A') && (t != 'T')) || ((p == 'T') && (t != 'A'))){
    return 0;
  }
  if (((p == 'C') && (t != 'G')) || ((p == 'G') && (t != 'C'))){
    return 0;
  }
  return 1;
}
631
+
632
+
633
+
634
+ /*************************************************************
635
+ **
636
+ ** SEXP CheckCDFXDA(SEXP filename)
637
+ **
638
+ ** Takes a given file name and returns 1 if it is a xda format CDF file
639
+ ** otherwise it returns 0
640
+ **
641
+ *************************************************************/
642
+
643
+
644
+
645
+ SEXP CheckCDFXDA(SEXP filename){
646
+ SEXP tmp;
647
+ int good;
648
+ const char *cur_file_name;
649
+
650
+ cur_file_name = CHAR(STRING_ELT(filename,0));
651
+
652
+ good = check_cdf_xda(cur_file_name);
653
+
654
+ PROTECT(tmp= allocVector(INTSXP,1));
655
+
656
+ INTEGER(tmp)[0] = good;
657
+
658
+ UNPROTECT(1);
659
+ return tmp;
660
+ }
661
+
662
+
663
+
664
+
665
+
666
+
667
+ SEXP ReadCDFFile(SEXP filename){
668
+
669
+ SEXP CDFInfo;
670
+ SEXP Dimensions;
671
+ SEXP LocMap= R_NilValue,tempLocMap;
672
+ SEXP CurLocs;
673
+ SEXP PSnames = R_NilValue,tempPSnames;
674
+ SEXP ColNames;
675
+ SEXP dimnames;
676
+
677
+ cdf_xda my_cdf;
678
+ const char *cur_file_name;
679
+ /* char *tmp_name; */
680
+
681
+ int i,j,k;
682
+ int cur_blocks,cur_cells, cur_atoms;
683
+ /* int which_probetype; */
684
+ int which_psname=0;
685
+
686
+ cdf_unit_cell *current_cell;
687
+
688
+ double *curlocs;
689
+
690
+ /* int nrows, ncols; */
691
+
692
+
693
+ cur_file_name = CHAR(STRING_ELT(filename,0));
694
+
695
+ if (!read_cdf_xda(cur_file_name,&my_cdf)){
696
+ error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
697
+ }
698
+
699
+
700
+ /* We output:
701
+ nrows, ncols in an integer vector, plus a list of probesets PM MM locations (in the BioC style) */
702
+ PROTECT(CDFInfo = allocVector(VECSXP,2));
703
+ PROTECT(Dimensions = allocVector(REALSXP,2));
704
+
705
+ if (my_cdf.units[0].unittype ==1){
706
+ PROTECT(LocMap = allocVector(VECSXP,my_cdf.header.n_units));
707
+ PROTECT(PSnames = allocVector(STRSXP,my_cdf.header.n_units));
708
+ } else {
709
+ PROTECT(tempLocMap = allocVector(VECSXP,2*my_cdf.header.n_units));
710
+ PROTECT(tempPSnames = allocVector(STRSXP,2*my_cdf.header.n_units));
711
+ }
712
+
713
+ NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.rows;
714
+ NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.cols;
715
+
716
+
717
+ for (i=0; i < my_cdf.header.n_units; i++){
718
+ #ifdef READ_CDF_DEBUG
719
+ printf("%d\n",i);
720
+ #endif
721
+ cur_blocks = my_cdf.units[i].nblocks;
722
+
723
+ #ifdef READ_CDF_DEBUG
724
+ Rprintf("New Block: ");
725
+ #endif
726
+ if (my_cdf.units[i].unittype ==1){
727
+ /* Expression analysis */
728
+ for (j=0; j < cur_blocks; j++){
729
+
730
+ #ifdef READ_CDF_DEBUG
731
+ Rprintf("%s ",my_cdf.units[i].unit_block[j].blockname);
732
+ #endif
733
+
734
+ cur_cells = my_cdf.units[i].unit_block[j].ncells;
735
+ cur_atoms = my_cdf.units[i].unit_block[j].natoms;
736
+
737
+ SET_STRING_ELT(PSnames,i,mkChar(my_cdf.units[i].unit_block[j].blockname));
738
+
739
+ PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
740
+ PROTECT(ColNames = allocVector(STRSXP,2));
741
+ PROTECT(dimnames = allocVector(VECSXP,2));
742
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
743
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
744
+
745
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
746
+
747
+ for (k=0; k < cur_atoms*2; k++){
748
+ curlocs[k] = R_NaN;
749
+ }
750
+
751
+ for (k=0; k < cur_cells; k++){
752
+ current_cell = &(my_cdf.units[i].unit_block[j].unit_cells[k]);
753
+
754
+ if(isPM(current_cell->pbase,current_cell->tbase)){
755
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
756
+ } else {
757
+ curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
758
+ }
759
+ }
760
+
761
+
762
+
763
+ SET_VECTOR_ELT(dimnames,1,ColNames);
764
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
765
+ SET_VECTOR_ELT(LocMap,i,CurLocs);
766
+ UNPROTECT(3);
767
+ }
768
+ } else if (my_cdf.units[i].unittype == 2){
769
+ /* Genotyping array */
770
+
771
+ #ifndef READ_CDF_NOSNP
772
+ if (cur_blocks == 1){
773
+
774
+ cur_cells = my_cdf.units[i].unit_block[0].ncells;
775
+ cur_atoms = my_cdf.units[i].unit_block[0].natoms;
776
+
777
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
778
+
779
+ PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
780
+ PROTECT(ColNames = allocVector(STRSXP,2));
781
+ PROTECT(dimnames = allocVector(VECSXP,2));
782
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
783
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
784
+
785
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
786
+
787
+ for (k=0; k < cur_cells; k++){
788
+ current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
789
+
790
+ if(isPM(current_cell->pbase,current_cell->tbase)){
791
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
792
+ } else {
793
+ curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
794
+ }
795
+ }
796
+
797
+
798
+
799
+ SET_VECTOR_ELT(dimnames,1,ColNames);
800
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
801
+ SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
802
+ UNPROTECT(3);
803
+ which_psname++;
804
+
805
+ } else if (cur_blocks == 4){
806
+ for (j=0; j < cur_blocks; j++){
807
+ #ifdef READ_CDF_DEBUG_SNP
808
+ Rprintf("%s %s\n",my_cdf.probesetnames[i],my_cdf.units[i].unit_block[j].blockname);
809
+ #endif
810
+ }
811
+
812
+ j = 0;
813
+ cur_cells = my_cdf.units[i].unit_block[0].ncells;
814
+ cur_atoms = my_cdf.units[i].unit_block[0].natoms;
815
+ if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
816
+ tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
817
+ tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
818
+ tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
819
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
820
+ Free(tmp_name);
821
+ } else {
822
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
823
+ }
824
+
825
+ PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
826
+ PROTECT(ColNames = allocVector(STRSXP,2));
827
+ PROTECT(dimnames = allocVector(VECSXP,2));
828
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
829
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
830
+
831
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
832
+
833
+
834
+ for (k=0; k < cur_cells; k++){
835
+ current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
836
+ /* Rprintf("%d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
837
+ if(isPM(current_cell->pbase,current_cell->tbase)){
838
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
839
+ } else {
840
+ curlocs[current_cell->atomnumber+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
841
+ }
842
+ if (current_cell->x + current_cell->y*(my_cdf.header.rows) + 1 == 370737){
843
+ Rprintf("%d %c %c",isPM(current_cell->pbase,current_cell->tbase),current_cell->pbase,current_cell->tbase);
844
+ }
845
+ }
846
+
847
+ j=2;
848
+ cur_cells = my_cdf.units[i].unit_block[2].ncells;
849
+ cur_atoms = my_cdf.units[i].unit_block[2].natoms;
850
+ for (k=0; k < cur_cells; k++){
851
+ current_cell = &(my_cdf.units[i].unit_block[2].unit_cells[k]);
852
+ /* Rprintf("half : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
853
+ if(isPM(current_cell->pbase,current_cell->tbase)){
854
+ curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
855
+ } else {
856
+ curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
857
+ }
858
+ }
859
+
860
+ SET_VECTOR_ELT(dimnames,1,ColNames);
861
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
862
+ SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
863
+ UNPROTECT(3);
864
+ which_psname++;
865
+
866
+
867
+
868
+
869
+
870
+ j = 1;
871
+ cur_cells = my_cdf.units[i].unit_block[1].ncells;
872
+ cur_atoms = my_cdf.units[i].unit_block[1].natoms;
873
+ if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
874
+ tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
875
+ tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
876
+ tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
877
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
878
+ Free(tmp_name);
879
+ } else {
880
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[1].blockname));
881
+ }
882
+ PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
883
+ PROTECT(ColNames = allocVector(STRSXP,2));
884
+ PROTECT(dimnames = allocVector(VECSXP,2));
885
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
886
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
887
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
888
+
889
+ for (k=0; k < cur_cells; k++){
890
+ current_cell = &(my_cdf.units[i].unit_block[1].unit_cells[k]);
891
+ /* Rprintf("Dual : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
892
+ if(isPM(current_cell->pbase,current_cell->tbase)){
893
+ curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
894
+ } else {
895
+ curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
896
+ }
897
+ }
898
+
899
+ j=3;
900
+ cur_cells = my_cdf.units[i].unit_block[3].ncells;
901
+ cur_atoms = my_cdf.units[i].unit_block[3].natoms;
902
+ for (k=0; k < cur_cells; k++){
903
+ current_cell = &(my_cdf.units[i].unit_block[3].unit_cells[k]);
904
+ /* Rprintf("half deux : %d %d %d %u %u \n",cur_cells, current_cell->atomnumber, cur_atoms,current_cell->x,current_cell->y); */
905
+ if(isPM(current_cell->pbase,current_cell->tbase)){
906
+ curlocs[current_cell->atomnumber - (2*cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
907
+ } else {
908
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
909
+ }
910
+ }
911
+
912
+ SET_VECTOR_ELT(dimnames,1,ColNames);
913
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
914
+ SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
915
+ UNPROTECT(3);
916
+ which_psname++;
917
+
918
+ } else {
919
+ error("makecdfenv does not currently know how to handle cdf files of this type (genotyping with blocks != 1 or 4.)");
920
+ }
921
+ #else
922
+ error("makecdfenv does not currently know how to handle cdf files of this type (genotyping).");
923
+ #endif
924
+
925
+
926
+
927
+
928
+ } else {
929
+ error("makecdfenv does not currently know how to handle cdf files of this type (ie not expression or genotyping)");
930
+ }
931
+
932
+
933
+ #ifdef READ_CDF_DEBUG
934
+ Rprintf("\n");
935
+ #endif
936
+ }
937
+
938
+ if (my_cdf.units[0].unittype ==2){
939
+ PROTECT(PSnames = allocVector(STRSXP,which_psname));
940
+ PROTECT(LocMap = allocVector(VECSXP,which_psname));
941
+ for (i =0; i < which_psname; i++){
942
+ SET_STRING_ELT(PSnames,i,mkChar(CHAR(STRING_ELT(tempPSnames,i))));
943
+ SET_VECTOR_ELT(LocMap,i,VECTOR_ELT(tempLocMap,i));
944
+ }
945
+
946
+ }
947
+ #ifdef READ_CDF_DEBUG
948
+ Rprintf("%d \n",which_psname);
949
+ #endif
950
+ setAttrib(LocMap,R_NamesSymbol,PSnames);
951
+ SET_VECTOR_ELT(CDFInfo,0,Dimensions);
952
+ SET_VECTOR_ELT(CDFInfo,1,LocMap);
953
+ if (my_cdf.units[0].unittype ==2){
954
+ UNPROTECT(6);
955
+ } else {
956
+ UNPROTECT(4);
957
+ }
958
+
959
+ dealloc_cdf_xda(&my_cdf);
960
+ return CDFInfo;
961
+
962
+ }
963
+
964
+
965
+
966
+
967
+ /* This function is for reading in the entire binary cdf file and then
968
+ * returing the structure in a complex list object.
969
+ * The fullstructure argument is expected to be a BOOLEAN. If TRUE the
970
+ * entire contents of the CDF file are returned.
971
+ * If False, a modified CDFENV style structure is returned
972
+ */
973
+
974
+
975
+
976
+ SEXP ReadCDFFileIntoRList(SEXP filename,SEXP fullstructure){
977
+
978
+ SEXP CDFInfo = R_NilValue; /* this is the object that will be returned */
979
+ SEXP CDFInfoNames;
980
+ SEXP HEADER; /* Will store the header information */
981
+ SEXP HEADERNames;
982
+ SEXP Dimensions;
983
+ SEXP DimensionsNames;
984
+ SEXP REFSEQ; /* Resequencing reference sequence */
985
+ SEXP UNITNAMES;
986
+
987
+ SEXP FILEPOSITIONS;
988
+ SEXP FILEPOSITIONSQC;
989
+ SEXP FILEPOSITIONSUNITS;
990
+ SEXP FILEPOSITIONSNames;
991
+
992
+ SEXP QCUNITS;
993
+ SEXP QCUNITSsub;
994
+ SEXP QCUNITSsubNames;
995
+ SEXP QCHEADER;
996
+ SEXP QCHEADERNames;
997
+ SEXP QCUNITSProbeInfo;
998
+ SEXP QCUNITSProbeInfoX;
999
+ SEXP QCUNITSProbeInfoY;
1000
+ SEXP QCUNITSProbeInfoPL;
1001
+ SEXP QCUNITSProbeInfoPMFLAG;
1002
+ SEXP QCUNITSProbeInfoBGFLAG;
1003
+ SEXP QCUNITSProbeInfoNames;
1004
+ SEXP QCUNITSProbeInforow_names;
1005
+
1006
+ SEXP UNITS;
1007
+ SEXP tmpUNIT;
1008
+ SEXP tmpUNITNames;
1009
+ SEXP UNITSHeader;
1010
+ SEXP UNITSHeaderNames;
1011
+ SEXP tmpUNITSBlock;
1012
+ SEXP UNITSBlock;
1013
+ SEXP UNITSBlockNames;
1014
+ SEXP UNITSBlockHeader;
1015
+ SEXP UNITSBlockHeaderNames;
1016
+ SEXP UNITSBlockInfo;
1017
+ SEXP UNITSBlockInfoNames;
1018
+ SEXP UNITSBlockInforow_names;
1019
+ SEXP UNITSBlockAtom ;
1020
+ SEXP UNITSBlockX;
1021
+ SEXP UNITSBlockY;
1022
+ SEXP UNITSBlockIndexPos;
1023
+ SEXP UNITSBlockPbase;
1024
+ SEXP UNITSBlockTbase;
1025
+
1026
+
1027
+
1028
+
1029
+
1030
+
1031
+
1032
+
1033
+ char buf[10];
1034
+ int i,j,k;
1035
+
1036
+ cdf_xda my_cdf;
1037
+ const char *cur_file_name;
1038
+ cur_file_name = CHAR(STRING_ELT(filename,0));
1039
+
1040
+ /* Read in the xda style CDF file into memory */
1041
+ if (!read_cdf_xda(cur_file_name,&my_cdf)){
1042
+ error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
1043
+ }
1044
+
1045
+
1046
+ if (asInteger(fullstructure)){
1047
+ /* return the full structure */
1048
+ PROTECT(CDFInfo = allocVector(VECSXP,5));
1049
+
1050
+ PROTECT(CDFInfoNames = allocVector(STRSXP,5));
1051
+ SET_STRING_ELT(CDFInfoNames,0,mkChar("Header"));
1052
+ SET_STRING_ELT(CDFInfoNames,1,mkChar("UnitNames"));
1053
+ SET_STRING_ELT(CDFInfoNames,2,mkChar("FilePositions"));
1054
+ SET_STRING_ELT(CDFInfoNames,3,mkChar("QCUnits"));
1055
+ SET_STRING_ELT(CDFInfoNames,4,mkChar("Units"));
1056
+ setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
1057
+ UNPROTECT(1);
1058
+
1059
+ PROTECT(HEADER = allocVector(VECSXP,2));
1060
+ PROTECT(HEADERNames = allocVector(STRSXP,2));
1061
+ SET_STRING_ELT(HEADERNames,0,mkChar("Dimensions"));
1062
+ SET_STRING_ELT(HEADERNames,1,mkChar("ReseqRefSeq"));
1063
+ setAttrib(HEADER,R_NamesSymbol,HEADERNames);
1064
+ UNPROTECT(1);
1065
+
1066
+ PROTECT(Dimensions = allocVector(REALSXP,7));
1067
+ NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.magicnumber;
1068
+ NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.version_number;
1069
+ NUMERIC_POINTER(Dimensions)[2] = (double)my_cdf.header.cols;
1070
+ NUMERIC_POINTER(Dimensions)[3] = (double)my_cdf.header.rows;
1071
+ NUMERIC_POINTER(Dimensions)[4] = (double)my_cdf.header.n_qc_units;
1072
+ NUMERIC_POINTER(Dimensions)[5] = (double)my_cdf.header.n_units;
1073
+ NUMERIC_POINTER(Dimensions)[6] = (double)my_cdf.header.len_ref_seq;
1074
+
1075
+ PROTECT(DimensionsNames = allocVector(STRSXP,7));
1076
+ SET_STRING_ELT(DimensionsNames,0,mkChar("MagicNumber"));
1077
+ SET_STRING_ELT(DimensionsNames,1,mkChar("VersionNumber"));
1078
+ SET_STRING_ELT(DimensionsNames,2,mkChar("Cols"));
1079
+ SET_STRING_ELT(DimensionsNames,3,mkChar("Rows"));
1080
+ SET_STRING_ELT(DimensionsNames,4,mkChar("n.QCunits"));
1081
+ SET_STRING_ELT(DimensionsNames,5,mkChar("n.units"));
1082
+ SET_STRING_ELT(DimensionsNames,6,mkChar("LenRefSeq"));
1083
+ setAttrib(Dimensions,R_NamesSymbol,DimensionsNames);
1084
+ SET_VECTOR_ELT(HEADER,0,Dimensions);
1085
+ UNPROTECT(2);
1086
+
1087
+ PROTECT(REFSEQ = allocVector(STRSXP,1));
1088
+ SET_STRING_ELT(REFSEQ,0,mkChar(my_cdf.header.ref_seq));
1089
+ SET_VECTOR_ELT(HEADER,1,REFSEQ);
1090
+ UNPROTECT(1);
1091
+
1092
+ SET_VECTOR_ELT(CDFInfo,0,HEADER);
1093
+ UNPROTECT(1);
1094
+
1095
+ PROTECT(UNITNAMES = allocVector(STRSXP,my_cdf.header.n_units));
1096
+ for (i =0; i < my_cdf.header.n_units; i++){
1097
+ SET_STRING_ELT(UNITNAMES,i,mkChar(my_cdf.probesetnames[i]));
1098
+ }
1099
+ SET_VECTOR_ELT(CDFInfo,1,UNITNAMES);
1100
+ UNPROTECT(1);
1101
+
1102
+ PROTECT(FILEPOSITIONS = allocVector(VECSXP,2));
1103
+ PROTECT(FILEPOSITIONSQC = allocVector(REALSXP,my_cdf.header.n_qc_units));
1104
+ PROTECT(FILEPOSITIONSUNITS = allocVector(REALSXP,my_cdf.header.n_units));
1105
+ for (i =0; i < my_cdf.header.n_qc_units; i++){
1106
+ NUMERIC_POINTER(FILEPOSITIONSQC)[i] = (double)my_cdf.qc_start[i];
1107
+ }
1108
+ for (i =0; i < my_cdf.header.n_units; i++){
1109
+ NUMERIC_POINTER(FILEPOSITIONSUNITS)[i] = (double)my_cdf.units_start[i];
1110
+ }
1111
+ SET_VECTOR_ELT(FILEPOSITIONS,0,FILEPOSITIONSQC);
1112
+ SET_VECTOR_ELT(FILEPOSITIONS,1,FILEPOSITIONSUNITS);
1113
+ PROTECT(FILEPOSITIONSNames = allocVector(STRSXP,2));
1114
+ SET_STRING_ELT(FILEPOSITIONSNames,0,mkChar("FilePosQC"));
1115
+ SET_STRING_ELT(FILEPOSITIONSNames,1,mkChar("FilePosUnits"));
1116
+ setAttrib(FILEPOSITIONS,R_NamesSymbol,FILEPOSITIONSNames);
1117
+ SET_VECTOR_ELT(CDFInfo,2,FILEPOSITIONS);
1118
+ UNPROTECT(4);
1119
+
1120
+ PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.n_qc_units));
1121
+ for (i =0; i < my_cdf.header.n_qc_units; i++){
1122
+ PROTECT(QCUNITSsub = allocVector(VECSXP,2));
1123
+ PROTECT(QCUNITSsubNames= allocVector(STRSXP,2));
1124
+ SET_STRING_ELT(QCUNITSsubNames,0,mkChar("QCUnitHeader"));
1125
+ SET_STRING_ELT(QCUNITSsubNames,1,mkChar("QCUnitInfo"));
1126
+ setAttrib(QCUNITSsub,R_NamesSymbol,QCUNITSsubNames);
1127
+
1128
+ PROTECT(QCHEADER = allocVector(REALSXP,2));
1129
+ NUMERIC_POINTER(QCHEADER)[0] = (double)my_cdf.qc_units[i].type;
1130
+ NUMERIC_POINTER(QCHEADER)[1] = (double)my_cdf.qc_units[i].n_probes;
1131
+ PROTECT(QCHEADERNames = allocVector(STRSXP,2));
1132
+ SET_STRING_ELT(QCHEADERNames,0,mkChar("Type"));
1133
+ SET_STRING_ELT(QCHEADERNames,1,mkChar("n.probes"));
1134
+
1135
+ setAttrib(QCHEADER,R_NamesSymbol,QCHEADERNames);
1136
+ SET_VECTOR_ELT(QCUNITSsub,0,QCHEADER);
1137
+
1138
+
1139
+ PROTECT(QCUNITSProbeInfo = allocVector(VECSXP,5));
1140
+ PROTECT(QCUNITSProbeInfoX = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1141
+ PROTECT(QCUNITSProbeInfoY = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1142
+ PROTECT(QCUNITSProbeInfoPL = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1143
+ PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1144
+ PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1145
+
1146
+ for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
1147
+ NUMERIC_POINTER(QCUNITSProbeInfoX)[j] = (double)my_cdf.qc_units[i].qc_probes[j].x;
1148
+ NUMERIC_POINTER(QCUNITSProbeInfoY)[j] = (double)my_cdf.qc_units[i].qc_probes[j].y;
1149
+ NUMERIC_POINTER(QCUNITSProbeInfoPL)[j] = (double)my_cdf.qc_units[i].qc_probes[j].probelength;
1150
+ NUMERIC_POINTER(QCUNITSProbeInfoPMFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].pmflag;
1151
+ NUMERIC_POINTER(QCUNITSProbeInfoBGFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].bgprobeflag;
1152
+ }
1153
+
1154
+ SET_VECTOR_ELT(QCUNITSProbeInfo,0,QCUNITSProbeInfoX);
1155
+ SET_VECTOR_ELT(QCUNITSProbeInfo,1,QCUNITSProbeInfoY);
1156
+ SET_VECTOR_ELT(QCUNITSProbeInfo,2,QCUNITSProbeInfoPL);
1157
+ SET_VECTOR_ELT(QCUNITSProbeInfo,3,QCUNITSProbeInfoPMFLAG);
1158
+ SET_VECTOR_ELT(QCUNITSProbeInfo,4,QCUNITSProbeInfoBGFLAG);
1159
+
1160
+ PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,5));
1161
+ SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
1162
+ SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
1163
+ SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("ProbeLength"));
1164
+ SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("PMFlag"));
1165
+ SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("BGProbeFlag"));
1166
+
1167
+ setAttrib(QCUNITSProbeInfo,R_NamesSymbol,QCUNITSProbeInfoNames);
1168
+
1169
+ PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1170
+
1171
+ for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
1172
+ sprintf(buf, "%d", j+1);
1173
+ SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
1174
+ }
1175
+
1176
+
1177
+
1178
+ setAttrib(QCUNITSProbeInfo, R_RowNamesSymbol, QCUNITSProbeInforow_names);
1179
+
1180
+
1181
+ setAttrib(QCUNITSProbeInfo,R_ClassSymbol,mkString("data.frame"));
1182
+
1183
+ SET_VECTOR_ELT(QCUNITSsub,1,QCUNITSProbeInfo);
1184
+ SET_VECTOR_ELT(QCUNITS,i,QCUNITSsub);
1185
+ UNPROTECT(12);
1186
+ }
1187
+ SET_VECTOR_ELT(CDFInfo,3,QCUNITS);
1188
+ UNPROTECT(1);
1189
+
1190
+
1191
+ PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.n_units));
1192
+ for (i =0; i < my_cdf.header.n_units; i++){
1193
+ PROTECT(tmpUNIT = allocVector(VECSXP,2));
1194
+ PROTECT(tmpUNITNames = allocVector(STRSXP,2));
1195
+ SET_STRING_ELT(tmpUNITNames,0,mkChar("UnitHeader"));
1196
+ SET_STRING_ELT(tmpUNITNames,1,mkChar("Block"));
1197
+ setAttrib(tmpUNIT,R_NamesSymbol,tmpUNITNames);
1198
+
1199
+
1200
+ PROTECT(UNITSHeader = allocVector(REALSXP,7));
1201
+ PROTECT(UNITSHeaderNames = allocVector(STRSXP,7));
1202
+ SET_STRING_ELT(UNITSHeaderNames,0,mkChar("UnitType"));
1203
+ SET_STRING_ELT(UNITSHeaderNames,1,mkChar("Direction"));
1204
+ SET_STRING_ELT(UNITSHeaderNames,2,mkChar("n.atoms"));
1205
+ SET_STRING_ELT(UNITSHeaderNames,3,mkChar("n.blocks"));
1206
+ SET_STRING_ELT(UNITSHeaderNames,4,mkChar("n.cells"));
1207
+ SET_STRING_ELT(UNITSHeaderNames,5,mkChar("UnitNumber"));
1208
+ SET_STRING_ELT(UNITSHeaderNames,6,mkChar("n.cellsperatom"));
1209
+
1210
+ setAttrib(UNITSHeader,R_NamesSymbol,UNITSHeaderNames);
1211
+
1212
+ NUMERIC_POINTER(UNITSHeader)[0] = (double)my_cdf.units[i].unittype;
1213
+ NUMERIC_POINTER(UNITSHeader)[1] = (double)my_cdf.units[i].direction;
1214
+ NUMERIC_POINTER(UNITSHeader)[2] = (double)my_cdf.units[i].natoms;
1215
+ NUMERIC_POINTER(UNITSHeader)[3] = (double)my_cdf.units[i].nblocks;
1216
+ NUMERIC_POINTER(UNITSHeader)[4] = (double)my_cdf.units[i].ncells;
1217
+ NUMERIC_POINTER(UNITSHeader)[5] = (double)my_cdf.units[i].unitnumber;
1218
+ NUMERIC_POINTER(UNITSHeader)[6] = (double)my_cdf.units[i].ncellperatom;
1219
+
1220
+ PROTECT(tmpUNITSBlock = allocVector(VECSXP,my_cdf.units[i].nblocks));
1221
+ for (j=0; j < my_cdf.units[i].nblocks; j++){
1222
+ PROTECT(UNITSBlock = allocVector(VECSXP,3));
1223
+ PROTECT(UNITSBlockNames = allocVector(STRSXP,3));
1224
+ SET_STRING_ELT(UNITSBlockNames,0,mkChar("Header"));
1225
+ SET_STRING_ELT(UNITSBlockNames,1,mkChar("Name"));
1226
+ SET_STRING_ELT(UNITSBlockNames,2,mkChar("UnitInfo"));
1227
+ setAttrib(UNITSBlock,R_NamesSymbol,UNITSBlockNames);
1228
+
1229
+ PROTECT(UNITSBlockHeader = allocVector(REALSXP,6));
1230
+ PROTECT(UNITSBlockHeaderNames= allocVector(VECSXP,6));
1231
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,0,mkChar("n.atoms"));
1232
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,1,mkChar("n.cells"));
1233
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,2,mkChar("n.cellsperatom"));
1234
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,3,mkChar("Direction"));
1235
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,4,mkChar("firstatom"));
1236
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,5,mkChar("unused"));
1237
+
1238
+ NUMERIC_POINTER(UNITSBlockHeader)[0] = (double)my_cdf.units[i].unit_block[j].natoms;
1239
+ NUMERIC_POINTER(UNITSBlockHeader)[1] = (double)my_cdf.units[i].unit_block[j].ncells;
1240
+ NUMERIC_POINTER(UNITSBlockHeader)[2] = (double)my_cdf.units[i].unit_block[j].ncellperatom;
1241
+ NUMERIC_POINTER(UNITSBlockHeader)[3] = (double)my_cdf.units[i].unit_block[j].direction;
1242
+ NUMERIC_POINTER(UNITSBlockHeader)[4] = (double)my_cdf.units[i].unit_block[j].firstatom;
1243
+ NUMERIC_POINTER(UNITSBlockHeader)[5] = (double)my_cdf.units[i].unit_block[j].unused;
1244
+
1245
+
1246
+ setAttrib(UNITSBlockHeader,R_NamesSymbol,UNITSBlockHeaderNames);
1247
+
1248
+ SET_VECTOR_ELT(UNITSBlock,0,UNITSBlockHeader);
1249
+
1250
+ SET_VECTOR_ELT(UNITSBlock,1,mkString(my_cdf.units[i].unit_block[j].blockname));
1251
+
1252
+ PROTECT(UNITSBlockInfo = allocVector(VECSXP,6));
1253
+
1254
+ PROTECT(UNITSBlockInfoNames = allocVector(STRSXP,6));
1255
+ SET_STRING_ELT(UNITSBlockInfoNames,0,mkChar("atom.number"));
1256
+ SET_STRING_ELT(UNITSBlockInfoNames,1,mkChar("x"));
1257
+ SET_STRING_ELT(UNITSBlockInfoNames,2,mkChar("y"));
1258
+ SET_STRING_ELT(UNITSBlockInfoNames,3,mkChar("index.position"));
1259
+ SET_STRING_ELT(UNITSBlockInfoNames,4,mkChar("pbase"));
1260
+ SET_STRING_ELT(UNITSBlockInfoNames,5,mkChar("tbase"));
1261
+
1262
+ setAttrib(UNITSBlockInfo,R_NamesSymbol,UNITSBlockInfoNames);
1263
+
1264
+
1265
+ PROTECT(UNITSBlockInforow_names = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
1266
+
1267
+ for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
1268
+ sprintf(buf, "%d", k+1);
1269
+ SET_STRING_ELT(UNITSBlockInforow_names,k,mkChar(buf));
1270
+ }
1271
+
1272
+ PROTECT(UNITSBlockAtom = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1273
+ PROTECT(UNITSBlockX = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1274
+ PROTECT(UNITSBlockY = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1275
+ PROTECT(UNITSBlockIndexPos = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1276
+ PROTECT(UNITSBlockPbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
1277
+ PROTECT(UNITSBlockTbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
1278
+
1279
+ for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
1280
+ /* Rprintf("%d %d %d\n",i,j,k);
1281
+ // NUMERIC_POINTER(UNITSBlockAtom)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
1282
+ // NUMERIC_POINTER(UNITSBlockX)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].x;
1283
+ // NUMERIC_POINTER(UNITSBlockY)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].y;
1284
+ // NUMERIC_POINTER(UNITSBlockIndexPos)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos; */
1285
+ INTEGER_POINTER(UNITSBlockAtom)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
1286
+ INTEGER_POINTER(UNITSBlockX)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].x;
1287
+ INTEGER_POINTER(UNITSBlockY)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].y;
1288
+ INTEGER_POINTER(UNITSBlockIndexPos)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos;
1289
+ sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].pbase);
1290
+ SET_STRING_ELT(UNITSBlockPbase,k,mkChar(buf));
1291
+
1292
+ sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].tbase);
1293
+ SET_STRING_ELT(UNITSBlockTbase,k,mkChar(buf));
1294
+ }
1295
+
1296
+ SET_VECTOR_ELT(UNITSBlockInfo,0,UNITSBlockAtom);
1297
+ SET_VECTOR_ELT(UNITSBlockInfo,1,UNITSBlockX);
1298
+ SET_VECTOR_ELT(UNITSBlockInfo,2,UNITSBlockY);
1299
+ SET_VECTOR_ELT(UNITSBlockInfo,3,UNITSBlockIndexPos);
1300
+ SET_VECTOR_ELT(UNITSBlockInfo,4,UNITSBlockPbase);
1301
+ SET_VECTOR_ELT(UNITSBlockInfo,5,UNITSBlockTbase);
1302
+ UNPROTECT(6);
1303
+
1304
+
1305
+
1306
+
1307
+ setAttrib(UNITSBlockInfo, R_RowNamesSymbol, UNITSBlockInforow_names);
1308
+ setAttrib(UNITSBlockInfo,R_ClassSymbol,mkString("data.frame"));
1309
+
1310
+ SET_VECTOR_ELT(UNITSBlock,2,UNITSBlockInfo);
1311
+
1312
+ SET_VECTOR_ELT(tmpUNITSBlock,j,UNITSBlock);
1313
+ UNPROTECT(7);
1314
+ }
1315
+
1316
+ SET_VECTOR_ELT(tmpUNIT,0,UNITSHeader);
1317
+ SET_VECTOR_ELT(tmpUNIT,1,tmpUNITSBlock);
1318
+
1319
+ SET_VECTOR_ELT(UNITS,i,tmpUNIT);
1320
+ UNPROTECT(5);
1321
+ }
1322
+ SET_VECTOR_ELT(CDFInfo,4,UNITS);
1323
+ UNPROTECT(1);
1324
+
1325
+
1326
+ } else {
1327
+ /* return the abbreviated structure */
1328
+ error("Abbreviated structure not yet implemented.\n");
1329
+
1330
+
1331
+ }
1332
+
1333
+
1334
+
1335
+
1336
+
1337
+ dealloc_cdf_xda(&my_cdf);
1338
+ UNPROTECT(1);
1339
+ return CDFInfo;
1340
+
1341
+
1342
+ }