bio-affy 0.1.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
Binary file
@@ -0,0 +1,347 @@
1
+ /*****************************************************************
2
+ **
3
+ ** This file contains the record definitions for the CDF files.
4
+ ** They come in two formats (textual and binary). The first set
5
+ ** represents the textual form (starting with cdf_text_..
6
+ **
7
+ ******************************************************************/
8
+
9
+ /*****************************************************************
10
+ ** Textual CDF formats
11
+ **
12
+ ** A structure for holding information in the
13
+ ** "CDF" and "Chip" sections (basically header information)
14
+ **
15
+ ******************************************************************/
16
+
17
/*
 * Header record of a text-format CDF file (the "CDF" and "Chip" sections).
 * The text parser is not part of this header, so the per-field notes below
 * are inferred from the field names -- TODO confirm against read_cdf_text().
 */
typedef struct {
  char *version;        /* presumably the CDF version string */
  char *name;           /* presumably the chip name */
  int rows;             /* chip dimensions */
  int cols;
  int numberofunits;    /* presumably the number of units (probesets) */
  int maxunit;
  int NumQCUnits;       /* presumably the number of QC units */
  char *chipreference;
} cdf_text_header;
27
+
28
+ /*****************************************************************
29
+ **
30
+ **
31
+ ** A structure for holding QC probe information
32
+ ** Note the "CYCLES" item is ignored and never parsed
33
+ **
34
+ ******************************************************************/
35
+
36
+
37
/*
 * One QC probe entry of a text-format CDF file.
 * (The "CYCLES" item of the file is ignored and never parsed.)
 */
typedef struct {
  int x;          /* probe coordinates */
  int y;
  char *probe;    /* probe text */
  int plen;       /* presumably the probe length -- TODO confirm */
  int atom;
  int index;
  int match;      /* presumably the perfect-match flag -- TODO confirm */
  int bg;         /* presumably the background-probe flag -- TODO confirm */
} cdf_text_qc_probe;
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+ /*******************************************************************
55
+ **
56
+ ** A structure for holding QC units information. These are
57
+ ** areas of the chip that contain probes that may or may not be useful
58
+ ** for QC and other purposes.
59
+ **
60
+ **
61
+ *******************************************************************/
62
+
63
+
64
+
65
+ typedef struct{
66
+ int type;
67
+ unsigned int n_probes;
68
+ int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
69
+ cdf_text_qc_probe *qc_probes;
70
+
71
+ } cdf_text_qc_unit;
72
+
73
+
74
+ /*******************************************************************
75
+ **
76
+ ** A structure for holding probe information for unit_blocks_probes
77
+ **
78
+ ** probes are stored within blocks
79
+ **
80
+ *******************************************************************/
81
+
82
/*
 * One probe (cell) inside a block of a text-format CDF unit.
 * Field meanings are not documented in this header; the names mirror the
 * columns of the text CDF cell records -- TODO confirm against the parser.
 */
typedef struct {
  int x;            /* probe coordinates */
  int y;
  char *probe;
  char *feat;
  char *qual;
  int expos;
  int pos;
  char *cbase;
  char *pbase;      /* presumably the probe base -- TODO confirm */
  char *tbase;      /* presumably the target base -- TODO confirm */
  int atom;
  int index;
  int codonid;
  int codon;
  int regiontype;
  char *region;
} cdf_text_unit_block_probe;
100
+
101
+
102
+
103
+
104
+ /*******************************************************************
105
+ **
106
+ ** A structure holding Unit_blocks
107
+ **
108
+ ** blocks are stored within units.
109
+ ** blocks contain many probes
110
+ **
111
+ *******************************************************************/
112
+
113
+ typedef struct{
114
+ char *name;
115
+ int blocknumber;
116
+ int num_atoms;
117
+ int num_cells;
118
+ int start_position;
119
+ int stop_position;
120
+ int direction;
121
+ cdf_text_unit_block_probe *probes;
122
+
123
+ } cdf_text_unit_block;
124
+
125
+
126
+
127
+
128
+
129
+
130
+ /*******************************************************************
131
+ **
132
+ ** A structure for holding "Units", also known as probesets
133
+ **
134
+ ** Each unit contains one or more blocks. Each block contains one or
135
+ ** more probes
136
+ **
137
+ *******************************************************************/
138
+
139
+
140
+ typedef struct{
141
+ char *name;
142
+ int direction;
143
+ int num_atoms;
144
+ int num_cells;
145
+ int unit_number;
146
+ int unit_type;
147
+ int numberblocks;
148
+ int MutationType;
149
+ cdf_text_unit_block *blocks;
150
+ } cdf_text_unit;
151
+
152
+
153
+
154
+ /*******************************************************************
155
+ **
156
+ ** A structure for holding a text CDF file
157
+ **
158
+ ** text cdf files consist of
159
+ ** basic header information
160
+ ** qcunits
161
+ ** - qc probes
162
+ ** units (aka probesets)
163
+ ** - blocks
164
+ ** - probes
165
+ **
166
+ **
167
+ *******************************************************************/
168
+
169
+ typedef struct{
170
+ cdf_text_header header;
171
+ cdf_text_qc_unit *qc_units;
172
+ cdf_text_unit *units;
173
+ } cdf_text;
174
+
175
+
176
+
177
+ /************************************************************************
178
+ **
179
+ ** Structures for holding the CDF file information. Basically
180
+ ** header/general information that appears at the start of the CDF file
181
+ **
182
+ ************************************************************************/
183
+
184
/*
 * General information found at the start of a binary (xda) CDF file.
 */
typedef struct {
  int magicnumber;        /* read_cdf_xda accepts only the value 67 */
  int version_number;     /* read_cdf_xda accepts only the value 1 */
  unsigned short rows;    /* chip dimensions (the file stores cols before rows) */
  unsigned short cols;
  int n_units;            /* number of units (probesets) */
  int n_qc_units;         /* number of QC units */
  int len_ref_seq;        /* number of chars in ref_seq */
  int i;                  /* not set by read_cdf_xda */
  char *ref_seq;          /* reference sequence, len_ref_seq chars */
} cdf_xda_header;
193
+
194
+
195
+ /****************************************************************************
196
+ **
197
+ ** The following two structures store QC units and QC unit probe information
198
+ **
199
+ ** QC information, repeated for each QC unit:
200
+ ** Type - unsigned short
201
+ ** Number of probes - integer
202
+ **
203
+ ** Probe information, repeated for each probe in the QC unit:
204
+ ** X coordinate - unsigned short
205
+ ** Y coordinate - unsigned short
206
+ ** Probe length - unsigned char
207
+ ** Perfect match flag - unsigned char
208
+ ** Background probe flag - unsigned char
209
+ **
210
+ ****************************************************************************/
211
+
212
+
213
/*
 * One probe of a QC unit in a binary (xda) CDF file.
 */
typedef struct {
  unsigned short x;             /* X coordinate */
  unsigned short y;             /* Y coordinate */
  unsigned char probelength;    /* probe length */
  unsigned char pmflag;         /* perfect match flag */
  unsigned char bgprobeflag;    /* background probe flag */
} cdf_qc_probe;
221
+
222
+ typedef struct{
223
+ unsigned short type;
224
+ unsigned int n_probes;
225
+
226
+ cdf_qc_probe *qc_probes;
227
+
228
+ } cdf_qc_unit;
229
+
230
+
231
+ /****************************************************************************
232
+ **
233
+ ** The following three structures store information for units (sometimes called
234
+ ** probesets), blocks (of which there are one or more within a unit) and cells
235
+ ** sometimes called probe of which there are one or more within each block
236
+ **
237
+ **
238
+ ** Unit information, repeated for each unit:
239
+ **
240
+ ** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
241
+ ** Direction - unsigned char
242
+ ** Number of atoms - integer
243
+ ** Number of blocks - integer (always 1 for expression units)
244
+ ** Number of cells - integer
245
+ ** Unit number (probe set number) - integer
246
+ ** Number of cells per atom - unsigned char
247
+ **
248
+ **
249
+ **
250
+ ** Block information, repeated for each block in the unit:
251
+ **
252
+ ** Number of atoms - integer
253
+ ** Number of cells - integer
254
+ ** Number of cells per atom - unsigned char
255
+ ** Direction - unsigned char
256
+ ** The position of the first atom - integer
257
+ ** <unused integer value> - integer
258
+ ** The block name - char[64]
259
+ **
260
+ **
261
+ **
262
+ ** Cell information, repeated for each cell in the block:
263
+ **
264
+ ** Atom number - integer
265
+ ** X coordinate - unsigned short
266
+ ** Y coordinate - unsigned short
267
+ ** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
268
+ ** Base of probe at substitution position - char
269
+ ** Base of target at interrogation position - char
270
+ **
271
+ **
272
+ ****************************************************************************/
273
+
274
+
275
/*
 * One cell (probe) of a block in a binary (xda) CDF file.
 */
typedef struct {
  int atomnumber;      /* atom number */
  unsigned short x;    /* X coordinate */
  unsigned short y;    /* Y coordinate */
  int indexpos;        /* index position (relative to the sequence for
                          resequencing units; just the atom number for
                          expression and mapping units) */
  char pbase;          /* base of probe at substitution position */
  char tbase;          /* base of target at interrogation position */
} cdf_unit_cell;
283
+
284
+
285
+ typedef struct{
286
+ int natoms;
287
+ int ncells;
288
+ unsigned char ncellperatom;
289
+ unsigned char direction;
290
+ int firstatom;
291
+ int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
292
+ char blockname[64];
293
+
294
+ cdf_unit_cell *unit_cells;
295
+
296
+ } cdf_unit_block;
297
+
298
+
299
+ typedef struct{
300
+ unsigned short unittype;
301
+ unsigned char direction;
302
+ int natoms;
303
+ int nblocks;
304
+ int ncells;
305
+ int unitnumber;
306
+ unsigned char ncellperatom;
307
+
308
+ cdf_unit_block *unit_block;
309
+
310
+ } cdf_unit;
311
+
312
+
313
+ /****************************************************************************
314
+ **
315
+ ** A data structure for holding CDF information read from a xda format cdf file
316
+ **
317
+ ** note that this structure reads in everything including things that might not
318
+ ** be of any subsequent use.
319
+ **
320
+ ****************************************************************************/
321
+
322
+
323
+
324
+ typedef struct {
325
+
326
+ cdf_xda_header header; /* Header information */
327
+ char **probesetnames; /* Names of probesets */
328
+
329
+ int *qc_start; /* These are used for random access */
330
+ int *units_start;
331
+
332
+ cdf_qc_unit *qc_units;
333
+ cdf_unit *units;
334
+
335
+
336
+ } cdf_xda;
337
+
338
+
339
+
340
+
341
+ // int check_xda_file(const char *filename);
342
+ int isTextCDFFile(const char *filename);
343
+ int read_cdf_text(const char *filename, cdf_text *mycdf);
344
+ void dealloc_cdf_text(cdf_text *my_cdf);
345
+ int read_cdf_xda(const char *filename,cdf_xda *my_cdf);
346
+ void dealloc_cdf_xda(cdf_xda *my_cdf);
347
+
@@ -0,0 +1,1342 @@
1
+ /****************************************************************
2
+ **
3
+ ** File: read_cdf_xda.c
4
+ **
5
+ ** Implementation by: B. M. Bolstad <bmb@bmbolstad.com>
6
+ **
7
+ ** A parser designed to read the binary format cdf files.
8
+ ** Sometimes called the xda format.
9
+ **
10
+ ** Implemented based on documentation available from Affymetrix
11
+ **
12
+ ** Implementation begun 2005.
13
+ **
14
+ ** Modification Dates
15
+ ** Feb 4 - Initial version
16
+ ** Feb 5 - A bunch of hacks for SNP chips.
17
+ ** Apr 20
18
+ ** Aug 16, 2005 - Fix potential big endian bug
19
+ ** Sep 22, 2005 - Fix some signed/unsigned bugs
20
+ ** Dec 1, 2005 - Comment cleaning
21
+ ** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
22
+ ** May 31, 2006 - fix some compiler warnings
23
+ ** Aug 23, 2006 - fix a potential (but at current time non-existant) problem
24
+ ** when there are 0 qcunits or 0 units
25
+ ** Aug 25, 2007 - Move file reading functions to centralized location
26
+ ** Oct 27, 2007 - When building a cdfenv set NON identified values to NA (mostly affects MM for PM only arrays)
27
+ ** Nov 12, 2008 - Fix crash
28
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
29
+ **
30
+ ****************************************************************/
31
+
32
+ /** --- includes --- */
33
+ #include <R.h>
34
+ #include <Rdefines.h>
35
+
36
+ #include "stdlib.h"
37
+ #include "stdio.h"
38
+ #include "fread_functions.h"
39
+ #include <ctype.h>
40
+
41
+ /* #define READ_CDF_DEBUG */
42
+ /* #define READ_CDF_DEBUG_SNP */
43
+ #define READ_CDF_NOSNP
44
+
45
+
46
+
47
+ /************************************************************************
48
+ **
49
+ ** Structures for holding the CDF file information. Basically
50
+ ** header/general information that appears at the start of the CDF file
51
+ **
52
+ ************************************************************************/
53
+
54
/*
 * General information found at the start of a binary (xda) CDF file.
 */
typedef struct {
  int magicnumber;        /* read_cdf_xda accepts only the value 67 */
  int version_number;     /* read_cdf_xda accepts only the value 1 */
  unsigned short rows;    /* chip dimensions (the file stores cols before rows) */
  unsigned short cols;
  int n_units;            /* number of units (probesets) */
  int n_qc_units;         /* number of QC units */
  int len_ref_seq;        /* number of chars in ref_seq */
  int i;                  /* not set by read_cdf_xda */
  char *ref_seq;          /* reference sequence, len_ref_seq chars */
} cdf_xda_header;
63
+
64
+
65
+ /****************************************************************************
66
+ **
67
+ ** The following two structures store QC units and QC unit probe information
68
+ **
69
+ ** QC information, repeated for each QC unit:
70
+ ** Type - unsigned short
71
+ ** Number of probes - integer
72
+ **
73
+ ** Probe information, repeated for each probe in the QC unit:
74
+ ** X coordinate - unsigned short
75
+ ** Y coordinate - unsigned short
76
+ ** Probe length - unsigned char
77
+ ** Perfect match flag - unsigned char
78
+ ** Background probe flag - unsigned char
79
+ **
80
+ ****************************************************************************/
81
+
82
+
83
/*
 * One probe of a QC unit in a binary (xda) CDF file.
 */
typedef struct {
  unsigned short x;             /* X coordinate */
  unsigned short y;             /* Y coordinate */
  unsigned char probelength;    /* probe length */
  unsigned char pmflag;         /* perfect match flag */
  unsigned char bgprobeflag;    /* background probe flag */
} cdf_qc_probe;
91
+
92
+ typedef struct{
93
+ unsigned short type;
94
+ unsigned int n_probes;
95
+
96
+ cdf_qc_probe *qc_probes;
97
+
98
+ } cdf_qc_unit;
99
+
100
+
101
+ /****************************************************************************
102
+ **
103
+ ** The following three structures store information for units (sometimes called
104
+ ** probesets), blocks (of which there are one or more within a unit) and cells
105
+ ** sometimes called probe of which there are one or more within each block
106
+ **
107
+ **
108
+ ** Unit information, repeated for each unit:
109
+ **
110
+ ** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
111
+ ** Direction - unsigned char
112
+ ** Number of atoms - integer
113
+ ** Number of blocks - integer (always 1 for expression units)
114
+ ** Number of cells - integer
115
+ ** Unit number (probe set number) - integer
116
+ ** Number of cells per atom - unsigned char
117
+ **
118
+ **
119
+ **
120
+ ** Block information, repeated for each block in the unit:
121
+ **
122
+ ** Number of atoms - integer
123
+ ** Number of cells - integer
124
+ ** Number of cells per atom - unsigned char
125
+ ** Direction - unsigned char
126
+ ** The position of the first atom - integer
127
+ ** <unused integer value> - integer
128
+ ** The block name - char[64]
129
+ **
130
+ **
131
+ **
132
+ ** Cell information, repeated for each cell in the block:
133
+ **
134
+ ** Atom number - integer
135
+ ** X coordinate - unsigned short
136
+ ** Y coordinate - unsigned short
137
+ ** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
138
+ ** Base of probe at substitution position - char
139
+ ** Base of target at interrogation position - char
140
+ **
141
+ **
142
+ ****************************************************************************/
143
+
144
+
145
/*
 * One cell (probe) of a block in a binary (xda) CDF file.
 */
typedef struct {
  int atomnumber;      /* atom number */
  unsigned short x;    /* X coordinate */
  unsigned short y;    /* Y coordinate */
  int indexpos;        /* index position (relative to the sequence for
                          resequencing units; just the atom number for
                          expression and mapping units) */
  char pbase;          /* base of probe at substitution position */
  char tbase;          /* base of target at interrogation position */
} cdf_unit_cell;
153
+
154
+
155
+ typedef struct{
156
+ int natoms;
157
+ int ncells;
158
+ unsigned char ncellperatom;
159
+ unsigned char direction;
160
+ int firstatom;
161
+ int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
162
+ char blockname[64];
163
+
164
+ cdf_unit_cell *unit_cells;
165
+
166
+ } cdf_unit_block;
167
+
168
+
169
+ typedef struct{
170
+ unsigned short unittype;
171
+ unsigned char direction;
172
+ int natoms;
173
+ int nblocks;
174
+ int ncells;
175
+ int unitnumber;
176
+ unsigned char ncellperatom;
177
+
178
+ cdf_unit_block *unit_block;
179
+
180
+ } cdf_unit;
181
+
182
+
183
+ /****************************************************************************
184
+ **
185
+ ** A data structure for holding CDF information read from a xda format cdf file
186
+ **
187
+ ** note that this structure reads in everything including things that might not
188
+ ** be of any subsequent use.
189
+ **
190
+ ****************************************************************************/
191
+
192
+
193
+
194
+ typedef struct {
195
+
196
+ cdf_xda_header header; /* Header information */
197
+ char **probesetnames; /* Names of probesets */
198
+
199
+ int *qc_start; /* These are used for random access */
200
+ int *units_start;
201
+
202
+ cdf_qc_unit *qc_units;
203
+ cdf_unit *units;
204
+
205
+
206
+ } cdf_xda;
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+ /*************************************************************************
220
+ **
221
+ ** int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream)
222
+ **
223
+ ** cdf_qc_unit *my_unit - preallocated space to store qc unit information
224
+ ** int filelocation - indexing/location information used to read information
225
+ ** from file
226
+ ** FILE *instream - a pre-opened file to read from
227
+ **
228
+ ** reads a specified qc_unit from the file. Allocates space for the cdf_qc_probes
229
+ ** and also reads them in
230
+ **
231
+ **
232
+ *************************************************************************/
233
+
234
+ int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream){
235
+
236
+ int i;
237
+
238
+
239
+ fseek(instream,filelocation,SEEK_SET);
240
+
241
+ fread_uint16(&(my_unit->type),1,instream);
242
+ fread_uint32(&(my_unit->n_probes),1,instream);
243
+
244
+
245
+ my_unit->qc_probes = Calloc(my_unit->n_probes,cdf_qc_probe);
246
+
247
+ for (i=0; i < my_unit->n_probes; i++){
248
+ fread_uint16(&(my_unit->qc_probes[i].x),1,instream);
249
+ fread_uint16(&(my_unit->qc_probes[i].y),1,instream);
250
+ fread_uchar(&(my_unit->qc_probes[i].probelength),1,instream);
251
+ fread_uchar(&(my_unit->qc_probes[i].pmflag),1,instream);
252
+ fread_uchar(&(my_unit->qc_probes[i].bgprobeflag),1,instream);
253
+
254
+ }
255
+ return 1;
256
+ }
257
+
258
+ /*************************************************************************
259
+ **
260
+ ** int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream)
261
+ **
262
+ ** cdf_unit *my_unit - preallocated space to store unit (aka probeset) information
263
+ ** int filelocation - indexing/location information used to read information
264
+ ** from file
265
+ ** FILE *instream - a pre-opened file to read from
266
+ **
267
+ ** reads a specified probeset into the my_unit, including all blocks and all probes
268
+ ** it is assumed that the unit itself is preallocated. Blocks and probes within
269
+ ** the blocks are allocated by this function.
270
+ **
271
+ *************************************************************************/
272
+
273
+ int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream){
274
+
275
+ int i,j;
276
+
277
+ fseek(instream,filelocation,SEEK_SET);
278
+
279
+ fread_uint16(&(my_unit->unittype),1,instream);
280
+ fread_uchar(&(my_unit->direction),1,instream);
281
+
282
+
283
+ fread_int32(&(my_unit->natoms),1,instream);
284
+ fread_int32(&(my_unit->nblocks),1,instream);
285
+ fread_int32(&(my_unit->ncells),1,instream);
286
+ fread_int32(&(my_unit->unitnumber),1,instream);
287
+ fread_uchar(&(my_unit->ncellperatom),1,instream);
288
+
289
+ my_unit->unit_block = Calloc(my_unit->nblocks,cdf_unit_block);
290
+
291
+ for (i=0; i < my_unit->nblocks; i++){
292
+ fread_int32(&(my_unit->unit_block[i].natoms),1,instream);
293
+ fread_int32(&(my_unit->unit_block[i].ncells),1,instream);
294
+ fread_uchar(&(my_unit->unit_block[i].ncellperatom),1,instream);
295
+ fread_uchar(&(my_unit->unit_block[i].direction),1,instream);
296
+ fread_int32(&(my_unit->unit_block[i].firstatom),1,instream);
297
+ fread_int32(&(my_unit->unit_block[i].unused),1,instream);
298
+ fread_char(my_unit->unit_block[i].blockname,64,instream);
299
+
300
+ my_unit->unit_block[i].unit_cells = Calloc(my_unit->unit_block[i].ncells,cdf_unit_cell);
301
+
302
+ for (j=0; j < my_unit->unit_block[i].ncells; j++){
303
+ fread_int32(&(my_unit->unit_block[i].unit_cells[j].atomnumber),1,instream);
304
+ fread_uint16(&(my_unit->unit_block[i].unit_cells[j].x),1,instream);
305
+ fread_uint16(&(my_unit->unit_block[i].unit_cells[j].y),1,instream);
306
+ fread_int32(&(my_unit->unit_block[i].unit_cells[j].indexpos),1,instream);
307
+ fread_char(&(my_unit->unit_block[i].unit_cells[j].pbase),1,instream);
308
+ fread_char(&(my_unit->unit_block[i].unit_cells[j].tbase),1,instream);
309
+ }
310
+
311
+
312
+ }
313
+
314
+
315
+ return 1;
316
+
317
+ }
318
+
319
+ /*************************************************************************
320
+ **
321
+ ** void dealloc_cdf_xda(cdf_xda *my_cdf)
322
+ **
323
+ ** Deallocates all the previously allocated memory.
324
+ **
325
+ *************************************************************************/
326
+
327
+ void dealloc_cdf_xda(cdf_xda *my_cdf){
328
+
329
+ int i;
330
+
331
+ for (i=0; i < my_cdf->header.n_units; i++){
332
+ Free(my_cdf->probesetnames[i]);
333
+ }
334
+ Free(my_cdf->probesetnames);
335
+
336
+ Free(my_cdf->qc_start);
337
+ Free(my_cdf->units_start);
338
+
339
+ for (i=0; i < my_cdf->header.n_qc_units; i++){
340
+ Free(my_cdf->qc_units[i].qc_probes);
341
+ }
342
+
343
+ Free(my_cdf->qc_units);
344
+
345
+
346
+ for (i=0; i < my_cdf->header.n_units; i++){
347
+ Free(my_cdf->units[i].unit_block);
348
+ }
349
+ Free(my_cdf->units);
350
+ Free(my_cdf->header.ref_seq);
351
+
352
+ }
353
+
354
+
355
+
356
+ /*************************************************************
357
+ **
358
+ ** int read_cdf_xda(const char *filename)
359
+ **
360
+ ** filename - Name of the prospective binary cdf file
361
+ **
362
+ ** Returns 1 if the file was completely successfully parsed
363
+ ** otherwise 0 (and possibly prints a message to screen)
364
+ **
365
+ **
366
+ **
367
+ **
368
+ *************************************************************/
369
+
370
+ int read_cdf_xda(const char *filename,cdf_xda *my_cdf){
371
+
372
+ FILE *infile;
373
+
374
+ int i;
375
+
376
+ if ((infile = fopen(filename, "rb")) == NULL)
377
+ {
378
+ error("Unable to open the file %s",filename);
379
+ return 0;
380
+ }
381
+
382
+ if (!fread_int32(&my_cdf->header.magicnumber,1,infile)){
383
+ return 0;
384
+ }
385
+
386
+ if (!fread_int32(&my_cdf->header.version_number,1,infile)){
387
+ return 0;
388
+ }
389
+
390
+
391
+ if (my_cdf->header.magicnumber != 67){
392
+ Rprintf("Magic number is not 67. This is probably not a binary cdf file.\n");
393
+ return 0;
394
+ }
395
+
396
+ if (my_cdf->header.version_number != 1){
397
+ Rprintf("Don't know if version %d binary cdf files can be handled.\n",my_cdf->header.version_number);
398
+ return 0;
399
+ }
400
+ if (!fread_uint16(&my_cdf->header.cols,1,infile)){
401
+ return 0;
402
+ }
403
+ if (!fread_uint16(&my_cdf->header.rows,1,infile)){
404
+ return 0;
405
+ }
406
+
407
+ if (!fread_int32(&my_cdf->header.n_units,1,infile)){
408
+ return 0;
409
+ }
410
+
411
+ if (!fread_int32(&my_cdf->header.n_qc_units,1,infile)){
412
+ return 0;
413
+ }
414
+
415
+
416
+ if (!fread_int32(&my_cdf->header.len_ref_seq,1,infile)){
417
+ return 0;
418
+ }
419
+
420
+ my_cdf->header.ref_seq = Calloc(my_cdf->header.len_ref_seq,char);
421
+
422
+ fread_char(my_cdf->header.ref_seq, my_cdf->header.len_ref_seq, infile);
423
+ my_cdf->probesetnames = Calloc(my_cdf->header.n_units,char *);
424
+
425
+
426
+ for (i =0; i < my_cdf->header.n_units;i++){
427
+ my_cdf->probesetnames[i] = Calloc(64,char);
428
+ if (!fread_char(my_cdf->probesetnames[i], 64, infile)){
429
+ return 0;
430
+ }
431
+ }
432
+
433
+
434
+
435
+ my_cdf->qc_start = Calloc(my_cdf->header.n_qc_units,int);
436
+ my_cdf->units_start = Calloc(my_cdf->header.n_units,int);
437
+
438
+ /*** Old code that might fail if there is 0 QCunits or 0 Units
439
+ if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)
440
+ || !fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)){
441
+ return 0;
442
+ }
443
+ ***/
444
+
445
+ if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)) {
446
+ if(my_cdf->header.n_qc_units != 0) {
447
+ return 0;
448
+ }
449
+ }
450
+
451
+ if(!fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)) {
452
+ if(my_cdf->header.n_units != 0) {
453
+ return 0;
454
+ }
455
+ }
456
+
457
+ /* We will read in all the QC and Standard Units, rather than
458
+ random accessing what we need */
459
+ my_cdf->qc_units = Calloc(my_cdf->header.n_qc_units,cdf_qc_unit);
460
+
461
+
462
+ for (i =0; i < my_cdf->header.n_qc_units; i++){
463
+ if (!read_cdf_qcunit(&my_cdf->qc_units[i],my_cdf->qc_start[i],infile)){
464
+ return 0;
465
+ }
466
+ }
467
+
468
+ my_cdf->units = Calloc(my_cdf->header.n_units,cdf_unit);
469
+
470
+
471
+ for (i=0; i < my_cdf->header.n_units; i++){
472
+ if (!read_cdf_unit(&my_cdf->units[i],my_cdf->units_start[i],infile)){
473
+ return 0;
474
+ }
475
+ }
476
+
477
+
478
+ #ifdef READ_CDF_DEBUG
479
+ Rprintf("%d %d %d %d %d\n",my_cdf->header.cols,my_cdf->header.rows,my_cdf->header.n_units,my_cdf->header.n_qc_units,my_cdf->header.len_ref_seq);
480
+ for (i =0; i < my_cdf->header.n_units;i++){
481
+ Rprintf("%s\n",my_cdf->probesetnames[i]);
482
+ }
483
+
484
+ for (i =0; i < my_cdf->header.n_qc_units;i++){
485
+ Rprintf("%d\n",my_cdf->qc_start[i]);
486
+ }
487
+
488
+ for (i =0; i < my_cdf->header.n_qc_units;i++){
489
+ Rprintf("%d\n",my_cdf->units_start[i]);
490
+ }
491
+
492
+ Rprintf("%d %d\n",my_cdf->qc_units[0].type,my_cdf->qc_units[0].n_probes);
493
+
494
+ for (i=0; i < my_cdf->qc_units[0].n_probes; i++){
495
+ Rprintf("%d %d %d %u %d\n",my_cdf->qc_units[0].qc_probes[i].x,my_cdf->qc_units[0].qc_probes[i].y,
496
+ my_cdf->qc_units[0].qc_probes[i].probelength,
497
+ my_cdf->qc_units[0].qc_probes[i].pmflag,
498
+ my_cdf->qc_units[0].qc_probes[i].bgprobeflag);
499
+
500
+ }
501
+
502
+
503
+ Rprintf("%u %u %d %d %d %d %u\n",my_cdf->units[0].unittype,my_cdf->units[0].direction,
504
+ my_cdf->units[0].natoms,
505
+ my_cdf->units[0].nblocks,
506
+ my_cdf->units[0].ncells,
507
+ my_cdf->units[0].unitnumber,
508
+ my_cdf->units[0].ncellperatom);
509
+
510
+ Rprintf("%d %d %u %u %d %d %s\n",my_cdf->units[0].unit_block[0].natoms,my_cdf->units[0].unit_block[0].ncells,
511
+ my_cdf->units[0].unit_block[0].ncellperatom,
512
+ my_cdf->units[0].unit_block[0].direction,
513
+ my_cdf->units[0].unit_block[0].firstatom,
514
+ my_cdf->units[0].unit_block[0].unused,
515
+ my_cdf->units[0].unit_block[0].blockname);
516
+
517
+ for (i=0; i <my_cdf->units[0].unit_block[0].ncells ; i++){
518
+ Rprintf("%d %u %u %d %c %c\n",
519
+ my_cdf->units[0].unit_block[0].unit_cells[i].atomnumber,
520
+ my_cdf->units[0].unit_block[0].unit_cells[i].x,
521
+ my_cdf->units[0].unit_block[0].unit_cells[i].y,
522
+ my_cdf->units[0].unit_block[0].unit_cells[i].indexpos,
523
+ my_cdf->units[0].unit_block[0].unit_cells[i].pbase,
524
+ my_cdf->units[0].unit_block[0].unit_cells[i].tbase);
525
+ }
526
+ #endif
527
+
528
+ fclose(infile);
529
+ return 1;
530
+
531
+ /* fseek() */
532
+ }
533
+
534
+
535
+
536
+ /*************************************************************
537
+ **
538
+ ** int check_cdf_xda(const char *filename)
539
+ **
540
+ ** Opens the file given by filename and checks it to see if
541
+ ** it looks like a binary CDF file. returns 0 if
542
+ ** the file looks like it is not a binary CDF aka xda format
543
+ ** cdf file
544
+ **
545
+ **
546
+ *************************************************************/
547
+
548
+
549
/*
 * Opens filename and inspects its first two 32-bit integers to decide
 * whether it looks like a binary (xda) CDF file.
 *
 * Returns 1 when the magic number is 67 and the version number is 1,
 * otherwise 0.  error() is raised when the file cannot be opened or the
 * two integers cannot be read.  The file is closed before the function
 * returns or raises; previously it was never closed.
 */
int check_cdf_xda(const char *filename){

  FILE *infile;

  int magicnumber,version_number;
  int header_read;

  if ((infile = fopen(filename, "rb")) == NULL)
    {
      error("Unable to open the file %s",filename);
      return 0;
    }

  header_read = fread_int32(&magicnumber,1,infile)
    && fread_int32(&version_number,1,infile);

  /* Nothing more is read from the stream, so close it before any error()
     call has a chance to abandon this function */
  fclose(infile);

  if (!header_read){
    error("File corrupt or truncated?");
    return 0;
  }

  if (magicnumber != 67){
    return 0;
  }

  if (version_number != 1){
    return 0;
  }

  return 1;

}
586
+
587
+
588
+
589
+ /*************************************************************
590
+ **
591
+ ** static int isPM(char pbase,char tbase)
592
+ **
593
+ ** char pbase - probe base at substitution position
594
+ ** char tbase - target base at substitution position
595
+ **
596
+ ** this function works out whether a probe is a PM or MM
597
+ **
598
+ **
599
+ *************************************************************/
600
+
601
+
602
/*
 * Decides whether a probe is a perfect match (PM) or a mismatch (MM).
 *
 *   pbase - probe base at the substitution position
 *   tbase - target base at the substitution position
 *
 * Returns 0 (MM) when the two bases are equal, or when pbase is one of
 * A/T/C/G and tbase is not its complement.  Returns 1 (PM) otherwise.
 * The comparison is case-insensitive.
 */
static int isPM(char pbase,char tbase){

  /* toupper() is only defined for EOF and unsigned char values, so go
     through unsigned char in case plain char is signed */
  pbase = (char)toupper((unsigned char)pbase);
  tbase = (char)toupper((unsigned char)tbase);

  if (pbase == tbase){
    return 0;
  }

  switch (pbase){
  case 'A':
    return tbase == 'T';
  case 'T':
    return tbase == 'A';
  case 'C':
    return tbase == 'G';
  case 'G':
    return tbase == 'C';
  default:
    /* pbase is not a recognised base and differs from tbase */
    return 1;
  }

}
631
+
632
+
633
+
634
+ /*************************************************************
635
+ **
636
+ ** SEXP CheckCDFXDA(SEXP filename)
637
+ **
638
+ ** Takes a given file name and returns 1 if it is a xda format CDF file
639
+ ** otherwise it returns 0
640
+ **
641
+ *************************************************************/
642
+
643
+
644
+
645
+ SEXP CheckCDFXDA(SEXP filename){
646
+ SEXP tmp;
647
+ int good;
648
+ const char *cur_file_name;
649
+
650
+ cur_file_name = CHAR(STRING_ELT(filename,0));
651
+
652
+ good = check_cdf_xda(cur_file_name);
653
+
654
+ PROTECT(tmp= allocVector(INTSXP,1));
655
+
656
+ INTEGER(tmp)[0] = good;
657
+
658
+ UNPROTECT(1);
659
+ return tmp;
660
+ }
661
+
662
+
663
+
664
+
665
+
666
+
667
+ SEXP ReadCDFFile(SEXP filename){
668
+
669
+ SEXP CDFInfo;
670
+ SEXP Dimensions;
671
+ SEXP LocMap= R_NilValue,tempLocMap;
672
+ SEXP CurLocs;
673
+ SEXP PSnames = R_NilValue,tempPSnames;
674
+ SEXP ColNames;
675
+ SEXP dimnames;
676
+
677
+ cdf_xda my_cdf;
678
+ const char *cur_file_name;
679
+ /* char *tmp_name; */
680
+
681
+ int i,j,k;
682
+ int cur_blocks,cur_cells, cur_atoms;
683
+ /* int which_probetype; */
684
+ int which_psname=0;
685
+
686
+ cdf_unit_cell *current_cell;
687
+
688
+ double *curlocs;
689
+
690
+ /* int nrows, ncols; */
691
+
692
+
693
+ cur_file_name = CHAR(STRING_ELT(filename,0));
694
+
695
+ if (!read_cdf_xda(cur_file_name,&my_cdf)){
696
+ error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
697
+ }
698
+
699
+
700
+ /* We output:
701
+ nrows, ncols in an integer vector, plus a list of probesets PM MM locations (in the BioC style) */
702
+ PROTECT(CDFInfo = allocVector(VECSXP,2));
703
+ PROTECT(Dimensions = allocVector(REALSXP,2));
704
+
705
+ if (my_cdf.units[0].unittype ==1){
706
+ PROTECT(LocMap = allocVector(VECSXP,my_cdf.header.n_units));
707
+ PROTECT(PSnames = allocVector(STRSXP,my_cdf.header.n_units));
708
+ } else {
709
+ PROTECT(tempLocMap = allocVector(VECSXP,2*my_cdf.header.n_units));
710
+ PROTECT(tempPSnames = allocVector(STRSXP,2*my_cdf.header.n_units));
711
+ }
712
+
713
+ NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.rows;
714
+ NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.cols;
715
+
716
+
717
+ for (i=0; i < my_cdf.header.n_units; i++){
718
+ #ifdef READ_CDF_DEBUG
719
+ printf("%d\n",i);
720
+ #endif
721
+ cur_blocks = my_cdf.units[i].nblocks;
722
+
723
+ #ifdef READ_CDF_DEBUG
724
+ Rprintf("New Block: ");
725
+ #endif
726
+ if (my_cdf.units[i].unittype ==1){
727
+ /* Expression analysis */
728
+ for (j=0; j < cur_blocks; j++){
729
+
730
+ #ifdef READ_CDF_DEBUG
731
+ Rprintf("%s ",my_cdf.units[i].unit_block[j].blockname);
732
+ #endif
733
+
734
+ cur_cells = my_cdf.units[i].unit_block[j].ncells;
735
+ cur_atoms = my_cdf.units[i].unit_block[j].natoms;
736
+
737
+ SET_STRING_ELT(PSnames,i,mkChar(my_cdf.units[i].unit_block[j].blockname));
738
+
739
+ PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
740
+ PROTECT(ColNames = allocVector(STRSXP,2));
741
+ PROTECT(dimnames = allocVector(VECSXP,2));
742
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
743
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
744
+
745
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
746
+
747
+ for (k=0; k < cur_atoms*2; k++){
748
+ curlocs[k] = R_NaN;
749
+ }
750
+
751
+ for (k=0; k < cur_cells; k++){
752
+ current_cell = &(my_cdf.units[i].unit_block[j].unit_cells[k]);
753
+
754
+ if(isPM(current_cell->pbase,current_cell->tbase)){
755
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
756
+ } else {
757
+ curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
758
+ }
759
+ }
760
+
761
+
762
+
763
+ SET_VECTOR_ELT(dimnames,1,ColNames);
764
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
765
+ SET_VECTOR_ELT(LocMap,i,CurLocs);
766
+ UNPROTECT(3);
767
+ }
768
+ } else if (my_cdf.units[i].unittype == 2){
769
+ /* Genotyping array */
770
+
771
+ #ifndef READ_CDF_NOSNP
772
+ if (cur_blocks == 1){
773
+
774
+ cur_cells = my_cdf.units[i].unit_block[0].ncells;
775
+ cur_atoms = my_cdf.units[i].unit_block[0].natoms;
776
+
777
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
778
+
779
+ PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
780
+ PROTECT(ColNames = allocVector(STRSXP,2));
781
+ PROTECT(dimnames = allocVector(VECSXP,2));
782
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
783
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
784
+
785
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
786
+
787
+ for (k=0; k < cur_cells; k++){
788
+ current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
789
+
790
+ if(isPM(current_cell->pbase,current_cell->tbase)){
791
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
792
+ } else {
793
+ curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
794
+ }
795
+ }
796
+
797
+
798
+
799
+ SET_VECTOR_ELT(dimnames,1,ColNames);
800
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
801
+ SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
802
+ UNPROTECT(3);
803
+ which_psname++;
804
+
805
+ } else if (cur_blocks == 4){
806
+ for (j=0; j < cur_blocks; j++){
807
+ #ifdef READ_CDF_DEBUG_SNP
808
+ Rprintf("%s %s\n",my_cdf.probesetnames[i],my_cdf.units[i].unit_block[j].blockname);
809
+ #endif
810
+ }
811
+
812
+ j = 0;
813
+ cur_cells = my_cdf.units[i].unit_block[0].ncells;
814
+ cur_atoms = my_cdf.units[i].unit_block[0].natoms;
815
+ if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
816
+ tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
817
+ tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
818
+ tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
819
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
820
+ Free(tmp_name);
821
+ } else {
822
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
823
+ }
824
+
825
+ PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
826
+ PROTECT(ColNames = allocVector(STRSXP,2));
827
+ PROTECT(dimnames = allocVector(VECSXP,2));
828
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
829
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
830
+
831
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
832
+
833
+
834
+ for (k=0; k < cur_cells; k++){
835
+ current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
836
+ /* Rprintf("%d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
837
+ if(isPM(current_cell->pbase,current_cell->tbase)){
838
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
839
+ } else {
840
+ curlocs[current_cell->atomnumber+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
841
+ }
842
+ if (current_cell->x + current_cell->y*(my_cdf.header.rows) + 1 == 370737){
843
+ Rprintf("%d %c %c",isPM(current_cell->pbase,current_cell->tbase),current_cell->pbase,current_cell->tbase);
844
+ }
845
+ }
846
+
847
+ j=2;
848
+ cur_cells = my_cdf.units[i].unit_block[2].ncells;
849
+ cur_atoms = my_cdf.units[i].unit_block[2].natoms;
850
+ for (k=0; k < cur_cells; k++){
851
+ current_cell = &(my_cdf.units[i].unit_block[2].unit_cells[k]);
852
+ /* Rprintf("half : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
853
+ if(isPM(current_cell->pbase,current_cell->tbase)){
854
+ curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
855
+ } else {
856
+ curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
857
+ }
858
+ }
859
+
860
+ SET_VECTOR_ELT(dimnames,1,ColNames);
861
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
862
+ SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
863
+ UNPROTECT(3);
864
+ which_psname++;
865
+
866
+
867
+
868
+
869
+
870
+ j = 1;
871
+ cur_cells = my_cdf.units[i].unit_block[1].ncells;
872
+ cur_atoms = my_cdf.units[i].unit_block[1].natoms;
873
+ if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
874
+ tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
875
+ tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
876
+ tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
877
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
878
+ Free(tmp_name);
879
+ } else {
880
+ SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[1].blockname));
881
+ }
882
+ PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
883
+ PROTECT(ColNames = allocVector(STRSXP,2));
884
+ PROTECT(dimnames = allocVector(VECSXP,2));
885
+ SET_STRING_ELT(ColNames,0,mkChar("pm"));
886
+ SET_STRING_ELT(ColNames,1,mkChar("mm"));
887
+ curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
888
+
889
+ for (k=0; k < cur_cells; k++){
890
+ current_cell = &(my_cdf.units[i].unit_block[1].unit_cells[k]);
891
+ /* Rprintf("Dual : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
892
+ if(isPM(current_cell->pbase,current_cell->tbase)){
893
+ curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
894
+ } else {
895
+ curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
896
+ }
897
+ }
898
+
899
+ j=3;
900
+ cur_cells = my_cdf.units[i].unit_block[3].ncells;
901
+ cur_atoms = my_cdf.units[i].unit_block[3].natoms;
902
+ for (k=0; k < cur_cells; k++){
903
+ current_cell = &(my_cdf.units[i].unit_block[3].unit_cells[k]);
904
+ /* Rprintf("half deux : %d %d %d %u %u \n",cur_cells, current_cell->atomnumber, cur_atoms,current_cell->x,current_cell->y); */
905
+ if(isPM(current_cell->pbase,current_cell->tbase)){
906
+ curlocs[current_cell->atomnumber - (2*cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
907
+ } else {
908
+ curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
909
+ }
910
+ }
911
+
912
+ SET_VECTOR_ELT(dimnames,1,ColNames);
913
+ setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
914
+ SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
915
+ UNPROTECT(3);
916
+ which_psname++;
917
+
918
+ } else {
919
+ error("makecdfenv does not currently know how to handle cdf files of this type (genotyping with blocks != 1 or 4.)");
920
+ }
921
+ #else
922
+ error("makecdfenv does not currently know how to handle cdf files of this type (genotyping).");
923
+ #endif
924
+
925
+
926
+
927
+
928
+ } else {
929
+ error("makecdfenv does not currently know how to handle cdf files of this type (ie not expression or genotyping)");
930
+ }
931
+
932
+
933
+ #ifdef READ_CDF_DEBUG
934
+ Rprintf("\n");
935
+ #endif
936
+ }
937
+
938
+ if (my_cdf.units[0].unittype ==2){
939
+ PROTECT(PSnames = allocVector(STRSXP,which_psname));
940
+ PROTECT(LocMap = allocVector(VECSXP,which_psname));
941
+ for (i =0; i < which_psname; i++){
942
+ SET_STRING_ELT(PSnames,i,mkChar(CHAR(STRING_ELT(tempPSnames,i))));
943
+ SET_VECTOR_ELT(LocMap,i,VECTOR_ELT(tempLocMap,i));
944
+ }
945
+
946
+ }
947
+ #ifdef READ_CDF_DEBUG
948
+ Rprintf("%d \n",which_psname);
949
+ #endif
950
+ setAttrib(LocMap,R_NamesSymbol,PSnames);
951
+ SET_VECTOR_ELT(CDFInfo,0,Dimensions);
952
+ SET_VECTOR_ELT(CDFInfo,1,LocMap);
953
+ if (my_cdf.units[0].unittype ==2){
954
+ UNPROTECT(6);
955
+ } else {
956
+ UNPROTECT(4);
957
+ }
958
+
959
+ dealloc_cdf_xda(&my_cdf);
960
+ return CDFInfo;
961
+
962
+ }
963
+
964
+
965
+
966
+
967
+ /* This function is for reading in the entire binary cdf file and then
968
+ * returing the structure in a complex list object.
969
+ * The fullstructure argument is expected to be a BOOLEAN. If TRUE the
970
+ * entire contents of the CDF file are returned.
971
+ * If False, a modified CDFENV style structure is returned
972
+ */
973
+
974
+
975
+
976
+ SEXP ReadCDFFileIntoRList(SEXP filename,SEXP fullstructure){
977
+
978
+ SEXP CDFInfo = R_NilValue; /* this is the object that will be returned */
979
+ SEXP CDFInfoNames;
980
+ SEXP HEADER; /* Will store the header information */
981
+ SEXP HEADERNames;
982
+ SEXP Dimensions;
983
+ SEXP DimensionsNames;
984
+ SEXP REFSEQ; /* Resequencing reference sequence */
985
+ SEXP UNITNAMES;
986
+
987
+ SEXP FILEPOSITIONS;
988
+ SEXP FILEPOSITIONSQC;
989
+ SEXP FILEPOSITIONSUNITS;
990
+ SEXP FILEPOSITIONSNames;
991
+
992
+ SEXP QCUNITS;
993
+ SEXP QCUNITSsub;
994
+ SEXP QCUNITSsubNames;
995
+ SEXP QCHEADER;
996
+ SEXP QCHEADERNames;
997
+ SEXP QCUNITSProbeInfo;
998
+ SEXP QCUNITSProbeInfoX;
999
+ SEXP QCUNITSProbeInfoY;
1000
+ SEXP QCUNITSProbeInfoPL;
1001
+ SEXP QCUNITSProbeInfoPMFLAG;
1002
+ SEXP QCUNITSProbeInfoBGFLAG;
1003
+ SEXP QCUNITSProbeInfoNames;
1004
+ SEXP QCUNITSProbeInforow_names;
1005
+
1006
+ SEXP UNITS;
1007
+ SEXP tmpUNIT;
1008
+ SEXP tmpUNITNames;
1009
+ SEXP UNITSHeader;
1010
+ SEXP UNITSHeaderNames;
1011
+ SEXP tmpUNITSBlock;
1012
+ SEXP UNITSBlock;
1013
+ SEXP UNITSBlockNames;
1014
+ SEXP UNITSBlockHeader;
1015
+ SEXP UNITSBlockHeaderNames;
1016
+ SEXP UNITSBlockInfo;
1017
+ SEXP UNITSBlockInfoNames;
1018
+ SEXP UNITSBlockInforow_names;
1019
+ SEXP UNITSBlockAtom ;
1020
+ SEXP UNITSBlockX;
1021
+ SEXP UNITSBlockY;
1022
+ SEXP UNITSBlockIndexPos;
1023
+ SEXP UNITSBlockPbase;
1024
+ SEXP UNITSBlockTbase;
1025
+
1026
+
1027
+
1028
+
1029
+
1030
+
1031
+
1032
+
1033
+ char buf[10];
1034
+ int i,j,k;
1035
+
1036
+ cdf_xda my_cdf;
1037
+ const char *cur_file_name;
1038
+ cur_file_name = CHAR(STRING_ELT(filename,0));
1039
+
1040
+ /* Read in the xda style CDF file into memory */
1041
+ if (!read_cdf_xda(cur_file_name,&my_cdf)){
1042
+ error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
1043
+ }
1044
+
1045
+
1046
+ if (asInteger(fullstructure)){
1047
+ /* return the full structure */
1048
+ PROTECT(CDFInfo = allocVector(VECSXP,5));
1049
+
1050
+ PROTECT(CDFInfoNames = allocVector(STRSXP,5));
1051
+ SET_STRING_ELT(CDFInfoNames,0,mkChar("Header"));
1052
+ SET_STRING_ELT(CDFInfoNames,1,mkChar("UnitNames"));
1053
+ SET_STRING_ELT(CDFInfoNames,2,mkChar("FilePositions"));
1054
+ SET_STRING_ELT(CDFInfoNames,3,mkChar("QCUnits"));
1055
+ SET_STRING_ELT(CDFInfoNames,4,mkChar("Units"));
1056
+ setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
1057
+ UNPROTECT(1);
1058
+
1059
+ PROTECT(HEADER = allocVector(VECSXP,2));
1060
+ PROTECT(HEADERNames = allocVector(STRSXP,2));
1061
+ SET_STRING_ELT(HEADERNames,0,mkChar("Dimensions"));
1062
+ SET_STRING_ELT(HEADERNames,1,mkChar("ReseqRefSeq"));
1063
+ setAttrib(HEADER,R_NamesSymbol,HEADERNames);
1064
+ UNPROTECT(1);
1065
+
1066
+ PROTECT(Dimensions = allocVector(REALSXP,7));
1067
+ NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.magicnumber;
1068
+ NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.version_number;
1069
+ NUMERIC_POINTER(Dimensions)[2] = (double)my_cdf.header.cols;
1070
+ NUMERIC_POINTER(Dimensions)[3] = (double)my_cdf.header.rows;
1071
+ NUMERIC_POINTER(Dimensions)[4] = (double)my_cdf.header.n_qc_units;
1072
+ NUMERIC_POINTER(Dimensions)[5] = (double)my_cdf.header.n_units;
1073
+ NUMERIC_POINTER(Dimensions)[6] = (double)my_cdf.header.len_ref_seq;
1074
+
1075
+ PROTECT(DimensionsNames = allocVector(STRSXP,7));
1076
+ SET_STRING_ELT(DimensionsNames,0,mkChar("MagicNumber"));
1077
+ SET_STRING_ELT(DimensionsNames,1,mkChar("VersionNumber"));
1078
+ SET_STRING_ELT(DimensionsNames,2,mkChar("Cols"));
1079
+ SET_STRING_ELT(DimensionsNames,3,mkChar("Rows"));
1080
+ SET_STRING_ELT(DimensionsNames,4,mkChar("n.QCunits"));
1081
+ SET_STRING_ELT(DimensionsNames,5,mkChar("n.units"));
1082
+ SET_STRING_ELT(DimensionsNames,6,mkChar("LenRefSeq"));
1083
+ setAttrib(Dimensions,R_NamesSymbol,DimensionsNames);
1084
+ SET_VECTOR_ELT(HEADER,0,Dimensions);
1085
+ UNPROTECT(2);
1086
+
1087
+ PROTECT(REFSEQ = allocVector(STRSXP,1));
1088
+ SET_STRING_ELT(REFSEQ,0,mkChar(my_cdf.header.ref_seq));
1089
+ SET_VECTOR_ELT(HEADER,1,REFSEQ);
1090
+ UNPROTECT(1);
1091
+
1092
+ SET_VECTOR_ELT(CDFInfo,0,HEADER);
1093
+ UNPROTECT(1);
1094
+
1095
+ PROTECT(UNITNAMES = allocVector(STRSXP,my_cdf.header.n_units));
1096
+ for (i =0; i < my_cdf.header.n_units; i++){
1097
+ SET_STRING_ELT(UNITNAMES,i,mkChar(my_cdf.probesetnames[i]));
1098
+ }
1099
+ SET_VECTOR_ELT(CDFInfo,1,UNITNAMES);
1100
+ UNPROTECT(1);
1101
+
1102
+ PROTECT(FILEPOSITIONS = allocVector(VECSXP,2));
1103
+ PROTECT(FILEPOSITIONSQC = allocVector(REALSXP,my_cdf.header.n_qc_units));
1104
+ PROTECT(FILEPOSITIONSUNITS = allocVector(REALSXP,my_cdf.header.n_units));
1105
+ for (i =0; i < my_cdf.header.n_qc_units; i++){
1106
+ NUMERIC_POINTER(FILEPOSITIONSQC)[i] = (double)my_cdf.qc_start[i];
1107
+ }
1108
+ for (i =0; i < my_cdf.header.n_units; i++){
1109
+ NUMERIC_POINTER(FILEPOSITIONSUNITS)[i] = (double)my_cdf.units_start[i];
1110
+ }
1111
+ SET_VECTOR_ELT(FILEPOSITIONS,0,FILEPOSITIONSQC);
1112
+ SET_VECTOR_ELT(FILEPOSITIONS,1,FILEPOSITIONSUNITS);
1113
+ PROTECT(FILEPOSITIONSNames = allocVector(STRSXP,2));
1114
+ SET_STRING_ELT(FILEPOSITIONSNames,0,mkChar("FilePosQC"));
1115
+ SET_STRING_ELT(FILEPOSITIONSNames,1,mkChar("FilePosUnits"));
1116
+ setAttrib(FILEPOSITIONS,R_NamesSymbol,FILEPOSITIONSNames);
1117
+ SET_VECTOR_ELT(CDFInfo,2,FILEPOSITIONS);
1118
+ UNPROTECT(4);
1119
+
1120
+ PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.n_qc_units));
1121
+ for (i =0; i < my_cdf.header.n_qc_units; i++){
1122
+ PROTECT(QCUNITSsub = allocVector(VECSXP,2));
1123
+ PROTECT(QCUNITSsubNames= allocVector(STRSXP,2));
1124
+ SET_STRING_ELT(QCUNITSsubNames,0,mkChar("QCUnitHeader"));
1125
+ SET_STRING_ELT(QCUNITSsubNames,1,mkChar("QCUnitInfo"));
1126
+ setAttrib(QCUNITSsub,R_NamesSymbol,QCUNITSsubNames);
1127
+
1128
+ PROTECT(QCHEADER = allocVector(REALSXP,2));
1129
+ NUMERIC_POINTER(QCHEADER)[0] = (double)my_cdf.qc_units[i].type;
1130
+ NUMERIC_POINTER(QCHEADER)[1] = (double)my_cdf.qc_units[i].n_probes;
1131
+ PROTECT(QCHEADERNames = allocVector(STRSXP,2));
1132
+ SET_STRING_ELT(QCHEADERNames,0,mkChar("Type"));
1133
+ SET_STRING_ELT(QCHEADERNames,1,mkChar("n.probes"));
1134
+
1135
+ setAttrib(QCHEADER,R_NamesSymbol,QCHEADERNames);
1136
+ SET_VECTOR_ELT(QCUNITSsub,0,QCHEADER);
1137
+
1138
+
1139
+ PROTECT(QCUNITSProbeInfo = allocVector(VECSXP,5));
1140
+ PROTECT(QCUNITSProbeInfoX = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1141
+ PROTECT(QCUNITSProbeInfoY = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1142
+ PROTECT(QCUNITSProbeInfoPL = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1143
+ PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1144
+ PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes));
1145
+
1146
+ for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
1147
+ NUMERIC_POINTER(QCUNITSProbeInfoX)[j] = (double)my_cdf.qc_units[i].qc_probes[j].x;
1148
+ NUMERIC_POINTER(QCUNITSProbeInfoY)[j] = (double)my_cdf.qc_units[i].qc_probes[j].y;
1149
+ NUMERIC_POINTER(QCUNITSProbeInfoPL)[j] = (double)my_cdf.qc_units[i].qc_probes[j].probelength;
1150
+ NUMERIC_POINTER(QCUNITSProbeInfoPMFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].pmflag;
1151
+ NUMERIC_POINTER(QCUNITSProbeInfoBGFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].bgprobeflag;
1152
+ }
1153
+
1154
+ SET_VECTOR_ELT(QCUNITSProbeInfo,0,QCUNITSProbeInfoX);
1155
+ SET_VECTOR_ELT(QCUNITSProbeInfo,1,QCUNITSProbeInfoY);
1156
+ SET_VECTOR_ELT(QCUNITSProbeInfo,2,QCUNITSProbeInfoPL);
1157
+ SET_VECTOR_ELT(QCUNITSProbeInfo,3,QCUNITSProbeInfoPMFLAG);
1158
+ SET_VECTOR_ELT(QCUNITSProbeInfo,4,QCUNITSProbeInfoBGFLAG);
1159
+
1160
+ PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,5));
1161
+ SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
1162
+ SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
1163
+ SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("ProbeLength"));
1164
+ SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("PMFlag"));
1165
+ SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("BGProbeFlag"));
1166
+
1167
+ setAttrib(QCUNITSProbeInfo,R_NamesSymbol,QCUNITSProbeInfoNames);
1168
+
1169
+ PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1170
+
1171
+ for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
1172
+ sprintf(buf, "%d", j+1);
1173
+ SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
1174
+ }
1175
+
1176
+
1177
+
1178
+ setAttrib(QCUNITSProbeInfo, R_RowNamesSymbol, QCUNITSProbeInforow_names);
1179
+
1180
+
1181
+ setAttrib(QCUNITSProbeInfo,R_ClassSymbol,mkString("data.frame"));
1182
+
1183
+ SET_VECTOR_ELT(QCUNITSsub,1,QCUNITSProbeInfo);
1184
+ SET_VECTOR_ELT(QCUNITS,i,QCUNITSsub);
1185
+ UNPROTECT(12);
1186
+ }
1187
+ SET_VECTOR_ELT(CDFInfo,3,QCUNITS);
1188
+ UNPROTECT(1);
1189
+
1190
+
1191
+ PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.n_units));
1192
+ for (i =0; i < my_cdf.header.n_units; i++){
1193
+ PROTECT(tmpUNIT = allocVector(VECSXP,2));
1194
+ PROTECT(tmpUNITNames = allocVector(STRSXP,2));
1195
+ SET_STRING_ELT(tmpUNITNames,0,mkChar("UnitHeader"));
1196
+ SET_STRING_ELT(tmpUNITNames,1,mkChar("Block"));
1197
+ setAttrib(tmpUNIT,R_NamesSymbol,tmpUNITNames);
1198
+
1199
+
1200
+ PROTECT(UNITSHeader = allocVector(REALSXP,7));
1201
+ PROTECT(UNITSHeaderNames = allocVector(STRSXP,7));
1202
+ SET_STRING_ELT(UNITSHeaderNames,0,mkChar("UnitType"));
1203
+ SET_STRING_ELT(UNITSHeaderNames,1,mkChar("Direction"));
1204
+ SET_STRING_ELT(UNITSHeaderNames,2,mkChar("n.atoms"));
1205
+ SET_STRING_ELT(UNITSHeaderNames,3,mkChar("n.blocks"));
1206
+ SET_STRING_ELT(UNITSHeaderNames,4,mkChar("n.cells"));
1207
+ SET_STRING_ELT(UNITSHeaderNames,5,mkChar("UnitNumber"));
1208
+ SET_STRING_ELT(UNITSHeaderNames,6,mkChar("n.cellsperatom"));
1209
+
1210
+ setAttrib(UNITSHeader,R_NamesSymbol,UNITSHeaderNames);
1211
+
1212
+ NUMERIC_POINTER(UNITSHeader)[0] = (double)my_cdf.units[i].unittype;
1213
+ NUMERIC_POINTER(UNITSHeader)[1] = (double)my_cdf.units[i].direction;
1214
+ NUMERIC_POINTER(UNITSHeader)[2] = (double)my_cdf.units[i].natoms;
1215
+ NUMERIC_POINTER(UNITSHeader)[3] = (double)my_cdf.units[i].nblocks;
1216
+ NUMERIC_POINTER(UNITSHeader)[4] = (double)my_cdf.units[i].ncells;
1217
+ NUMERIC_POINTER(UNITSHeader)[5] = (double)my_cdf.units[i].unitnumber;
1218
+ NUMERIC_POINTER(UNITSHeader)[6] = (double)my_cdf.units[i].ncellperatom;
1219
+
1220
+ PROTECT(tmpUNITSBlock = allocVector(VECSXP,my_cdf.units[i].nblocks));
1221
+ for (j=0; j < my_cdf.units[i].nblocks; j++){
1222
+ PROTECT(UNITSBlock = allocVector(VECSXP,3));
1223
+ PROTECT(UNITSBlockNames = allocVector(STRSXP,3));
1224
+ SET_STRING_ELT(UNITSBlockNames,0,mkChar("Header"));
1225
+ SET_STRING_ELT(UNITSBlockNames,1,mkChar("Name"));
1226
+ SET_STRING_ELT(UNITSBlockNames,2,mkChar("UnitInfo"));
1227
+ setAttrib(UNITSBlock,R_NamesSymbol,UNITSBlockNames);
1228
+
1229
+ PROTECT(UNITSBlockHeader = allocVector(REALSXP,6));
1230
+ PROTECT(UNITSBlockHeaderNames= allocVector(VECSXP,6));
1231
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,0,mkChar("n.atoms"));
1232
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,1,mkChar("n.cells"));
1233
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,2,mkChar("n.cellsperatom"));
1234
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,3,mkChar("Direction"));
1235
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,4,mkChar("firstatom"));
1236
+ SET_VECTOR_ELT(UNITSBlockHeaderNames,5,mkChar("unused"));
1237
+
1238
+ NUMERIC_POINTER(UNITSBlockHeader)[0] = (double)my_cdf.units[i].unit_block[j].natoms;
1239
+ NUMERIC_POINTER(UNITSBlockHeader)[1] = (double)my_cdf.units[i].unit_block[j].ncells;
1240
+ NUMERIC_POINTER(UNITSBlockHeader)[2] = (double)my_cdf.units[i].unit_block[j].ncellperatom;
1241
+ NUMERIC_POINTER(UNITSBlockHeader)[3] = (double)my_cdf.units[i].unit_block[j].direction;
1242
+ NUMERIC_POINTER(UNITSBlockHeader)[4] = (double)my_cdf.units[i].unit_block[j].firstatom;
1243
+ NUMERIC_POINTER(UNITSBlockHeader)[5] = (double)my_cdf.units[i].unit_block[j].unused;
1244
+
1245
+
1246
+ setAttrib(UNITSBlockHeader,R_NamesSymbol,UNITSBlockHeaderNames);
1247
+
1248
+ SET_VECTOR_ELT(UNITSBlock,0,UNITSBlockHeader);
1249
+
1250
+ SET_VECTOR_ELT(UNITSBlock,1,mkString(my_cdf.units[i].unit_block[j].blockname));
1251
+
1252
+ PROTECT(UNITSBlockInfo = allocVector(VECSXP,6));
1253
+
1254
+ PROTECT(UNITSBlockInfoNames = allocVector(STRSXP,6));
1255
+ SET_STRING_ELT(UNITSBlockInfoNames,0,mkChar("atom.number"));
1256
+ SET_STRING_ELT(UNITSBlockInfoNames,1,mkChar("x"));
1257
+ SET_STRING_ELT(UNITSBlockInfoNames,2,mkChar("y"));
1258
+ SET_STRING_ELT(UNITSBlockInfoNames,3,mkChar("index.position"));
1259
+ SET_STRING_ELT(UNITSBlockInfoNames,4,mkChar("pbase"));
1260
+ SET_STRING_ELT(UNITSBlockInfoNames,5,mkChar("tbase"));
1261
+
1262
+ setAttrib(UNITSBlockInfo,R_NamesSymbol,UNITSBlockInfoNames);
1263
+
1264
+
1265
+ PROTECT(UNITSBlockInforow_names = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
1266
+
1267
+ for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
1268
+ sprintf(buf, "%d", k+1);
1269
+ SET_STRING_ELT(UNITSBlockInforow_names,k,mkChar(buf));
1270
+ }
1271
+
1272
+ PROTECT(UNITSBlockAtom = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1273
+ PROTECT(UNITSBlockX = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1274
+ PROTECT(UNITSBlockY = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1275
+ PROTECT(UNITSBlockIndexPos = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells));
1276
+ PROTECT(UNITSBlockPbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
1277
+ PROTECT(UNITSBlockTbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells));
1278
+
1279
+ for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){
1280
+ /* Rprintf("%d %d %d\n",i,j,k);
1281
+ // NUMERIC_POINTER(UNITSBlockAtom)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
1282
+ // NUMERIC_POINTER(UNITSBlockX)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].x;
1283
+ // NUMERIC_POINTER(UNITSBlockY)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].y;
1284
+ // NUMERIC_POINTER(UNITSBlockIndexPos)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos; */
1285
+ INTEGER_POINTER(UNITSBlockAtom)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber;
1286
+ INTEGER_POINTER(UNITSBlockX)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].x;
1287
+ INTEGER_POINTER(UNITSBlockY)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].y;
1288
+ INTEGER_POINTER(UNITSBlockIndexPos)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos;
1289
+ sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].pbase);
1290
+ SET_STRING_ELT(UNITSBlockPbase,k,mkChar(buf));
1291
+
1292
+ sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].tbase);
1293
+ SET_STRING_ELT(UNITSBlockTbase,k,mkChar(buf));
1294
+ }
1295
+
1296
+ SET_VECTOR_ELT(UNITSBlockInfo,0,UNITSBlockAtom);
1297
+ SET_VECTOR_ELT(UNITSBlockInfo,1,UNITSBlockX);
1298
+ SET_VECTOR_ELT(UNITSBlockInfo,2,UNITSBlockY);
1299
+ SET_VECTOR_ELT(UNITSBlockInfo,3,UNITSBlockIndexPos);
1300
+ SET_VECTOR_ELT(UNITSBlockInfo,4,UNITSBlockPbase);
1301
+ SET_VECTOR_ELT(UNITSBlockInfo,5,UNITSBlockTbase);
1302
+ UNPROTECT(6);
1303
+
1304
+
1305
+
1306
+
1307
+ setAttrib(UNITSBlockInfo, R_RowNamesSymbol, UNITSBlockInforow_names);
1308
+ setAttrib(UNITSBlockInfo,R_ClassSymbol,mkString("data.frame"));
1309
+
1310
+ SET_VECTOR_ELT(UNITSBlock,2,UNITSBlockInfo);
1311
+
1312
+ SET_VECTOR_ELT(tmpUNITSBlock,j,UNITSBlock);
1313
+ UNPROTECT(7);
1314
+ }
1315
+
1316
+ SET_VECTOR_ELT(tmpUNIT,0,UNITSHeader);
1317
+ SET_VECTOR_ELT(tmpUNIT,1,tmpUNITSBlock);
1318
+
1319
+ SET_VECTOR_ELT(UNITS,i,tmpUNIT);
1320
+ UNPROTECT(5);
1321
+ }
1322
+ SET_VECTOR_ELT(CDFInfo,4,UNITS);
1323
+ UNPROTECT(1);
1324
+
1325
+
1326
+ } else {
1327
+ /* return the abbreviated structure */
1328
+ error("Abbreviated structure not yet implemented.\n");
1329
+
1330
+
1331
+ }
1332
+
1333
+
1334
+
1335
+
1336
+
1337
+ dealloc_cdf_xda(&my_cdf);
1338
+ UNPROTECT(1);
1339
+ return CDFInfo;
1340
+
1341
+
1342
+ }