bio-affy 0.1.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
Binary file
@@ -0,0 +1,1576 @@
1
+ /****************************************************************
2
+ **
3
+ ** File: read_cdffile2.c
4
+ **
5
+ ** Implementation by: B. M. Bolstad
6
+ **
7
+ ** Copyright (C) B. M. Bolstad 2005
8
+ **
9
+ ** A parser designed to read text CDF files into an R List structure
10
+ **
11
+ ** Note this version only parses GC3.0 version text files (which should
12
+ ** be almost all text CDF files currently used)
13
+ **
14
+ ** Note that the original text CDF parser (on which this file is not in
15
+ ** any way based) was written by Laurent Gautier. That file was named
16
+ ** read_cdffile.c (originally part of affy and then later makecdfenv)
17
+ **
18
+ ** Implemented based on documentation available from Affymetrix
19
+ **
20
+ ** Implementation begun 2005.
21
+ **
22
+ ** Modification Dates
23
+ ** Jul 24 - Initial version
24
+ ** Sep 20 - Continued Implementation
25
+ ** Sep 21 - Continued Implementation and debugging
26
+ ** Sep 22 - Continued Implementation and testing
27
+ ** Sep 24 - QCunit probes, Unit Block probes, Finish and tested.
28
+ ** Dec 1, 2005 - Some comment cleaning. Added isTextCDFFile,CheckCDFtext
29
+ ** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
30
+ ** May 31, 2006 - fix some compiler warnings
31
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
32
+ **
33
+ **
34
+ *******************************************************************/
35
+
36
+ #include <R.h>
37
+ #include <Rdefines.h>
38
+
39
+ #include "stdlib.h"
40
+ #include "stdio.h"
41
+
42
+ #ifdef BIOLIB
43
+ #include <biolib_R_map.h>
44
+ #endif
45
+
46
+ #define BUFFER_SIZE 1024
47
+
48
+
49
/*****************************************************************
 **
 ** cdf_text_header
 **
 ** Header-level information taken from the "CDF" and "Chip"
 ** sections of a text CDF file.
 **
 ******************************************************************/

typedef struct {
  char *version;        /* version string -- not set by read_cdf_header; presumably filled elsewhere, TODO confirm */
  char *name;           /* heap copy of the "Name" value in the [Chip] section */
  int rows;             /* "Rows" value */
  int cols;             /* "Cols" value */
  int numberofunits;    /* "NumberOfUnits" value; sizes the units array */
  int maxunit;          /* "MaxUnit" value */
  int NumQCUnits;       /* "NumQCUnits" value; sizes the qc_units array */
  char *chipreference;  /* heap copy of "ChipReference", or NULL when that field has no value */
} cdf_text_header;
69
+
70
+
71
/*****************************************************************
 **
 ** cdf_text_qc_probe
 **
 ** One probe (cell line) belonging to a QC unit. Each member
 ** corresponds to one of the CellHeader columns recognised by the
 ** QC unit reader. The "CYCLES" column is ignored and never parsed.
 **
 ******************************************************************/

typedef struct {
  int x;        /* "X" column */
  int y;        /* "Y" column */
  char *probe;  /* "PROBE" column, heap copy */
  int plen;     /* "PLEN" column */
  int atom;     /* "ATOM" column */
  int index;    /* "INDEX" column */
  int match;    /* "MATCH" column */
  int bg;       /* "BG" column */
} cdf_text_qc_probe;
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ /*******************************************************************
98
+ **
99
+ ** A structure for holding QC units information. These are
100
+ ** areas of the chip that contain probes that may or may not be useful
101
+ ** for QC and other purposes.
102
+ **
103
+ **
104
+ *******************************************************************/
105
+
106
+
107
+
108
+ typedef struct{
109
+ int type;
110
+ unsigned int n_probes;
111
+ int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
112
+ cdf_text_qc_probe *qc_probes;
113
+
114
+ } cdf_text_qc_unit;
115
+
116
+
117
/*******************************************************************
 **
 ** cdf_text_unit_block_probe
 **
 ** One probe (cell line) inside a unit block. Members are listed in
 ** the order in which the block reader pulls them from the tokenized
 ** cell line (tokens 1 through 15).
 **
 *******************************************************************/

typedef struct {
  int x;           /* token 1 */
  int y;           /* token 2 */
  char *probe;     /* token 3, heap copy */
  char *feat;      /* token 4, heap copy */
  char *qual;      /* token 5, heap copy */
  int expos;       /* token 6 */
  int pos;         /* token 7 */
  char *cbase;     /* token 8, heap copy */
  char *pbase;     /* token 9, heap copy */
  char *tbase;     /* token 10, heap copy */
  int atom;        /* token 11 */
  int index;       /* token 12 */
  int codonid;     /* token 13 */
  int codon;       /* token 14 */
  int regiontype;  /* token 15 */
  char *region;    /* not filled in by read_cdf_unit_block_probes */
} cdf_text_unit_block_probe;
143
+
144
+
145
+
146
+
147
+ /*******************************************************************
148
+ **
149
+ ** A structure holding Unit_blocks
150
+ **
151
+ ** blocks are stored within units.
152
+ ** blocks contain many probes
153
+ **
154
+ *******************************************************************/
155
+
156
+ typedef struct{
157
+ char *name;
158
+ int blocknumber;
159
+ int num_atoms;
160
+ int num_cells;
161
+ int start_position;
162
+ int stop_position;
163
+ int direction;
164
+ cdf_text_unit_block_probe *probes;
165
+
166
+ } cdf_text_unit_block;
167
+
168
+
169
+
170
+
171
+
172
+
173
+ /*******************************************************************
174
+ **
175
+ ** A structure for holding "Units" AKA known as probesets
176
+ **
177
+ ** Each unit contains one or more blocks. Each block contains one or
178
+ ** more probes
179
+ **
180
+ *******************************************************************/
181
+
182
+
183
+ typedef struct{
184
+ char *name;
185
+ int direction;
186
+ int num_atoms;
187
+ int num_cells;
188
+ int unit_number;
189
+ int unit_type;
190
+ int numberblocks;
191
+ int MutationType;
192
+ cdf_text_unit_block *blocks;
193
+ } cdf_text_unit;
194
+
195
+
196
+
197
+ /*******************************************************************
198
+ **
199
+ ** A structure for holding a text CDF file
200
+ **
201
+ ** text cdf files consist of
202
+ ** basic header information
203
+ ** qcunits
204
+ ** - qc probes
205
+ ** units (aka probesets)
206
+ ** - blocks
207
+ ** - probes
208
+ **
209
+ **
210
+ *******************************************************************/
211
+
212
+ typedef struct{
213
+ cdf_text_header header;
214
+ cdf_text_qc_unit *qc_units;
215
+ cdf_text_unit *units;
216
+ } cdf_text;
217
+
218
+
219
+ /**************************************************************
220
+ **
221
+ ** The following code is for tokenizing strings
222
+ ** originally included in read_abatch.c from the affy package.
223
+ **
224
+ *************************************************************/
225
+
226
/***************************************************************
 **
 ** tokenset
 **
 ** A set of tokens, normally produced by splitting a character
 ** string on a set of delimiter characters.
 **
 **************************************************************/

typedef struct {
  char **tokens;  /* array of n separately allocated token strings */
  int n;          /* number of tokens held in the set */
} tokenset;
244
+
245
+
246
+
247
+ /******************************************************************
248
+ **
249
+ ** tokenset *tokenize(char *str, char *delimiters)
250
+ **
251
+ ** char *str - a string to break into tokens
252
+ ** char *delimiters - delimiters to use in breaking up the line
253
+ **
254
+ **
255
+ ** RETURNS a new tokenset
256
+ **
257
+ ** Given a string, split into tokens based on a set of delimitors
258
+ **
259
+ *****************************************************************/
260
+
261
+ static tokenset *tokenize(char *str, char *delimiters){
262
+
263
+ int i=0;
264
+
265
+ char *current_token;
266
+ tokenset *my_tokenset = Calloc(1,tokenset);
267
+ my_tokenset->n=0;
268
+
269
+ my_tokenset->tokens = NULL;
270
+
271
+ current_token = strtok(str,delimiters);
272
+ while (current_token != NULL){
273
+ my_tokenset->n++;
274
+ my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
275
+ my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
276
+ strcpy(my_tokenset->tokens[i],current_token);
277
+ i++;
278
+ current_token = strtok(NULL,delimiters);
279
+ }
280
+
281
+ return my_tokenset;
282
+ }
283
+
284
+
285
+ /******************************************************************
286
+ **
287
+ ** int tokenset_size(tokenset *x)
288
+ **
289
+ ** tokenset *x - a tokenset
290
+ **
291
+ ** RETURNS the number of tokens in the tokenset
292
+ **
293
+ ******************************************************************/
294
+
295
+ static int tokenset_size(tokenset *x){
296
+ return x->n;
297
+ }
298
+
299
+
300
+ /******************************************************************
301
+ **
302
+ ** char *get_token(tokenset *x, int i)
303
+ **
304
+ ** tokenset *x - a tokenset
305
+ ** int i - index of the token to return
306
+ **
307
+ ** RETURNS pointer to the i'th token
308
+ **
309
+ ******************************************************************/
310
+
311
+ static char *get_token(tokenset *x,int i){
312
+ return x->tokens[i];
313
+ }
314
+
315
+ /******************************************************************
316
+ **
317
+ ** void delete_tokens(tokenset *x)
318
+ **
319
+ ** tokenset *x - a tokenset
320
+ **
321
+ ** Deallocates all the space allocated for a tokenset
322
+ **
323
+ ******************************************************************/
324
+
325
+ static void delete_tokens(tokenset *x){
326
+
327
+ int i;
328
+
329
+ for (i=0; i < x->n; i++){
330
+ Free(x->tokens[i]);
331
+ }
332
+ Free(x->tokens);
333
+ Free(x);
334
+ }
335
+
336
/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends_in)
 **
 ** char *token   - the string to examine
 ** char *ends_in - the suffix looked for at the end of token
 **
 ** RETURNS 0 when token does not end with ends_in; otherwise the
 **         index in token at which the suffix begins.
 **
 ** token must be at least one character longer than ends_in, so a
 ** token that is equal to the suffix counts as "no match":
 **
 **    token = "TestStr", ends_in = "TestStr"  gives 0
 **    token = "TestStr", ends_in = "estStr"   gives 1
 **    token = "TestStr", ends_in = "stStr"    gives 2
 **
 ******************************************************************/

static int token_ends_with(char *token, char *ends_in){

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;

  /* the suffix needs at least one character in front of it */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset,ends_in) == 0) ? offset : 0;
}
389
+
390
+
391
+ /******************************************************************
392
+ **
393
+ ** The following code, also from read_abatch.c is more about locating
394
+ ** sections in the file and reading it in.
395
+ **
396
+ ******************************************************************/
397
+
398
+
399
/**
 ** void ReadFileLine(char *buffer, int buffersize, FILE *currentFile)
 **
 ** char *buffer      - where the line is stored
 ** int buffersize    - capacity of buffer, passed straight to fgets()
 ** FILE *currentFile - the open stream to read from
 **
 ** Reads the next line of the stream with fgets(). If nothing can
 ** be read the problem is reported through error().
 **
 **/


static void ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  char *got = fgets(buffer, buffersize, currentFile);

  if (!got){
    error("End of file reached unexpectedly. Perhaps this file is truncated.\n");
  }
}
411
+
412
+
413
+
414
+ /******************************************************************
415
+ **
416
+ ** void findStartsWith(FILE *my_file,char *starts, char *buffer)
417
+ **
418
+ ** FILE *my_file - an open file to read from
419
+ ** char *starts - the string to search for at the start of each line
420
+ ** char *buffer - where to place the line that has been read.
421
+ **
422
+ **
423
+ ** Find a line that starts with the specified character string.
424
+ ** At exit buffer should contain that line
425
+ **
426
+ *****************************************************************/
427
+
428
+
429
+ static void findStartsWith(FILE *my_file,char *starts, char *buffer){
430
+
431
+ int starts_len = strlen(starts);
432
+ int match = 1;
433
+
434
+ do {
435
+ ReadFileLine(buffer, BUFFER_SIZE, my_file);
436
+ match = strncmp(starts, buffer, starts_len);
437
+ } while (match != 0);
438
+ }
439
+
440
+
441
/******************************************************************
 **
 ** void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer)
 **
 ** FILE *my_file      - an open file
 ** char *sectiontitle - start of the section title line to look for
 ** char *buffer       - returns holding the line that starts with
 **                      sectiontitle
 **
 ** Moves the read position forward to the requested section.
 **
 *****************************************************************/

static void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer){
  /* a section title is matched as a line prefix, so the prefix search does all the work */
  findStartsWith(my_file,sectiontitle,buffer);
}
455
+
456
+
457
+ /*******************************************************************
458
+ **
459
+ ** void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer)
460
+ **
461
+ ** FILE *infile - pointer to open file presumed to be a CDF file
462
+ ** cdf_text *mycdf - structure for holding cdf file
463
+ ** char *linebuffer - a place to store strings that are read in. Length
464
+ ** is given by BUFFER_SIZE
465
+ **
466
+ *******************************************************************/
467
+
468
+ static void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer){
469
+
470
+ tokenset *cur_tokenset;
471
+
472
+ /* move to the Chip section */
473
+ AdvanceToSection(infile,"[Chip]",linebuffer);
474
+
475
+ findStartsWith(infile,"Name",linebuffer);
476
+
477
+ /* Read the Name */
478
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
479
+ mycdf->header.name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
480
+ strcpy(mycdf->header.name,get_token(cur_tokenset,1));
481
+ delete_tokens(cur_tokenset);
482
+
483
+ /* Read the Rows and Cols, Number of units etc */
484
+
485
+ findStartsWith(infile,"Rows",linebuffer);
486
+ cur_tokenset = tokenize(linebuffer,"=");
487
+ mycdf->header.rows = atoi(get_token(cur_tokenset,1));
488
+ delete_tokens(cur_tokenset);
489
+
490
+ findStartsWith(infile,"Cols",linebuffer);
491
+ cur_tokenset = tokenize(linebuffer,"=");
492
+ mycdf->header.cols = atoi(get_token(cur_tokenset,1));
493
+ delete_tokens(cur_tokenset);
494
+
495
+ findStartsWith(infile,"NumberOfUnits",linebuffer);
496
+ cur_tokenset = tokenize(linebuffer,"=");
497
+ mycdf->header.numberofunits = atoi(get_token(cur_tokenset,1));
498
+ delete_tokens(cur_tokenset);
499
+
500
+ findStartsWith(infile,"MaxUnit",linebuffer);
501
+ cur_tokenset = tokenize(linebuffer,"=");
502
+ mycdf->header.maxunit = atoi(get_token(cur_tokenset,1));
503
+ delete_tokens(cur_tokenset);
504
+
505
+ findStartsWith(infile,"NumQCUnits",linebuffer);
506
+ cur_tokenset = tokenize(linebuffer,"=");
507
+ mycdf->header.NumQCUnits = atoi(get_token(cur_tokenset,1));
508
+ delete_tokens(cur_tokenset);
509
+
510
+ findStartsWith(infile,"ChipReference",linebuffer);
511
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
512
+ if (cur_tokenset->n > 1){
513
+ mycdf->header.chipreference = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
514
+ strcpy(mycdf->header.chipreference,get_token(cur_tokenset,1));
515
+ } else {
516
+ mycdf->header.chipreference = NULL;
517
+ }
518
+
519
+
520
+ delete_tokens(cur_tokenset);
521
+
522
+
523
+
524
+ }
525
+
526
+
527
+
528
+ /*******************************************************************
529
+ **
530
+ ** void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index)
531
+ **
532
+ ** FILE *infile - an opened CDF file
533
+ ** cdf_text *mycdf - a structure for holding cdf file
534
+ ** char *linebuffer - temporary place to store lines of text read in
535
+ ** int index - which QCunit.
536
+ **
537
+ ** This function reads in the QC unit probes from the cdf file. It is assumed that the space to
538
+ ** store them is already allocated.
539
+ **
540
+ *******************************************************************/
541
+
542
+
543
+ static void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index){
544
+ tokenset *cur_tokenset;
545
+ int i;
546
+
547
+ for (i =0; i < mycdf->qc_units[index].n_probes; i++){
548
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
549
+ cur_tokenset = tokenize(linebuffer,"=\t\r\n");
550
+ if (mycdf->qc_units[index].qccontains[0]){
551
+ mycdf->qc_units[index].qc_probes[i].x = atoi(get_token(cur_tokenset,1));
552
+ }
553
+ if (mycdf->qc_units[index].qccontains[1]){
554
+ mycdf->qc_units[index].qc_probes[i].y = atoi(get_token(cur_tokenset,2));
555
+ }
556
+ if (mycdf->qc_units[index].qccontains[2]){
557
+ mycdf->qc_units[index].qc_probes[i].probe=Calloc(strlen(get_token(cur_tokenset,3))+1,char);
558
+ strcpy(mycdf->qc_units[index].qc_probes[i].probe,get_token(cur_tokenset,3));
559
+ }
560
+ if (mycdf->qc_units[index].qccontains[3]){
561
+ mycdf->qc_units[index].qc_probes[i].plen = atoi(get_token(cur_tokenset,4));
562
+ }
563
+ if (mycdf->qc_units[index].qccontains[4]){
564
+ mycdf->qc_units[index].qc_probes[i].atom = atoi(get_token(cur_tokenset,5));
565
+ }
566
+ if (mycdf->qc_units[index].qccontains[5]){
567
+ mycdf->qc_units[index].qc_probes[i].index = atoi(get_token(cur_tokenset,6));
568
+ }
569
+ if (mycdf->qc_units[index].qccontains[6]){
570
+ mycdf->qc_units[index].qc_probes[i].match = atoi(get_token(cur_tokenset,7));
571
+ }
572
+ if (mycdf->qc_units[index].qccontains[7]){
573
+ mycdf->qc_units[index].qc_probes[i].bg = atoi(get_token(cur_tokenset,8));
574
+ }
575
+ delete_tokens(cur_tokenset);
576
+ }
577
+
578
+
579
+
580
+ }
581
+
582
+ /*******************************************************************
583
+ **
584
+ ** void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer)
585
+ **
586
+ ** FILE *infile - an opened CDF file
587
+ ** cdf_text *mycdf - a structure for holding cdf file
588
+ ** char *linebuffer - temporary place to store lines of text read in
589
+ **
590
+ ** Reads all the QC units. Note that it allocates the space for the probes
591
+ ** it is assumed that the space for the actual QC units are already allocated
592
+ **
593
+ *******************************************************************/
594
+
595
+ static void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer){
596
+
597
+ tokenset *cur_tokenset;
598
+ int i,j;
599
+
600
+ mycdf->qc_units = Calloc(mycdf->header.NumQCUnits,cdf_text_qc_unit);
601
+
602
+
603
+ for (i =0; i < mycdf->header.NumQCUnits; i++){
604
+ /* move to the next QC section */
605
+ AdvanceToSection(infile,"[QC",linebuffer);
606
+ findStartsWith(infile,"Type",linebuffer);
607
+ cur_tokenset = tokenize(linebuffer,"=");
608
+ mycdf->qc_units[i].type = (unsigned short)atoi(get_token(cur_tokenset,1));
609
+ delete_tokens(cur_tokenset);
610
+ findStartsWith(infile,"NumberCells",linebuffer);
611
+ cur_tokenset = tokenize(linebuffer,"=");
612
+ mycdf->qc_units[i].n_probes = atoi(get_token(cur_tokenset,1));
613
+ delete_tokens(cur_tokenset);
614
+ mycdf->qc_units[i].qc_probes = Calloc(mycdf->qc_units[i].n_probes,cdf_text_qc_probe);
615
+
616
+ /* Figure out which fields this QC unit has */
617
+ findStartsWith(infile,"CellHeader",linebuffer);
618
+ cur_tokenset = tokenize(linebuffer,"=\t\r\n");
619
+ for (j=1; j < tokenset_size(cur_tokenset); j++){
620
+ if(strncmp("X", get_token(cur_tokenset,j), 1) == 0){
621
+ mycdf->qc_units[i].qccontains[0] =1;
622
+ } else if (strncmp("Y", get_token(cur_tokenset,j), 1) == 0){
623
+ mycdf->qc_units[i].qccontains[1] =1;
624
+ } else if (strncmp("PROBE",get_token(cur_tokenset,j), 5) == 0){
625
+ mycdf->qc_units[i].qccontains[2] =1;
626
+ } else if (strncmp("PLEN",get_token(cur_tokenset,j), 4) == 0){
627
+ mycdf->qc_units[i].qccontains[3] =1;
628
+ } else if (strncmp("ATOM",get_token(cur_tokenset,j), 4) == 0){
629
+ mycdf->qc_units[i].qccontains[4] =1;
630
+ } else if (strncmp("INDEX",get_token(cur_tokenset,j), 5) == 0){
631
+ mycdf->qc_units[i].qccontains[5] =1;
632
+ } else if (strncmp("MATCH",get_token(cur_tokenset,j), 5) == 0){
633
+ mycdf->qc_units[i].qccontains[6] =1;
634
+ } else if (strncmp("BG",get_token(cur_tokenset,j), 2) == 0){
635
+ mycdf->qc_units[i].qccontains[7] =1;
636
+ }
637
+ }
638
+ delete_tokens(cur_tokenset);
639
+
640
+ read_cdf_QCUnits_probes(infile,mycdf,linebuffer,i);
641
+
642
+
643
+
644
+
645
+ }
646
+ }
647
+
648
+ /*******************************************************************
649
+ **
650
+ ** void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block)
651
+ **
652
+ ** FILE *infile - an opened CDF file
653
+ ** cdf_text *mycdf - a structure for holding cdf file
654
+ ** char *linebuffer - temporary place to store lines of text read in from the file
655
+ ** int unit - which unit
656
+ ** int block - which block
657
+ **
658
+ ** Reads in the probes for each unit. Note that it is assumed that the
659
+ ** space for the probes has actually been allocated.
660
+ **
661
+ *******************************************************************/
662
+
663
+
664
+
665
+ static void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block){
666
+ int i;
667
+ tokenset *cur_tokenset;
668
+
669
+ /* Read the Cell Header for the unit block */
670
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
671
+
672
+ for (i =0; i < mycdf->units[unit].blocks[block].num_cells; i++){
673
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
674
+ cur_tokenset = tokenize(linebuffer,"=\t\r\n");
675
+ mycdf->units[unit].blocks[block].probes[i].x = atoi(get_token(cur_tokenset,1));
676
+ mycdf->units[unit].blocks[block].probes[i].y = atoi(get_token(cur_tokenset,2));
677
+ mycdf->units[unit].blocks[block].probes[i].probe=Calloc(strlen(get_token(cur_tokenset,3))+1,char);
678
+ strcpy(mycdf->units[unit].blocks[block].probes[i].probe,get_token(cur_tokenset,3));
679
+ mycdf->units[unit].blocks[block].probes[i].feat=Calloc(strlen(get_token(cur_tokenset,4))+1,char);
680
+ strcpy(mycdf->units[unit].blocks[block].probes[i].feat,get_token(cur_tokenset,4));
681
+ mycdf->units[unit].blocks[block].probes[i].qual=Calloc(strlen(get_token(cur_tokenset,5))+1,char);
682
+ strcpy(mycdf->units[unit].blocks[block].probes[i].qual,get_token(cur_tokenset,5));
683
+ mycdf->units[unit].blocks[block].probes[i].expos = atoi(get_token(cur_tokenset,6));
684
+ mycdf->units[unit].blocks[block].probes[i].pos = atoi(get_token(cur_tokenset,7));
685
+ mycdf->units[unit].blocks[block].probes[i].cbase = Calloc(strlen(get_token(cur_tokenset,8))+1,char);
686
+ strcpy(mycdf->units[unit].blocks[block].probes[i].cbase,get_token(cur_tokenset,8));
687
+ mycdf->units[unit].blocks[block].probes[i].pbase = Calloc(strlen(get_token(cur_tokenset,9))+1,char);
688
+ strcpy(mycdf->units[unit].blocks[block].probes[i].pbase,get_token(cur_tokenset,9));
689
+ mycdf->units[unit].blocks[block].probes[i].tbase = Calloc(strlen(get_token(cur_tokenset,10))+1,char);
690
+ strcpy(mycdf->units[unit].blocks[block].probes[i].tbase,get_token(cur_tokenset,10));
691
+ mycdf->units[unit].blocks[block].probes[i].atom = atoi(get_token(cur_tokenset,11));
692
+ mycdf->units[unit].blocks[block].probes[i].index = atoi(get_token(cur_tokenset,12));
693
+ mycdf->units[unit].blocks[block].probes[i].codonid = atoi(get_token(cur_tokenset,13));
694
+ mycdf->units[unit].blocks[block].probes[i].codon = atoi(get_token(cur_tokenset,14));
695
+ mycdf->units[unit].blocks[block].probes[i].regiontype = atoi(get_token(cur_tokenset,15));
696
+ delete_tokens(cur_tokenset);
697
+ }
698
+
699
+ }
700
+
701
+
702
+ /*******************************************************************
703
+ **
704
+ ** void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit)
705
+ **
706
+ ** FILE *infile - an opened CDF file
707
+ ** cdf_text *mycdf - a structure for holding cdf file
708
+ ** char *linebuffer - temporary place to store lines of text read in from the file
709
+ ** int unit - which unit
710
+ **
711
+ ** Reads in all the blocks for the unit. Assumes that space for the blocks are allocated
712
+ ** already. Allocates the space for the probes and calls a function to read them in.
713
+ **
714
+ *******************************************************************/
715
+
716
+
717
+ static void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit){
718
+ tokenset *cur_tokenset;
719
+ int i;
720
+
721
+
722
+
723
+ for (i=0; i < mycdf->units[unit].numberblocks; i++){
724
+
725
+ findStartsWith(infile,"Name",linebuffer);
726
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
727
+ mycdf->units[unit].blocks[i].name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
728
+ strcpy(mycdf->units[unit].blocks[i].name,get_token(cur_tokenset,1));
729
+ delete_tokens(cur_tokenset);
730
+ /* Rprintf("%s\n",mycdf->units[unit].blocks[i].name); */
731
+
732
+
733
+
734
+ findStartsWith(infile,"BlockNumber",linebuffer);
735
+ cur_tokenset = tokenize(linebuffer,"=");
736
+ mycdf->units[unit].blocks[i].blocknumber = atoi(get_token(cur_tokenset,1));
737
+ delete_tokens(cur_tokenset);
738
+ /* Rprintf("%d %d %d\n",unit,i,mycdf->header.numberofunits); */
739
+
740
+ findStartsWith(infile,"NumAtoms",linebuffer);
741
+ cur_tokenset = tokenize(linebuffer,"=");
742
+ mycdf->units[unit].blocks[i].num_atoms = atoi(get_token(cur_tokenset,1));
743
+ delete_tokens(cur_tokenset);
744
+
745
+ findStartsWith(infile,"NumCells",linebuffer);
746
+ cur_tokenset = tokenize(linebuffer,"=");
747
+ mycdf->units[unit].blocks[i].num_cells = atoi(get_token(cur_tokenset,1));
748
+ delete_tokens(cur_tokenset);
749
+
750
+
751
+ findStartsWith(infile,"StartPosition",linebuffer);
752
+ cur_tokenset = tokenize(linebuffer,"=");
753
+ mycdf->units[unit].blocks[i].start_position = atoi(get_token(cur_tokenset,1));
754
+ delete_tokens(cur_tokenset);
755
+
756
+ findStartsWith(infile,"StopPosition",linebuffer);
757
+ cur_tokenset = tokenize(linebuffer,"=");
758
+ mycdf->units[unit].blocks[i].stop_position = atoi(get_token(cur_tokenset,1));
759
+ delete_tokens(cur_tokenset);
760
+
761
+ if (mycdf->units[unit].unit_type == 2){
762
+ findStartsWith(infile,"Direction",linebuffer);
763
+ cur_tokenset = tokenize(linebuffer,"=");
764
+ mycdf->units[unit].blocks[i].direction = atoi(get_token(cur_tokenset,1));
765
+ delete_tokens(cur_tokenset);
766
+ } else {
767
+ mycdf->units[unit].blocks[i].direction = mycdf->units[unit].direction;
768
+ }
769
+
770
+ mycdf->units[unit].blocks[i].probes = Calloc(mycdf->units[unit].blocks[i].num_cells,cdf_text_unit_block_probe);
771
+
772
+ read_cdf_unit_block_probes(infile,mycdf,linebuffer,unit,i);
773
+
774
+
775
+
776
+ }
777
+ }
778
+
779
+
780
+ /*******************************************************************
781
+ **
782
+ ** void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer)
783
+ **
784
+ ** FILE *infile - an opened CDF file
785
+ ** cdf_text *mycdf - a structure for holding cdf file
786
+ ** char *linebuffer - temporary place to store lines of text read in from the file
787
+ **
788
+ ** Reads in all the units allocating the space for them and then calling sub functions
789
+ ** to read each block and probes within the blocks
790
+ **
791
+ *******************************************************************/
792
+
793
+ static void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer){
794
+ tokenset *cur_tokenset;
795
+ int i;
796
+
797
+ mycdf->units = Calloc(mycdf->header.numberofunits,cdf_text_unit);
798
+
799
+ for (i =0; i < mycdf->header.numberofunits; i++){
800
+ /* move to the next Unit section */
801
+ AdvanceToSection(infile,"[Unit",linebuffer);
802
+ findStartsWith(infile,"Name",linebuffer);
803
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
804
+ mycdf->units[i].name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
805
+ strcpy(mycdf->units[i].name,get_token(cur_tokenset,1));
806
+
807
+ delete_tokens(cur_tokenset);
808
+
809
+
810
+
811
+ findStartsWith(infile,"Direction",linebuffer);
812
+ cur_tokenset = tokenize(linebuffer,"=");
813
+ mycdf->units[i].direction = atoi(get_token(cur_tokenset,1));
814
+ delete_tokens(cur_tokenset);
815
+
816
+ findStartsWith(infile,"NumAtoms",linebuffer);
817
+ cur_tokenset = tokenize(linebuffer,"=");
818
+ mycdf->units[i].num_atoms = atoi(get_token(cur_tokenset,1));
819
+ delete_tokens(cur_tokenset);
820
+
821
+ findStartsWith(infile,"NumCells",linebuffer);
822
+ cur_tokenset = tokenize(linebuffer,"=");
823
+ mycdf->units[i].num_cells = atoi(get_token(cur_tokenset,1));
824
+ delete_tokens(cur_tokenset);
825
+
826
+ findStartsWith(infile,"UnitNumber",linebuffer);
827
+ cur_tokenset = tokenize(linebuffer,"=");
828
+ mycdf->units[i].unit_number = atoi(get_token(cur_tokenset,1));
829
+ delete_tokens(cur_tokenset);
830
+
831
+ findStartsWith(infile,"UnitType",linebuffer);
832
+ cur_tokenset = tokenize(linebuffer,"=");
833
+ mycdf->units[i].unit_type = atoi(get_token(cur_tokenset,1));
834
+ delete_tokens(cur_tokenset);
835
+
836
+ findStartsWith(infile,"NumberBlocks",linebuffer);
837
+ cur_tokenset = tokenize(linebuffer,"=");
838
+ mycdf->units[i].numberblocks = atoi(get_token(cur_tokenset,1));
839
+ delete_tokens(cur_tokenset);
840
+
841
+ /*Skip MutationType since only appears on one type of array */
842
+
843
+ mycdf->units[i].blocks = Calloc(mycdf->units[i].numberblocks,cdf_text_unit_block);
844
+
845
+
846
+ read_cdf_unit_block(infile,mycdf,linebuffer,i);
847
+ /* AdvanceToSection(infile,"[Unit",linebuffer);
848
+ Rprintf("%d\n",i); */
849
+ }
850
+
851
+
852
+
853
+ }
854
+
855
+
856
+ /*******************************************************************
857
+ **
858
+ ** int read_cdf_text(const char *filename, cdf_text *mycdf)
859
+ **
860
+ ** const char *filename - name of text file
861
+ ** cdf_text *mycdf - pointer to root of structure that will contain
862
+ ** the contents of the CDF file at the conclusion
863
+ ** of the function.
864
+ **
865
+ ** RETURNS 0 if the function failed, otherwise returns 1
866
+ **
867
+ ** this function reads a text CDF file into C data structure.
868
+ **
869
+ *******************************************************************/
870
+
871
+
872
+ int read_cdf_text(const char *filename, cdf_text *mycdf){
873
+
874
+ FILE *infile;
875
+
876
+ char linebuffer[BUFFER_SIZE]; /* a character buffer */
877
+ tokenset *cur_tokenset;
878
+
879
+ if ((infile = fopen(filename, "r")) == NULL)
880
+ {
881
+ error("Unable to open the file %s",filename);
882
+ return 0;
883
+ }
884
+
885
+
886
+
887
+ /* Check that is is a text CDF file */
888
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
889
+ if (strncmp("[CDF]", linebuffer, 5) != 0){
890
+ error("The file %s does not look like a text CDF file",filename);
891
+ }
892
+
893
+ /* Read the version number */
894
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
895
+
896
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
897
+ if (strncmp("GC3.0", get_token(cur_tokenset,1), 5) != 0){
898
+ error("The file %s does not look like a version GC3.0 CDF file",filename);
899
+ } else {
900
+ mycdf->header.version = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
901
+ strcpy(mycdf->header.version,get_token(cur_tokenset,1));
902
+ }
903
+ delete_tokens(cur_tokenset);
904
+
905
+
906
+ read_cdf_header(infile,mycdf,linebuffer);
907
+ read_cdf_QCUnits(infile,mycdf,linebuffer);
908
+ read_cdf_Units(infile,mycdf,linebuffer);
909
+
910
+
911
+ return 1;
912
+ }
913
+
914
+ /*******************************************************************
915
+ **
916
+ ** void dealloc_cdf_text(cdf_text *my_cdf)
917
+ **
918
+ ** Frees memory allocated
919
+ **
920
+ ******************************************************************/
921
+
922
+
923
+
924
+ void dealloc_cdf_text(cdf_text *my_cdf){
925
+ int i,j,k;
926
+
927
+
928
+ Free(my_cdf->header.version);
929
+ Free(my_cdf->header.name);
930
+ if (my_cdf->header.chipreference != NULL)
931
+ Free(my_cdf->header.chipreference);
932
+
933
+ for (i =0; i < my_cdf->header.NumQCUnits; i++){
934
+ for (j=0; j < my_cdf->qc_units[i].n_probes; j++){
935
+ Free(my_cdf->qc_units[i].qc_probes[j].probe);
936
+ }
937
+ Free(my_cdf->qc_units[i].qc_probes);
938
+ }
939
+
940
+
941
+ for (i =0; i < my_cdf->header.numberofunits; i++){
942
+ for (j=0; j < my_cdf->units[i].numberblocks; j++){
943
+ for (k=0; k < my_cdf->units[i].blocks[j].num_cells;k++){
944
+ Free(my_cdf->units[i].blocks[j].probes[k].probe);
945
+ Free(my_cdf->units[i].blocks[j].probes[k].feat);
946
+ Free(my_cdf->units[i].blocks[j].probes[k].qual);
947
+ Free(my_cdf->units[i].blocks[j].probes[k].cbase);
948
+ Free(my_cdf->units[i].blocks[j].probes[k].pbase);
949
+ Free(my_cdf->units[i].blocks[j].probes[k].tbase);
950
+ }
951
+ Free(my_cdf->units[i].blocks[j].probes);
952
+ Free(my_cdf->units[i].blocks[j].name);
953
+ }
954
+ Free(my_cdf->units[i].blocks);
955
+ Free(my_cdf->units[i].name);
956
+ }
957
+
958
+
959
+ }
960
+
961
+
962
+
963
+ /*******************************************************************
964
+ **
965
+ ** static int isTextCDFFile(const char *filename)
966
+ **
967
+ ** const char *filename - name of file to check
968
+ **
969
+ ** checks whether the supplied file is a text CDF file or not.
970
+ ** uses a very simple test.
971
+ **
972
+ ** Attempts to open the supplied filename. Then checks to see if the first
973
+ ** 5 characters are "[CDF]" if so returns 1, otherwise 0.
974
+ **
975
+ **
976
+ ******************************************************************/
977
+
978
+ int isTextCDFFile(const char *filename){
979
+
980
+
981
+ FILE *infile;
982
+
983
+ char linebuffer[BUFFER_SIZE]; /* a character buffer */
984
+
985
+
986
+ if ((infile = fopen(filename, "r")) == NULL)
987
+ {
988
+ error("Unable to open the file %s",filename);
989
+ }
990
+
991
+
992
+
993
+ /* Check that is is a text CDF file */
994
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
995
+ if (strncmp("[CDF]", linebuffer, 5) == 0){
996
+ fclose(infile);
997
+ return 1;
998
+ }
999
+ fclose(infile);
1000
+ return 0;
1001
+ }
1002
+
1003
+
1004
+
1005
+
1006
+
1007
+ /*******************************************************************
1008
+ **
1009
+ ** SEXP ReadtextCDFFileIntoRList(SEXP filename)
1010
+ **
1011
+ ** SEXP filename - name of cdffile. Should be full path to file.
1012
+ **
1013
+ ** this function should be called from R. When supplied the name
1014
+ ** of a text cdf file it first parses it into a C data structure.
1015
+ **
1016
+ ** An R list structure is then constructed from the C data structure
1017
+ **
1018
+ ** The R list is then returned.
1019
+ **
1020
+ ** Note no special effort is made to reduce down the information in
1021
+ ** the text CDF file. Instead almost everything is returned, even
1022
+ ** somewhat redundant information.
1023
+ **
1024
+ ******************************************************************/
1025
+
1026
+
1027
+ SEXP ReadtextCDFFileIntoRList(SEXP filename){
1028
+
1029
+ SEXP CDFInfo; /* this is the object that will be returned */
1030
+ SEXP CDFInfoNames;
1031
+ SEXP HEADER; /* The file header */
1032
+ SEXP HEADERNames;
1033
+ SEXP TEMPSXP;
1034
+ SEXP TEMPSXP2;
1035
+ SEXP TEMPSXP3;
1036
+ SEXP TEMPSXP4;
1037
+
1038
+ SEXP QCUNITS;
1039
+ SEXP UNITS;
1040
+
1041
+
1042
+ /* Basically fields (possible) for QC probes */
1043
+ SEXP QCUNITSProbeInfoX;
1044
+ SEXP QCUNITSProbeInfoY;
1045
+ SEXP QCUNITSProbeInfoPROBE;
1046
+ SEXP QCUNITSProbeInfoPL;
1047
+ SEXP QCUNITSProbeInfoATOM;
1048
+ SEXP QCUNITSProbeInfoINDEX;
1049
+ SEXP QCUNITSProbeInfoPMFLAG;
1050
+ SEXP QCUNITSProbeInfoBGFLAG;
1051
+ SEXP QCUNITSProbeInfoNames = R_NilValue;
1052
+ SEXP QCUNITSProbeInforow_names;
1053
+
1054
+ /* Basically fields (possible) for Unit Block probes */
1055
+
1056
+ SEXP UNITSProbeInfoX;
1057
+ SEXP UNITSProbeInfoY;
1058
+ SEXP UNITSProbeInfoPROBE;
1059
+ SEXP UNITSProbeInfoFEAT;
1060
+ SEXP UNITSProbeInfoQUAL;
1061
+ SEXP UNITSProbeInfoEXPOS;
1062
+ SEXP UNITSProbeInfoPOS;
1063
+ SEXP UNITSProbeInfoCBASE;
1064
+ SEXP UNITSProbeInfoPBASE;
1065
+ SEXP UNITSProbeInfoTBASE;
1066
+ SEXP UNITSProbeInfoATOM;
1067
+ SEXP UNITSProbeInfoINDEX;
1068
+ SEXP UNITSProbeInfoCODONIND;
1069
+ SEXP UNITSProbeInfoCODON;
1070
+ SEXP UNITSProbeInfoREGIONTYPE;
1071
+ SEXP UNITSProbeInfoNames;
1072
+ SEXP UNITSProbeInforow_names;
1073
+
1074
+ char buf[10]; /* temporary buffer for making names */
1075
+ int i,j,k,l;
1076
+ int tmpsum =0;
1077
+
1078
+
1079
+ cdf_text my_cdf;
1080
+
1081
+ const char *cur_file_name;
1082
+ cur_file_name = CHAR(STRING_ELT(filename,0));
1083
+
1084
+ if(!read_cdf_text(cur_file_name, &my_cdf)){
1085
+ error("Problem reading text cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
1086
+ }
1087
+
1088
+
1089
+ /* Now build the R list structure */
1090
+
1091
+
1092
+ /* return the full structure */
1093
+ PROTECT(CDFInfo = allocVector(VECSXP,3));
1094
+ PROTECT(CDFInfoNames = allocVector(STRSXP,3));
1095
+ SET_STRING_ELT(CDFInfoNames,0,mkChar("Chip"));
1096
+ SET_STRING_ELT(CDFInfoNames,1,mkChar("QC"));
1097
+ SET_STRING_ELT(CDFInfoNames,2,mkChar("Unit"));
1098
+
1099
+ setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
1100
+ UNPROTECT(1);
1101
+
1102
+ /* Deal with the HEADER */
1103
+ PROTECT(HEADER = allocVector(VECSXP,8));
1104
+ PROTECT(HEADERNames = allocVector(STRSXP,8));
1105
+ SET_STRING_ELT(HEADERNames,0,mkChar("Version"));
1106
+ SET_STRING_ELT(HEADERNames,1,mkChar("Name"));
1107
+ SET_STRING_ELT(HEADERNames,2,mkChar("Rows"));
1108
+ SET_STRING_ELT(HEADERNames,3,mkChar("Cols"));
1109
+ SET_STRING_ELT(HEADERNames,4,mkChar("NumberOfUnits"));
1110
+ SET_STRING_ELT(HEADERNames,5,mkChar("MaxUnit"));
1111
+ SET_STRING_ELT(HEADERNames,6,mkChar("NumQCUnits"));
1112
+ SET_STRING_ELT(HEADERNames,7,mkChar("ChipReference"));
1113
+ setAttrib(HEADER,R_NamesSymbol,HEADERNames);
1114
+ UNPROTECT(1);
1115
+
1116
+ PROTECT(TEMPSXP = allocVector(STRSXP,1));
1117
+ SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.version));
1118
+ SET_VECTOR_ELT(HEADER,0,TEMPSXP);
1119
+ UNPROTECT(1);
1120
+
1121
+ PROTECT(TEMPSXP = allocVector(STRSXP,1));
1122
+ SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.name));
1123
+ SET_VECTOR_ELT(HEADER,1,TEMPSXP);
1124
+ UNPROTECT(1);
1125
+
1126
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1127
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.rows;
1128
+ SET_VECTOR_ELT(HEADER,2,TEMPSXP);
1129
+ UNPROTECT(1);
1130
+
1131
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1132
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.cols;
1133
+ SET_VECTOR_ELT(HEADER,3,TEMPSXP);
1134
+ UNPROTECT(1);
1135
+
1136
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1137
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.numberofunits;
1138
+ SET_VECTOR_ELT(HEADER,4,TEMPSXP);
1139
+ UNPROTECT(1);
1140
+
1141
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1142
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.maxunit;
1143
+ SET_VECTOR_ELT(HEADER,5,TEMPSXP);
1144
+ UNPROTECT(1);
1145
+
1146
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1147
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.NumQCUnits;
1148
+ SET_VECTOR_ELT(HEADER,6,TEMPSXP);
1149
+ UNPROTECT(1);
1150
+
1151
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1152
+ if (my_cdf.header.chipreference !=NULL){
1153
+ SET_VECTOR_ELT(TEMPSXP,0,mkChar(my_cdf.header.chipreference));
1154
+ SET_VECTOR_ELT(HEADER,7,TEMPSXP);
1155
+ }
1156
+ UNPROTECT(1);
1157
+
1158
+ SET_VECTOR_ELT(CDFInfo,0,HEADER);
1159
+
1160
+ PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.NumQCUnits));
1161
+ for (i=0; i <my_cdf.header.NumQCUnits; i++){
1162
+ PROTECT(TEMPSXP=allocVector(VECSXP,3));
1163
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1164
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.qc_units[i].type;
1165
+ SET_VECTOR_ELT(TEMPSXP,0,TEMPSXP2);
1166
+ UNPROTECT(1);
1167
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1168
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.qc_units[i].n_probes;
1169
+ SET_VECTOR_ELT(TEMPSXP,1,TEMPSXP2);
1170
+ UNPROTECT(1);
1171
+ /*Figure out what the probe information is for this QC unit and then allocate the space */
1172
+ tmpsum=0;
1173
+ for (j=0; j < 8; j++){
1174
+ tmpsum+=my_cdf.qc_units[i].qccontains[j];
1175
+ }
1176
+
1177
+ if (tmpsum == 6){
1178
+ PROTECT(TEMPSXP2 = allocVector(VECSXP,6));
1179
+ } else if (tmpsum ==8){
1180
+ PROTECT(TEMPSXP2 = allocVector(VECSXP,8));
1181
+ }
1182
+
1183
+ if (tmpsum == 6){
1184
+ PROTECT(QCUNITSProbeInfoX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1185
+ PROTECT(QCUNITSProbeInfoY = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1186
+ PROTECT(QCUNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1187
+ PROTECT(QCUNITSProbeInfoPL = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1188
+ PROTECT(QCUNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1189
+ PROTECT(QCUNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1190
+ } else if (tmpsum == 8){
1191
+ PROTECT(QCUNITSProbeInfoX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1192
+ PROTECT(QCUNITSProbeInfoY = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1193
+ PROTECT(QCUNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1194
+ PROTECT(QCUNITSProbeInfoPL = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1195
+ PROTECT(QCUNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1196
+ PROTECT(QCUNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1197
+ PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1198
+ PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1199
+
1200
+ }
1201
+
1202
+ /* store what was read in for the QC units in the R structure */
1203
+ if (tmpsum == 6){
1204
+ for (j =0; j < my_cdf.qc_units[i].n_probes; j++){
1205
+ INTEGER_POINTER(QCUNITSProbeInfoX)[j] = my_cdf.qc_units[i].qc_probes[j].x;
1206
+ INTEGER_POINTER(QCUNITSProbeInfoY)[j] = my_cdf.qc_units[i].qc_probes[j].y;
1207
+ SET_STRING_ELT(QCUNITSProbeInfoPROBE,j,mkChar( my_cdf.qc_units[i].qc_probes[j].probe));
1208
+ INTEGER_POINTER(QCUNITSProbeInfoPL)[j] = my_cdf.qc_units[i].qc_probes[j].plen;
1209
+ INTEGER_POINTER(QCUNITSProbeInfoATOM)[j] = my_cdf.qc_units[i].qc_probes[j].atom;
1210
+ INTEGER_POINTER(QCUNITSProbeInfoINDEX)[j] = my_cdf.qc_units[i].qc_probes[j].index;
1211
+
1212
+ }
1213
+ } else if (tmpsum == 8){
1214
+ for (j =0; j < my_cdf.qc_units[i].n_probes; j++){
1215
+ INTEGER_POINTER(QCUNITSProbeInfoX)[j] = my_cdf.qc_units[i].qc_probes[j].x;
1216
+ INTEGER_POINTER(QCUNITSProbeInfoY)[j] = my_cdf.qc_units[i].qc_probes[j].y;
1217
+ SET_STRING_ELT(QCUNITSProbeInfoPROBE,j,mkChar( my_cdf.qc_units[i].qc_probes[j].probe));
1218
+ INTEGER_POINTER(QCUNITSProbeInfoPL)[j] = my_cdf.qc_units[i].qc_probes[j].plen;
1219
+ INTEGER_POINTER(QCUNITSProbeInfoATOM)[j] = my_cdf.qc_units[i].qc_probes[j].atom;
1220
+ INTEGER_POINTER(QCUNITSProbeInfoINDEX)[j] = my_cdf.qc_units[i].qc_probes[j].index;
1221
+ INTEGER_POINTER(QCUNITSProbeInfoPMFLAG)[j] = my_cdf.qc_units[i].qc_probes[j].match;
1222
+ INTEGER_POINTER(QCUNITSProbeInfoBGFLAG)[j] = my_cdf.qc_units[i].qc_probes[j].bg;
1223
+ }
1224
+ }
1225
+
1226
+ if (tmpsum == 6){
1227
+ SET_VECTOR_ELT(TEMPSXP2,0,QCUNITSProbeInfoX);
1228
+ SET_VECTOR_ELT(TEMPSXP2,1,QCUNITSProbeInfoY);
1229
+ SET_VECTOR_ELT(TEMPSXP2,2,QCUNITSProbeInfoPROBE);
1230
+ SET_VECTOR_ELT(TEMPSXP2,3,QCUNITSProbeInfoPL);
1231
+ SET_VECTOR_ELT(TEMPSXP2,4,QCUNITSProbeInfoATOM);
1232
+ SET_VECTOR_ELT(TEMPSXP2,5,QCUNITSProbeInfoINDEX);
1233
+ } else if (tmpsum ==8){
1234
+ SET_VECTOR_ELT(TEMPSXP2,0,QCUNITSProbeInfoX);
1235
+ SET_VECTOR_ELT(TEMPSXP2,1,QCUNITSProbeInfoY);
1236
+ SET_VECTOR_ELT(TEMPSXP2,2,QCUNITSProbeInfoPROBE);
1237
+ SET_VECTOR_ELT(TEMPSXP2,3,QCUNITSProbeInfoPL);
1238
+ SET_VECTOR_ELT(TEMPSXP2,4,QCUNITSProbeInfoATOM);
1239
+ SET_VECTOR_ELT(TEMPSXP2,5,QCUNITSProbeInfoINDEX);
1240
+ SET_VECTOR_ELT(TEMPSXP2,6,QCUNITSProbeInfoPMFLAG);
1241
+ SET_VECTOR_ELT(TEMPSXP2,7,QCUNITSProbeInfoBGFLAG);
1242
+
1243
+ }
1244
+
1245
+
1246
+ if (tmpsum == 6){
1247
+ UNPROTECT(6);
1248
+ } else if (tmpsum == 8){
1249
+ UNPROTECT(8);
1250
+ }
1251
+
1252
+
1253
+ if (tmpsum == 6){
1254
+ PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,6));
1255
+ SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
1256
+ SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
1257
+ SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("Probe"));
1258
+ SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("ProbeLength"));
1259
+ SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("Atom"));
1260
+ SET_STRING_ELT(QCUNITSProbeInfoNames,5,mkChar("Index"));
1261
+ } else if (tmpsum == 8){
1262
+ PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,8));
1263
+ SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
1264
+ SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
1265
+ SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("Probe"));
1266
+ SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("ProbeLength"));
1267
+ SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("Atom"));
1268
+ SET_STRING_ELT(QCUNITSProbeInfoNames,5,mkChar("Index"));
1269
+ SET_STRING_ELT(QCUNITSProbeInfoNames,6,mkChar("PMFlag"));
1270
+ SET_STRING_ELT(QCUNITSProbeInfoNames,7,mkChar("BGProbeFlag"));
1271
+ }
1272
+ setAttrib(TEMPSXP2,R_NamesSymbol,QCUNITSProbeInfoNames);
1273
+ UNPROTECT(1);
1274
+ PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1275
+
1276
+ for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
1277
+ sprintf(buf, "%d", j+1);
1278
+ SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
1279
+ }
1280
+ setAttrib(TEMPSXP2, R_RowNamesSymbol, QCUNITSProbeInforow_names);
1281
+ UNPROTECT(1);
1282
+
1283
+ setAttrib(TEMPSXP2,R_ClassSymbol,mkString("data.frame"));
1284
+ SET_VECTOR_ELT(TEMPSXP,2,TEMPSXP2);
1285
+ UNPROTECT(1);
1286
+
1287
+ PROTECT(TEMPSXP2=allocVector(STRSXP,3));
1288
+ SET_STRING_ELT(TEMPSXP2,0,mkChar("Type"));
1289
+ SET_STRING_ELT(TEMPSXP2,1,mkChar("NumberCells"));
1290
+ SET_STRING_ELT(TEMPSXP2,2,mkChar("QCCells"));
1291
+ setAttrib(TEMPSXP,R_NamesSymbol,TEMPSXP2);
1292
+ UNPROTECT(1);
1293
+ SET_VECTOR_ELT(QCUNITS,i,TEMPSXP);
1294
+
1295
+ UNPROTECT(1);
1296
+ }
1297
+ SET_VECTOR_ELT(CDFInfo,1,QCUNITS);
1298
+ UNPROTECT(1);
1299
+
1300
+
1301
+ PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.numberofunits));
1302
+ for (i=0; i < my_cdf.header.numberofunits; i++){
1303
+ PROTECT(TEMPSXP=allocVector(VECSXP,8));
1304
+ PROTECT(TEMPSXP2=allocVector(STRSXP,1));
1305
+
1306
+ SET_STRING_ELT(TEMPSXP2,0,mkChar(my_cdf.units[i].name));
1307
+ SET_VECTOR_ELT(TEMPSXP,0,TEMPSXP2);
1308
+ UNPROTECT(1);
1309
+
1310
+
1311
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1312
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].direction;
1313
+ SET_VECTOR_ELT(TEMPSXP,1,TEMPSXP2);
1314
+ UNPROTECT(1);
1315
+
1316
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1317
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].num_atoms;
1318
+ SET_VECTOR_ELT(TEMPSXP,2,TEMPSXP2);
1319
+ UNPROTECT(1);
1320
+
1321
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1322
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].num_cells;
1323
+ SET_VECTOR_ELT(TEMPSXP,3,TEMPSXP2);
1324
+ UNPROTECT(1);
1325
+
1326
+
1327
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1328
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].unit_number;
1329
+ SET_VECTOR_ELT(TEMPSXP,4,TEMPSXP2);
1330
+ UNPROTECT(1);
1331
+
1332
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1333
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].unit_type;
1334
+ SET_VECTOR_ELT(TEMPSXP,5,TEMPSXP2);
1335
+ UNPROTECT(1);
1336
+
1337
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1338
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].numberblocks;
1339
+ SET_VECTOR_ELT(TEMPSXP,6,TEMPSXP2);
1340
+ UNPROTECT(1);
1341
+
1342
+ PROTECT(TEMPSXP2 = allocVector(VECSXP,my_cdf.units[i].numberblocks));
1343
+
1344
+ for (j=0; j <my_cdf.units[i].numberblocks; j++){
1345
+ PROTECT(TEMPSXP3 = allocVector(VECSXP,8));
1346
+
1347
+
1348
+ PROTECT(TEMPSXP4=allocVector(STRSXP,1));
1349
+
1350
+ SET_STRING_ELT(TEMPSXP4,0,mkChar(my_cdf.units[i].blocks[j].name));
1351
+ SET_VECTOR_ELT(TEMPSXP3,0,TEMPSXP4);
1352
+ UNPROTECT(1);
1353
+
1354
+
1355
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1356
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].blocknumber;
1357
+ SET_VECTOR_ELT(TEMPSXP3,1,TEMPSXP4);
1358
+ UNPROTECT(1);
1359
+
1360
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1361
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].num_atoms;
1362
+ SET_VECTOR_ELT(TEMPSXP3,2,TEMPSXP4);
1363
+ UNPROTECT(1);
1364
+
1365
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1366
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].num_cells;
1367
+ SET_VECTOR_ELT(TEMPSXP3,3,TEMPSXP4);
1368
+ UNPROTECT(1);
1369
+
1370
+
1371
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1372
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].start_position;
1373
+ SET_VECTOR_ELT(TEMPSXP3,4,TEMPSXP4);
1374
+ UNPROTECT(1);
1375
+
1376
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1377
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].stop_position;
1378
+ SET_VECTOR_ELT(TEMPSXP3,5,TEMPSXP4);
1379
+ UNPROTECT(1);
1380
+
1381
+
1382
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1383
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].direction;
1384
+ SET_VECTOR_ELT(TEMPSXP3,6,TEMPSXP4);
1385
+ UNPROTECT(1);
1386
+
1387
+ PROTECT(TEMPSXP4=allocVector(VECSXP,15));
1388
+
1389
+
1390
+ PROTECT(UNITSProbeInfoX = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1391
+ PROTECT(UNITSProbeInfoY = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1392
+ PROTECT(UNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1393
+ PROTECT(UNITSProbeInfoFEAT = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1394
+ PROTECT(UNITSProbeInfoQUAL = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1395
+ PROTECT(UNITSProbeInfoEXPOS = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1396
+ PROTECT(UNITSProbeInfoPOS = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1397
+ PROTECT(UNITSProbeInfoCBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1398
+ PROTECT(UNITSProbeInfoPBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1399
+ PROTECT(UNITSProbeInfoTBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1400
+ PROTECT(UNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1401
+ PROTECT(UNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1402
+ PROTECT(UNITSProbeInfoCODONIND = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1403
+ PROTECT(UNITSProbeInfoCODON = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1404
+ PROTECT(UNITSProbeInfoREGIONTYPE = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1405
+ for (k=0; k < my_cdf.units[i].blocks[j].num_cells; k++){
1406
+ INTEGER_POINTER(UNITSProbeInfoX)[k]=my_cdf.units[i].blocks[j].probes[k].x;
1407
+ INTEGER_POINTER(UNITSProbeInfoY)[k]=my_cdf.units[i].blocks[j].probes[k].y;
1408
+ INTEGER_POINTER(UNITSProbeInfoEXPOS)[k]=my_cdf.units[i].blocks[j].probes[k].expos;
1409
+ INTEGER_POINTER(UNITSProbeInfoPOS)[k]=my_cdf.units[i].blocks[j].probes[k].pos;
1410
+ INTEGER_POINTER(UNITSProbeInfoATOM)[k]=my_cdf.units[i].blocks[j].probes[k].atom;
1411
+ INTEGER_POINTER(UNITSProbeInfoINDEX)[k]=my_cdf.units[i].blocks[j].probes[k].index;
1412
+ INTEGER_POINTER(UNITSProbeInfoCODONIND)[k]=my_cdf.units[i].blocks[j].probes[k].codonid;
1413
+ INTEGER_POINTER(UNITSProbeInfoCODON)[k]=my_cdf.units[i].blocks[j].probes[k].codon;
1414
+ INTEGER_POINTER(UNITSProbeInfoREGIONTYPE)[k]=my_cdf.units[i].blocks[j].probes[k].regiontype;
1415
+ SET_VECTOR_ELT(UNITSProbeInfoPROBE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].probe));
1416
+ SET_STRING_ELT(UNITSProbeInfoFEAT,k,mkChar(my_cdf.units[i].blocks[j].probes[k].feat));
1417
+ SET_STRING_ELT(UNITSProbeInfoQUAL,k,mkChar(my_cdf.units[i].blocks[j].probes[k].qual));
1418
+ SET_STRING_ELT(UNITSProbeInfoCBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].cbase));
1419
+ SET_STRING_ELT(UNITSProbeInfoPBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].pbase));
1420
+ SET_STRING_ELT(UNITSProbeInfoTBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].tbase));
1421
+ }
1422
+
1423
+
1424
+ SET_VECTOR_ELT(TEMPSXP4,0,UNITSProbeInfoX);
1425
+ SET_VECTOR_ELT(TEMPSXP4,1,UNITSProbeInfoY);
1426
+ SET_VECTOR_ELT(TEMPSXP4,2,UNITSProbeInfoPROBE);
1427
+ SET_VECTOR_ELT(TEMPSXP4,3,UNITSProbeInfoFEAT);
1428
+ SET_VECTOR_ELT(TEMPSXP4,4,UNITSProbeInfoQUAL);
1429
+ SET_VECTOR_ELT(TEMPSXP4,5,UNITSProbeInfoEXPOS);
1430
+ SET_VECTOR_ELT(TEMPSXP4,6,UNITSProbeInfoPOS);
1431
+ SET_VECTOR_ELT(TEMPSXP4,7,UNITSProbeInfoCBASE);
1432
+ SET_VECTOR_ELT(TEMPSXP4,8,UNITSProbeInfoPBASE);
1433
+ SET_VECTOR_ELT(TEMPSXP4,9,UNITSProbeInfoTBASE);
1434
+ SET_VECTOR_ELT(TEMPSXP4,10,UNITSProbeInfoATOM);
1435
+ SET_VECTOR_ELT(TEMPSXP4,11,UNITSProbeInfoINDEX);
1436
+ SET_VECTOR_ELT(TEMPSXP4,12,UNITSProbeInfoCODONIND);
1437
+ SET_VECTOR_ELT(TEMPSXP4,13,UNITSProbeInfoCODON);
1438
+ SET_VECTOR_ELT(TEMPSXP4,14,UNITSProbeInfoREGIONTYPE);
1439
+
1440
+
1441
+
1442
+
1443
+
1444
+
1445
+
1446
+ UNPROTECT(15);
1447
+
1448
+ PROTECT(UNITSProbeInfoNames =allocVector(STRSXP,15));
1449
+ SET_STRING_ELT(UNITSProbeInfoNames,0,mkChar("x"));
1450
+ SET_STRING_ELT(UNITSProbeInfoNames,1,mkChar("y"));
1451
+ SET_STRING_ELT(UNITSProbeInfoNames,2,mkChar("Probe"));
1452
+ SET_STRING_ELT(UNITSProbeInfoNames,3,mkChar("Feat"));
1453
+ SET_STRING_ELT(UNITSProbeInfoNames,4,mkChar("Qual"));
1454
+ SET_STRING_ELT(UNITSProbeInfoNames,5,mkChar("Expos"));
1455
+ SET_STRING_ELT(UNITSProbeInfoNames,6,mkChar("Pos"));
1456
+ SET_STRING_ELT(UNITSProbeInfoNames,7,mkChar("cbase"));
1457
+ SET_STRING_ELT(UNITSProbeInfoNames,8,mkChar("pbase"));
1458
+ SET_STRING_ELT(UNITSProbeInfoNames,9,mkChar("tbase"));
1459
+ SET_STRING_ELT(UNITSProbeInfoNames,10,mkChar("Atom"));
1460
+ SET_STRING_ELT(UNITSProbeInfoNames,11,mkChar("Index"));
1461
+ SET_STRING_ELT(UNITSProbeInfoNames,12,mkChar("CodonInd"));
1462
+ SET_STRING_ELT(UNITSProbeInfoNames,13,mkChar("Codon"));
1463
+ SET_STRING_ELT(UNITSProbeInfoNames,14,mkChar("Regiontype"));
1464
+
1465
+
1466
+ setAttrib(TEMPSXP4,R_NamesSymbol,UNITSProbeInfoNames);
1467
+ UNPROTECT(1);
1468
+
1469
+ PROTECT(UNITSProbeInforow_names= allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1470
+
1471
+ for (l=0; l < my_cdf.units[i].blocks[j].num_cells; l++){
1472
+ sprintf(buf, "%d", l+1);
1473
+ SET_STRING_ELT(UNITSProbeInforow_names,l,mkChar(buf));
1474
+ }
1475
+ setAttrib(TEMPSXP4, R_RowNamesSymbol, UNITSProbeInforow_names);
1476
+ UNPROTECT(1);
1477
+
1478
+
1479
+
1480
+ setAttrib(TEMPSXP4,R_ClassSymbol,mkString("data.frame"));
1481
+ SET_VECTOR_ELT(TEMPSXP3,7,TEMPSXP4);
1482
+ UNPROTECT(1);
1483
+
1484
+
1485
+
1486
+ PROTECT(TEMPSXP4=allocVector(STRSXP,8));
1487
+ SET_STRING_ELT(TEMPSXP4,0,mkChar("Name"));
1488
+ SET_STRING_ELT(TEMPSXP4,1,mkChar("BlockNumber"));
1489
+ SET_STRING_ELT(TEMPSXP4,2,mkChar("NumAtoms"));
1490
+ SET_STRING_ELT(TEMPSXP4,3,mkChar("NumCells"));
1491
+ SET_STRING_ELT(TEMPSXP4,4,mkChar("StartPosition"));
1492
+ SET_STRING_ELT(TEMPSXP4,5,mkChar("StopPosition"));
1493
+ SET_STRING_ELT(TEMPSXP4,6,mkChar("Direction"));
1494
+ SET_STRING_ELT(TEMPSXP4,7,mkChar("Unit_Block_Cells"));
1495
+ setAttrib(TEMPSXP3,R_NamesSymbol,TEMPSXP4);
1496
+ UNPROTECT(1);
1497
+
1498
+ SET_VECTOR_ELT(TEMPSXP2,j,TEMPSXP3);
1499
+ UNPROTECT(1);
1500
+ }
1501
+
1502
+
1503
+
1504
+
1505
+
1506
+
1507
+ SET_VECTOR_ELT(TEMPSXP,7,TEMPSXP2);
1508
+ UNPROTECT(1);
1509
+
1510
+
1511
+
1512
+
1513
+ PROTECT(TEMPSXP2 = allocVector(STRSXP,8));
1514
+ SET_STRING_ELT(TEMPSXP2,0,mkChar("Name"));
1515
+ SET_STRING_ELT(TEMPSXP2,1,mkChar("Direction"));
1516
+ SET_STRING_ELT(TEMPSXP2,2,mkChar("NumAtoms"));
1517
+ SET_STRING_ELT(TEMPSXP2,3,mkChar("NumCells"));
1518
+ SET_STRING_ELT(TEMPSXP2,4,mkChar("UnitNumber"));
1519
+ SET_STRING_ELT(TEMPSXP2,5,mkChar("UnitType"));
1520
+ SET_STRING_ELT(TEMPSXP2,6,mkChar("NumberBlocks"));
1521
+ SET_STRING_ELT(TEMPSXP2,7,mkChar("Unit_Block"));
1522
+ setAttrib(TEMPSXP,R_NamesSymbol,TEMPSXP2);
1523
+ UNPROTECT(1);
1524
+
1525
+
1526
+
1527
+
1528
+
1529
+ SET_VECTOR_ELT(UNITS,i,TEMPSXP);
1530
+ UNPROTECT(1);
1531
+
1532
+
1533
+
1534
+ }
1535
+ SET_VECTOR_ELT(CDFInfo,2,UNITS);
1536
+ UNPROTECT(1);
1537
+
1538
+
1539
+
1540
+ dealloc_cdf_text(&my_cdf);
1541
+ UNPROTECT(2);
1542
+ return CDFInfo;
1543
+ }
1544
+
1545
+
1546
+
1547
+
1548
+
1549
+ /*************************************************************
1550
+ **
1551
+ ** SEXP CheckCDFtext(SEXP filename)
1552
+ **
1553
+ ** Takes a given file name and returns 1 if it is a text format CDF file
1554
+ ** otherwise it returns 0
1555
+ **
1556
+ *************************************************************/
1557
+
1558
+
1559
+
1560
+ SEXP CheckCDFtext(SEXP filename){
1561
+ SEXP tmp;
1562
+ int good;
1563
+ const char *cur_file_name;
1564
+
1565
+ cur_file_name = CHAR(STRING_ELT(filename,0));
1566
+
1567
+ good = isTextCDFFile(cur_file_name);
1568
+
1569
+ PROTECT(tmp= allocVector(INTSXP,1));
1570
+
1571
+ INTEGER(tmp)[0] = good;
1572
+
1573
+ UNPROTECT(1);
1574
+ return tmp;
1575
+ }
1576
+