bio-affy 0.1.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
Binary file
@@ -0,0 +1,1576 @@
1
+ /****************************************************************
2
+ **
3
+ ** File: read_cdffile2.c
4
+ **
5
+ ** Implementation by: B. M. Bolstad
6
+ **
7
+ ** Copyright (C) B. M. Bolstad 2005
8
+ **
9
+ ** A parser designed to read text CDF files into an R List structure
10
+ **
11
+ ** Note this version only parses GC3.0 version text files (which should
12
+ ** be almost all text CDF files currently used)
13
+ **
14
+ ** Note that the original text CDF parser (on which this file is in no
15
+ ** way based) was written by Laurent Gautier. That file was named
16
+ ** read_cdffile.c (originally part of affy and then later makecdfenv)
17
+ **
18
+ ** Implemented based on documentation available from Affymetrix
19
+ **
20
+ ** Implementation begun 2005.
21
+ **
22
+ ** Modification Dates
23
+ ** Jul 24 - Initial version
24
+ ** Sep 20 - Continued Implementation
25
+ ** Sep 21 - Continued Implementation and debugging
26
+ ** Sep 22 - Continued Implementation and testing
27
+ ** Sep 24 - QCunit probes, Unit Block probes, Finish and tested.
28
+ ** Dec 1, 2005 - Some comment cleaning. Added isTextCDFFile,CheckCDFtext
29
+ ** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
30
+ ** May 31, 2006 - fix some compiler warnings
31
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
32
+ **
33
+ **
34
+ *******************************************************************/
35
+
36
+ #include <R.h>
37
+ #include <Rdefines.h>
38
+
39
+ #include "stdlib.h"
40
+ #include "stdio.h"
41
+
42
+ #ifdef BIOLIB
43
+ #include <biolib_R_map.h>
44
+ #endif
45
+
46
+ #define BUFFER_SIZE 1024
47
+
48
+
49
/*****************************************************************
 **
 ** cdf_text_header
 **
 ** Header information taken from the "CDF" and "Chip" sections
 ** of a text CDF file.
 **
 ******************************************************************/

typedef struct {
  char *version;        /* version string read from the line after [CDF] */
  char *name;           /* value of "Name" in the [Chip] section */
  int rows;             /* value of "Rows" */
  int cols;             /* value of "Cols" */
  int numberofunits;    /* value of "NumberOfUnits" */
  int maxunit;          /* value of "MaxUnit" */
  int NumQCUnits;       /* value of "NumQCUnits" */
  char *chipreference;  /* value of "ChipReference"; NULL when the line has no value */
} cdf_text_header;
69
+
70
+
71
/*****************************************************************
 **
 ** cdf_text_qc_probe
 **
 ** One probe line of a QC unit. Each member corresponds to the
 ** CellHeader column of the same (upper-cased) name.
 ** Note the "CYCLES" item is ignored and never parsed.
 **
 ******************************************************************/

typedef struct {
  int x;        /* X column */
  int y;        /* Y column */
  char *probe;  /* PROBE column (heap copy of the text) */
  int plen;     /* PLEN column */
  int atom;     /* ATOM column */
  int index;    /* INDEX column */
  int match;    /* MATCH column */
  int bg;       /* BG column */
} cdf_text_qc_probe;
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ /*******************************************************************
98
+ **
99
+ ** A structure for holding QC units information. These are
100
+ ** areas of the chip that contain probes that may or may not be useful
101
+ ** for QC and other purposes.
102
+ **
103
+ **
104
+ *******************************************************************/
105
+
106
+
107
+
108
+ typedef struct{
109
+ int type;
110
+ unsigned int n_probes;
111
+ int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
112
+ cdf_text_qc_probe *qc_probes;
113
+
114
+ } cdf_text_qc_unit;
115
+
116
+
117
/*******************************************************************
 **
 ** cdf_text_unit_block_probe
 **
 ** One probe (cell) line belonging to a unit block. Probes are
 ** stored within blocks. The members are listed in the order in
 ** which the parser reads them from the line.
 **
 *******************************************************************/

typedef struct {
  int x;           /* field 1 */
  int y;           /* field 2 */
  char *probe;     /* field 3 (heap copy of the text) */
  char *feat;      /* field 4 (heap copy of the text) */
  char *qual;      /* field 5 (heap copy of the text) */
  int expos;       /* field 6 */
  int pos;         /* field 7 */
  char *cbase;     /* field 8 (heap copy of the text) */
  char *pbase;     /* field 9 (heap copy of the text) */
  char *tbase;     /* field 10 (heap copy of the text) */
  int atom;        /* field 11 */
  int index;       /* field 12 */
  int codonid;     /* field 13 */
  int codon;       /* field 14 */
  int regiontype;  /* field 15 */
  char *region;    /* not filled in by the probe-line reader */
} cdf_text_unit_block_probe;
143
+
144
+
145
+
146
+
147
+ /*******************************************************************
148
+ **
149
+ ** A structure holding Unit_blocks
150
+ **
151
+ ** blocks are stored within units.
152
+ ** blocks contain many probes
153
+ **
154
+ *******************************************************************/
155
+
156
+ typedef struct{
157
+ char *name;
158
+ int blocknumber;
159
+ int num_atoms;
160
+ int num_cells;
161
+ int start_position;
162
+ int stop_position;
163
+ int direction;
164
+ cdf_text_unit_block_probe *probes;
165
+
166
+ } cdf_text_unit_block;
167
+
168
+
169
+
170
+
171
+
172
+
173
+ /*******************************************************************
174
+ **
175
+ ** A structure for holding "Units" AKA known as probesets
176
+ **
177
+ ** Each unit contains one or more blocks. Each block contains one or
178
+ ** more probes
179
+ **
180
+ *******************************************************************/
181
+
182
+
183
+ typedef struct{
184
+ char *name;
185
+ int direction;
186
+ int num_atoms;
187
+ int num_cells;
188
+ int unit_number;
189
+ int unit_type;
190
+ int numberblocks;
191
+ int MutationType;
192
+ cdf_text_unit_block *blocks;
193
+ } cdf_text_unit;
194
+
195
+
196
+
197
+ /*******************************************************************
198
+ **
199
+ ** A structure for holding a text CDF file
200
+ **
201
+ ** text cdf files consist of
202
+ ** basic header information
203
+ ** qcunits
204
+ ** - qc probes
205
+ ** units (aka probesets)
206
+ ** - blocks
207
+ ** - probes
208
+ **
209
+ **
210
+ *******************************************************************/
211
+
212
+ typedef struct{
213
+ cdf_text_header header;
214
+ cdf_text_qc_unit *qc_units;
215
+ cdf_text_unit *units;
216
+ } cdf_text;
217
+
218
+
219
+ /**************************************************************
220
+ **
221
+ ** The following code is for tokenizing strings
222
+ ** originally included in read_abatch.c from the affy package.
223
+ **
224
+ *************************************************************/
225
+
226
/***************************************************************
 **
 ** tokenset
 **
 ** Holds a set of tokens. Typically a tokenset is created by
 ** breaking a character string on a set of delimiters.
 **
 **************************************************************/

typedef struct {
  char **tokens;  /* array of token strings */
  int n;          /* number of tokens in the array */
} tokenset;
244
+
245
+
246
+
247
+ /******************************************************************
248
+ **
249
+ ** tokenset *tokenize(char *str, char *delimiters)
250
+ **
251
+ ** char *str - a string to break into tokens
252
+ ** char *delimiters - delimiters to use in breaking up the line
253
+ **
254
+ **
255
+ ** RETURNS a new tokenset
256
+ **
257
+ ** Given a string, split into tokens based on a set of delimitors
258
+ **
259
+ *****************************************************************/
260
+
261
+ static tokenset *tokenize(char *str, char *delimiters){
262
+
263
+ int i=0;
264
+
265
+ char *current_token;
266
+ tokenset *my_tokenset = Calloc(1,tokenset);
267
+ my_tokenset->n=0;
268
+
269
+ my_tokenset->tokens = NULL;
270
+
271
+ current_token = strtok(str,delimiters);
272
+ while (current_token != NULL){
273
+ my_tokenset->n++;
274
+ my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
275
+ my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
276
+ strcpy(my_tokenset->tokens[i],current_token);
277
+ i++;
278
+ current_token = strtok(NULL,delimiters);
279
+ }
280
+
281
+ return my_tokenset;
282
+ }
283
+
284
+
285
+ /******************************************************************
286
+ **
287
+ ** int tokenset_size(tokenset *x)
288
+ **
289
+ ** tokenset *x - a tokenset
290
+ **
291
+ ** RETURNS the number of tokens in the tokenset
292
+ **
293
+ ******************************************************************/
294
+
295
+ static int tokenset_size(tokenset *x){
296
+ return x->n;
297
+ }
298
+
299
+
300
+ /******************************************************************
301
+ **
302
+ ** char *get_token(tokenset *x, int i)
303
+ **
304
+ ** tokenset *x - a tokenset
305
+ ** int i - index of the token to return
306
+ **
307
+ ** RETURNS pointer to the i'th token
308
+ **
309
+ ******************************************************************/
310
+
311
+ static char *get_token(tokenset *x,int i){
312
+ return x->tokens[i];
313
+ }
314
+
315
+ /******************************************************************
316
+ **
317
+ ** void delete_tokens(tokenset *x)
318
+ **
319
+ ** tokenset *x - a tokenset
320
+ **
321
+ ** Deallocates all the space allocated for a tokenset
322
+ **
323
+ ******************************************************************/
324
+
325
+ static void delete_tokens(tokenset *x){
326
+
327
+ int i;
328
+
329
+ for (i=0; i < x->n; i++){
330
+ Free(x->tokens[i]);
331
+ }
332
+ Free(x->tokens);
333
+ Free(x);
334
+ }
335
+
336
/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends_in)
 **
 ** char *token - the string to check
 ** char *ends_in - the suffix we are looking for at the end of token
 **
 ** RETURNS 0 when there is no match, otherwise the index in token
 ** of the first character of the matching suffix.
 **
 ** token must be at least one character longer than ends_in, so
 **
 **    token = "TestStr", ends_in = "TestStr"  returns 0
 **    token = "TestStr", ends_in = "estStr"   returns 1
 **    token = "TestStr", ends_in = "stStr"    returns 2   ...etc
 **
 ******************************************************************/

static int token_ends_with(char *token, char *ends_in){

  int tokenlength = strlen(token);
  int ends_length = strlen(ends_in);
  int offset = tokenlength - ends_length;   /* where the suffix would begin */

  /* token is not strictly longer than ends_in, so it cannot qualify */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset,ends_in) == 0) ? offset : 0;
}
389
+
390
+
391
+ /******************************************************************
392
+ **
393
+ ** The following code, also from read_abatch.c is more about locating
394
+ ** sections in the file and reading it in.
395
+ **
396
+ ******************************************************************/
397
+
398
+
399
/**
 ** Reads one line from the given file stream into buffer using
 ** fgets(), so at most buffersize-1 characters are stored.
 **
 ** Raises an R error when fgets() returns NULL.
 **/

static void ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  char *got = fgets(buffer, buffersize, currentFile);

  if (got != NULL){
    return;
  }
  error("End of file reached unexpectedly. Perhaps this file is truncated.\n");
}
411
+
412
+
413
+
414
+ /******************************************************************
415
+ **
416
+ ** void findStartsWith(FILE *my_file,char *starts, char *buffer)
417
+ **
418
+ ** FILE *my_file - an open file to read from
419
+ ** char *starts - the string to search for at the start of each line
420
+ ** char *buffer - where to place the line that has been read.
421
+ **
422
+ **
423
+ ** Find a line that starts with the specified character string.
424
+ ** At exit buffer should contain that line
425
+ **
426
+ *****************************************************************/
427
+
428
+
429
+ static void findStartsWith(FILE *my_file,char *starts, char *buffer){
430
+
431
+ int starts_len = strlen(starts);
432
+ int match = 1;
433
+
434
+ do {
435
+ ReadFileLine(buffer, BUFFER_SIZE, my_file);
436
+ match = strncmp(starts, buffer, starts_len);
437
+ } while (match != 0);
438
+ }
439
+
440
+
441
/******************************************************************
 **
 ** void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer)
 **
 ** FILE *my_file - an open file
 ** char *sectiontitle - the section title to search for
 ** char *buffer - on return holds the line starting with sectiontitle
 **
 ** Moves forward through the file to the next line that begins with
 ** sectiontitle. This simply delegates to findStartsWith().
 **
 *****************************************************************/

static void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer){
  findStartsWith(my_file, sectiontitle, buffer);
}
455
+
456
+
457
+ /*******************************************************************
458
+ **
459
+ ** void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer)
460
+ **
461
+ ** FILE *infile - pointer to open file presumed to be a CDF file
462
+ ** cdf_text *mycdf - structure for holding cdf file
463
+ ** char *linebuffer - a place to store strings that are read in. Length
464
+ ** is given by BUFFER_SIZE
465
+ **
466
+ *******************************************************************/
467
+
468
+ static void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer){
469
+
470
+ tokenset *cur_tokenset;
471
+
472
+ /* move to the Chip section */
473
+ AdvanceToSection(infile,"[Chip]",linebuffer);
474
+
475
+ findStartsWith(infile,"Name",linebuffer);
476
+
477
+ /* Read the Name */
478
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
479
+ mycdf->header.name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
480
+ strcpy(mycdf->header.name,get_token(cur_tokenset,1));
481
+ delete_tokens(cur_tokenset);
482
+
483
+ /* Read the Rows and Cols, Number of units etc */
484
+
485
+ findStartsWith(infile,"Rows",linebuffer);
486
+ cur_tokenset = tokenize(linebuffer,"=");
487
+ mycdf->header.rows = atoi(get_token(cur_tokenset,1));
488
+ delete_tokens(cur_tokenset);
489
+
490
+ findStartsWith(infile,"Cols",linebuffer);
491
+ cur_tokenset = tokenize(linebuffer,"=");
492
+ mycdf->header.cols = atoi(get_token(cur_tokenset,1));
493
+ delete_tokens(cur_tokenset);
494
+
495
+ findStartsWith(infile,"NumberOfUnits",linebuffer);
496
+ cur_tokenset = tokenize(linebuffer,"=");
497
+ mycdf->header.numberofunits = atoi(get_token(cur_tokenset,1));
498
+ delete_tokens(cur_tokenset);
499
+
500
+ findStartsWith(infile,"MaxUnit",linebuffer);
501
+ cur_tokenset = tokenize(linebuffer,"=");
502
+ mycdf->header.maxunit = atoi(get_token(cur_tokenset,1));
503
+ delete_tokens(cur_tokenset);
504
+
505
+ findStartsWith(infile,"NumQCUnits",linebuffer);
506
+ cur_tokenset = tokenize(linebuffer,"=");
507
+ mycdf->header.NumQCUnits = atoi(get_token(cur_tokenset,1));
508
+ delete_tokens(cur_tokenset);
509
+
510
+ findStartsWith(infile,"ChipReference",linebuffer);
511
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
512
+ if (cur_tokenset->n > 1){
513
+ mycdf->header.chipreference = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
514
+ strcpy(mycdf->header.chipreference,get_token(cur_tokenset,1));
515
+ } else {
516
+ mycdf->header.chipreference = NULL;
517
+ }
518
+
519
+
520
+ delete_tokens(cur_tokenset);
521
+
522
+
523
+
524
+ }
525
+
526
+
527
+
528
+ /*******************************************************************
529
+ **
530
+ ** void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index)
531
+ **
532
+ ** FILE *infile - an opened CDF file
533
+ ** cdf_text *mycdf - a structure for holding cdf file
534
+ ** char *linebuffer - temporary place to store lines of text read in
535
+ ** int index - which QCunit.
536
+ **
537
+ ** This function reads in the QC unit probes from the cdf file. It is assumed that the space to
538
+ ** store them is already allocated.
539
+ **
540
+ *******************************************************************/
541
+
542
+
543
+ static void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index){
544
+ tokenset *cur_tokenset;
545
+ int i;
546
+
547
+ for (i =0; i < mycdf->qc_units[index].n_probes; i++){
548
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
549
+ cur_tokenset = tokenize(linebuffer,"=\t\r\n");
550
+ if (mycdf->qc_units[index].qccontains[0]){
551
+ mycdf->qc_units[index].qc_probes[i].x = atoi(get_token(cur_tokenset,1));
552
+ }
553
+ if (mycdf->qc_units[index].qccontains[1]){
554
+ mycdf->qc_units[index].qc_probes[i].y = atoi(get_token(cur_tokenset,2));
555
+ }
556
+ if (mycdf->qc_units[index].qccontains[2]){
557
+ mycdf->qc_units[index].qc_probes[i].probe=Calloc(strlen(get_token(cur_tokenset,3))+1,char);
558
+ strcpy(mycdf->qc_units[index].qc_probes[i].probe,get_token(cur_tokenset,3));
559
+ }
560
+ if (mycdf->qc_units[index].qccontains[3]){
561
+ mycdf->qc_units[index].qc_probes[i].plen = atoi(get_token(cur_tokenset,4));
562
+ }
563
+ if (mycdf->qc_units[index].qccontains[4]){
564
+ mycdf->qc_units[index].qc_probes[i].atom = atoi(get_token(cur_tokenset,5));
565
+ }
566
+ if (mycdf->qc_units[index].qccontains[5]){
567
+ mycdf->qc_units[index].qc_probes[i].index = atoi(get_token(cur_tokenset,6));
568
+ }
569
+ if (mycdf->qc_units[index].qccontains[6]){
570
+ mycdf->qc_units[index].qc_probes[i].match = atoi(get_token(cur_tokenset,7));
571
+ }
572
+ if (mycdf->qc_units[index].qccontains[7]){
573
+ mycdf->qc_units[index].qc_probes[i].bg = atoi(get_token(cur_tokenset,8));
574
+ }
575
+ delete_tokens(cur_tokenset);
576
+ }
577
+
578
+
579
+
580
+ }
581
+
582
+ /*******************************************************************
583
+ **
584
+ ** void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer)
585
+ **
586
+ ** FILE *infile - an opened CDF file
587
+ ** cdf_text *mycdf - a structure for holding cdf file
588
+ ** char *linebuffer - temporary place to store lines of text read in
589
+ **
590
+ ** Reads all the QC units. Note that it allocates the space for the probes
591
+ ** it is assumed that the space for the actual QC units are already allocated
592
+ **
593
+ *******************************************************************/
594
+
595
+ static void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer){
596
+
597
+ tokenset *cur_tokenset;
598
+ int i,j;
599
+
600
+ mycdf->qc_units = Calloc(mycdf->header.NumQCUnits,cdf_text_qc_unit);
601
+
602
+
603
+ for (i =0; i < mycdf->header.NumQCUnits; i++){
604
+ /* move to the next QC section */
605
+ AdvanceToSection(infile,"[QC",linebuffer);
606
+ findStartsWith(infile,"Type",linebuffer);
607
+ cur_tokenset = tokenize(linebuffer,"=");
608
+ mycdf->qc_units[i].type = (unsigned short)atoi(get_token(cur_tokenset,1));
609
+ delete_tokens(cur_tokenset);
610
+ findStartsWith(infile,"NumberCells",linebuffer);
611
+ cur_tokenset = tokenize(linebuffer,"=");
612
+ mycdf->qc_units[i].n_probes = atoi(get_token(cur_tokenset,1));
613
+ delete_tokens(cur_tokenset);
614
+ mycdf->qc_units[i].qc_probes = Calloc(mycdf->qc_units[i].n_probes,cdf_text_qc_probe);
615
+
616
+ /* Figure out which fields this QC unit has */
617
+ findStartsWith(infile,"CellHeader",linebuffer);
618
+ cur_tokenset = tokenize(linebuffer,"=\t\r\n");
619
+ for (j=1; j < tokenset_size(cur_tokenset); j++){
620
+ if(strncmp("X", get_token(cur_tokenset,j), 1) == 0){
621
+ mycdf->qc_units[i].qccontains[0] =1;
622
+ } else if (strncmp("Y", get_token(cur_tokenset,j), 1) == 0){
623
+ mycdf->qc_units[i].qccontains[1] =1;
624
+ } else if (strncmp("PROBE",get_token(cur_tokenset,j), 5) == 0){
625
+ mycdf->qc_units[i].qccontains[2] =1;
626
+ } else if (strncmp("PLEN",get_token(cur_tokenset,j), 4) == 0){
627
+ mycdf->qc_units[i].qccontains[3] =1;
628
+ } else if (strncmp("ATOM",get_token(cur_tokenset,j), 4) == 0){
629
+ mycdf->qc_units[i].qccontains[4] =1;
630
+ } else if (strncmp("INDEX",get_token(cur_tokenset,j), 5) == 0){
631
+ mycdf->qc_units[i].qccontains[5] =1;
632
+ } else if (strncmp("MATCH",get_token(cur_tokenset,j), 5) == 0){
633
+ mycdf->qc_units[i].qccontains[6] =1;
634
+ } else if (strncmp("BG",get_token(cur_tokenset,j), 2) == 0){
635
+ mycdf->qc_units[i].qccontains[7] =1;
636
+ }
637
+ }
638
+ delete_tokens(cur_tokenset);
639
+
640
+ read_cdf_QCUnits_probes(infile,mycdf,linebuffer,i);
641
+
642
+
643
+
644
+
645
+ }
646
+ }
647
+
648
+ /*******************************************************************
649
+ **
650
+ ** void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block)
651
+ **
652
+ ** FILE *infile - an opened CDF file
653
+ ** cdf_text *mycdf - a structure for holding cdf file
654
+ ** char *linebuffer - temporary place to store lines of text read in from the file
655
+ ** int unit - which unit
656
+ ** int block - which block
657
+ **
658
+ ** Reads in the probes for each unit. Note that it is assumed that the
659
+ ** space for the probes has actually been allocated.
660
+ **
661
+ *******************************************************************/
662
+
663
+
664
+
665
+ static void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block){
666
+ int i;
667
+ tokenset *cur_tokenset;
668
+
669
+ /* Read the Cell Header for the unit block */
670
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
671
+
672
+ for (i =0; i < mycdf->units[unit].blocks[block].num_cells; i++){
673
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
674
+ cur_tokenset = tokenize(linebuffer,"=\t\r\n");
675
+ mycdf->units[unit].blocks[block].probes[i].x = atoi(get_token(cur_tokenset,1));
676
+ mycdf->units[unit].blocks[block].probes[i].y = atoi(get_token(cur_tokenset,2));
677
+ mycdf->units[unit].blocks[block].probes[i].probe=Calloc(strlen(get_token(cur_tokenset,3))+1,char);
678
+ strcpy(mycdf->units[unit].blocks[block].probes[i].probe,get_token(cur_tokenset,3));
679
+ mycdf->units[unit].blocks[block].probes[i].feat=Calloc(strlen(get_token(cur_tokenset,4))+1,char);
680
+ strcpy(mycdf->units[unit].blocks[block].probes[i].feat,get_token(cur_tokenset,4));
681
+ mycdf->units[unit].blocks[block].probes[i].qual=Calloc(strlen(get_token(cur_tokenset,5))+1,char);
682
+ strcpy(mycdf->units[unit].blocks[block].probes[i].qual,get_token(cur_tokenset,5));
683
+ mycdf->units[unit].blocks[block].probes[i].expos = atoi(get_token(cur_tokenset,6));
684
+ mycdf->units[unit].blocks[block].probes[i].pos = atoi(get_token(cur_tokenset,7));
685
+ mycdf->units[unit].blocks[block].probes[i].cbase = Calloc(strlen(get_token(cur_tokenset,8))+1,char);
686
+ strcpy(mycdf->units[unit].blocks[block].probes[i].cbase,get_token(cur_tokenset,8));
687
+ mycdf->units[unit].blocks[block].probes[i].pbase = Calloc(strlen(get_token(cur_tokenset,9))+1,char);
688
+ strcpy(mycdf->units[unit].blocks[block].probes[i].pbase,get_token(cur_tokenset,9));
689
+ mycdf->units[unit].blocks[block].probes[i].tbase = Calloc(strlen(get_token(cur_tokenset,10))+1,char);
690
+ strcpy(mycdf->units[unit].blocks[block].probes[i].tbase,get_token(cur_tokenset,10));
691
+ mycdf->units[unit].blocks[block].probes[i].atom = atoi(get_token(cur_tokenset,11));
692
+ mycdf->units[unit].blocks[block].probes[i].index = atoi(get_token(cur_tokenset,12));
693
+ mycdf->units[unit].blocks[block].probes[i].codonid = atoi(get_token(cur_tokenset,13));
694
+ mycdf->units[unit].blocks[block].probes[i].codon = atoi(get_token(cur_tokenset,14));
695
+ mycdf->units[unit].blocks[block].probes[i].regiontype = atoi(get_token(cur_tokenset,15));
696
+ delete_tokens(cur_tokenset);
697
+ }
698
+
699
+ }
700
+
701
+
702
+ /*******************************************************************
703
+ **
704
+ ** void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit)
705
+ **
706
+ ** FILE *infile - an opened CDF file
707
+ ** cdf_text *mycdf - a structure for holding cdf file
708
+ ** char *linebuffer - temporary place to store lines of text read in from the file
709
+ ** int unit - which unit
710
+ **
711
+ ** Reads in all the blocks for the unit. Assumes that space for the blocks are allocated
712
+ ** already. Allocates the space for the probes and calls a function to read them in.
713
+ **
714
+ *******************************************************************/
715
+
716
+
717
+ static void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit){
718
+ tokenset *cur_tokenset;
719
+ int i;
720
+
721
+
722
+
723
+ for (i=0; i < mycdf->units[unit].numberblocks; i++){
724
+
725
+ findStartsWith(infile,"Name",linebuffer);
726
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
727
+ mycdf->units[unit].blocks[i].name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
728
+ strcpy(mycdf->units[unit].blocks[i].name,get_token(cur_tokenset,1));
729
+ delete_tokens(cur_tokenset);
730
+ /* Rprintf("%s\n",mycdf->units[unit].blocks[i].name); */
731
+
732
+
733
+
734
+ findStartsWith(infile,"BlockNumber",linebuffer);
735
+ cur_tokenset = tokenize(linebuffer,"=");
736
+ mycdf->units[unit].blocks[i].blocknumber = atoi(get_token(cur_tokenset,1));
737
+ delete_tokens(cur_tokenset);
738
+ /* Rprintf("%d %d %d\n",unit,i,mycdf->header.numberofunits); */
739
+
740
+ findStartsWith(infile,"NumAtoms",linebuffer);
741
+ cur_tokenset = tokenize(linebuffer,"=");
742
+ mycdf->units[unit].blocks[i].num_atoms = atoi(get_token(cur_tokenset,1));
743
+ delete_tokens(cur_tokenset);
744
+
745
+ findStartsWith(infile,"NumCells",linebuffer);
746
+ cur_tokenset = tokenize(linebuffer,"=");
747
+ mycdf->units[unit].blocks[i].num_cells = atoi(get_token(cur_tokenset,1));
748
+ delete_tokens(cur_tokenset);
749
+
750
+
751
+ findStartsWith(infile,"StartPosition",linebuffer);
752
+ cur_tokenset = tokenize(linebuffer,"=");
753
+ mycdf->units[unit].blocks[i].start_position = atoi(get_token(cur_tokenset,1));
754
+ delete_tokens(cur_tokenset);
755
+
756
+ findStartsWith(infile,"StopPosition",linebuffer);
757
+ cur_tokenset = tokenize(linebuffer,"=");
758
+ mycdf->units[unit].blocks[i].stop_position = atoi(get_token(cur_tokenset,1));
759
+ delete_tokens(cur_tokenset);
760
+
761
+ if (mycdf->units[unit].unit_type == 2){
762
+ findStartsWith(infile,"Direction",linebuffer);
763
+ cur_tokenset = tokenize(linebuffer,"=");
764
+ mycdf->units[unit].blocks[i].direction = atoi(get_token(cur_tokenset,1));
765
+ delete_tokens(cur_tokenset);
766
+ } else {
767
+ mycdf->units[unit].blocks[i].direction = mycdf->units[unit].direction;
768
+ }
769
+
770
+ mycdf->units[unit].blocks[i].probes = Calloc(mycdf->units[unit].blocks[i].num_cells,cdf_text_unit_block_probe);
771
+
772
+ read_cdf_unit_block_probes(infile,mycdf,linebuffer,unit,i);
773
+
774
+
775
+
776
+ }
777
+ }
778
+
779
+
780
+ /*******************************************************************
781
+ **
782
+ ** void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer)
783
+ **
784
+ ** FILE *infile - an opened CDF file
785
+ ** cdf_text *mycdf - a structure for holding cdf file
786
+ ** char *linebuffer - temporary place to store lines of text read in from the file
787
+ **
788
+ ** Reads in all the units allocating the space for them and then calling sub functions
789
+ ** to read each block and probes within the blocks
790
+ **
791
+ *******************************************************************/
792
+
793
+ static void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer){
794
+ tokenset *cur_tokenset;
795
+ int i;
796
+
797
+ mycdf->units = Calloc(mycdf->header.numberofunits,cdf_text_unit);
798
+
799
+ for (i =0; i < mycdf->header.numberofunits; i++){
800
+ /* move to the next Unit section */
801
+ AdvanceToSection(infile,"[Unit",linebuffer);
802
+ findStartsWith(infile,"Name",linebuffer);
803
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
804
+ mycdf->units[i].name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
805
+ strcpy(mycdf->units[i].name,get_token(cur_tokenset,1));
806
+
807
+ delete_tokens(cur_tokenset);
808
+
809
+
810
+
811
+ findStartsWith(infile,"Direction",linebuffer);
812
+ cur_tokenset = tokenize(linebuffer,"=");
813
+ mycdf->units[i].direction = atoi(get_token(cur_tokenset,1));
814
+ delete_tokens(cur_tokenset);
815
+
816
+ findStartsWith(infile,"NumAtoms",linebuffer);
817
+ cur_tokenset = tokenize(linebuffer,"=");
818
+ mycdf->units[i].num_atoms = atoi(get_token(cur_tokenset,1));
819
+ delete_tokens(cur_tokenset);
820
+
821
+ findStartsWith(infile,"NumCells",linebuffer);
822
+ cur_tokenset = tokenize(linebuffer,"=");
823
+ mycdf->units[i].num_cells = atoi(get_token(cur_tokenset,1));
824
+ delete_tokens(cur_tokenset);
825
+
826
+ findStartsWith(infile,"UnitNumber",linebuffer);
827
+ cur_tokenset = tokenize(linebuffer,"=");
828
+ mycdf->units[i].unit_number = atoi(get_token(cur_tokenset,1));
829
+ delete_tokens(cur_tokenset);
830
+
831
+ findStartsWith(infile,"UnitType",linebuffer);
832
+ cur_tokenset = tokenize(linebuffer,"=");
833
+ mycdf->units[i].unit_type = atoi(get_token(cur_tokenset,1));
834
+ delete_tokens(cur_tokenset);
835
+
836
+ findStartsWith(infile,"NumberBlocks",linebuffer);
837
+ cur_tokenset = tokenize(linebuffer,"=");
838
+ mycdf->units[i].numberblocks = atoi(get_token(cur_tokenset,1));
839
+ delete_tokens(cur_tokenset);
840
+
841
+ /*Skip MutationType since only appears on one type of array */
842
+
843
+ mycdf->units[i].blocks = Calloc(mycdf->units[i].numberblocks,cdf_text_unit_block);
844
+
845
+
846
+ read_cdf_unit_block(infile,mycdf,linebuffer,i);
847
+ /* AdvanceToSection(infile,"[Unit",linebuffer);
848
+ Rprintf("%d\n",i); */
849
+ }
850
+
851
+
852
+
853
+ }
854
+
855
+
856
+ /*******************************************************************
857
+ **
858
+ ** int read_cdf_text(const char *filename, cdf_text *mycdf)
859
+ **
860
+ ** const char *filename - name of text file
861
+ ** cdf_text *mycdf - pointer to root of structure that will contain
862
+ ** the contents of the CDF file at the conclusion
863
+ ** of the function.
864
+ **
865
+ ** RETURNS 0 if the function failed, otherwise returns 1
866
+ **
867
+ ** this function reads a text CDF file into C data structure.
868
+ **
869
+ *******************************************************************/
870
+
871
+
872
+ int read_cdf_text(const char *filename, cdf_text *mycdf){
873
+
874
+ FILE *infile;
875
+
876
+ char linebuffer[BUFFER_SIZE]; /* a character buffer */
877
+ tokenset *cur_tokenset;
878
+
879
+ if ((infile = fopen(filename, "r")) == NULL)
880
+ {
881
+ error("Unable to open the file %s",filename);
882
+ return 0;
883
+ }
884
+
885
+
886
+
887
+ /* Check that is is a text CDF file */
888
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
889
+ if (strncmp("[CDF]", linebuffer, 5) != 0){
890
+ error("The file %s does not look like a text CDF file",filename);
891
+ }
892
+
893
+ /* Read the version number */
894
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
895
+
896
+ cur_tokenset = tokenize(linebuffer,"=\r\n");
897
+ if (strncmp("GC3.0", get_token(cur_tokenset,1), 5) != 0){
898
+ error("The file %s does not look like a version GC3.0 CDF file",filename);
899
+ } else {
900
+ mycdf->header.version = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
901
+ strcpy(mycdf->header.version,get_token(cur_tokenset,1));
902
+ }
903
+ delete_tokens(cur_tokenset);
904
+
905
+
906
+ read_cdf_header(infile,mycdf,linebuffer);
907
+ read_cdf_QCUnits(infile,mycdf,linebuffer);
908
+ read_cdf_Units(infile,mycdf,linebuffer);
909
+
910
+
911
+ return 1;
912
+ }
913
+
914
+ /*******************************************************************
915
+ **
916
+ ** void dealloc_cdf_text(cdf_text *my_cdf)
917
+ **
918
+ ** Frees memory allocated
919
+ **
920
+ ******************************************************************/
921
+
922
+
923
+
924
+ void dealloc_cdf_text(cdf_text *my_cdf){
925
+ int i,j,k;
926
+
927
+
928
+ Free(my_cdf->header.version);
929
+ Free(my_cdf->header.name);
930
+ if (my_cdf->header.chipreference != NULL)
931
+ Free(my_cdf->header.chipreference);
932
+
933
+ for (i =0; i < my_cdf->header.NumQCUnits; i++){
934
+ for (j=0; j < my_cdf->qc_units[i].n_probes; j++){
935
+ Free(my_cdf->qc_units[i].qc_probes[j].probe);
936
+ }
937
+ Free(my_cdf->qc_units[i].qc_probes);
938
+ }
939
+
940
+
941
+ for (i =0; i < my_cdf->header.numberofunits; i++){
942
+ for (j=0; j < my_cdf->units[i].numberblocks; j++){
943
+ for (k=0; k < my_cdf->units[i].blocks[j].num_cells;k++){
944
+ Free(my_cdf->units[i].blocks[j].probes[k].probe);
945
+ Free(my_cdf->units[i].blocks[j].probes[k].feat);
946
+ Free(my_cdf->units[i].blocks[j].probes[k].qual);
947
+ Free(my_cdf->units[i].blocks[j].probes[k].cbase);
948
+ Free(my_cdf->units[i].blocks[j].probes[k].pbase);
949
+ Free(my_cdf->units[i].blocks[j].probes[k].tbase);
950
+ }
951
+ Free(my_cdf->units[i].blocks[j].probes);
952
+ Free(my_cdf->units[i].blocks[j].name);
953
+ }
954
+ Free(my_cdf->units[i].blocks);
955
+ Free(my_cdf->units[i].name);
956
+ }
957
+
958
+
959
+ }
960
+
961
+
962
+
963
+ /*******************************************************************
964
+ **
965
+ ** static int isTextCDFFile(const char *filename)
966
+ **
967
+ ** const char *filename - name of file to check
968
+ **
969
+ ** checks whether the supplied file is a text CDF file or not.
970
+ ** uses a very simple test.
971
+ **
972
+ ** Attempts to open the supplied filename. Then checks to see if the first
973
+ ** 5 characters are "[CDF]" if so returns 1, otherwise 0.
974
+ **
975
+ **
976
+ ******************************************************************/
977
+
978
+ int isTextCDFFile(const char *filename){
979
+
980
+
981
+ FILE *infile;
982
+
983
+ char linebuffer[BUFFER_SIZE]; /* a character buffer */
984
+
985
+
986
+ if ((infile = fopen(filename, "r")) == NULL)
987
+ {
988
+ error("Unable to open the file %s",filename);
989
+ }
990
+
991
+
992
+
993
+ /* Check that is is a text CDF file */
994
+ ReadFileLine(linebuffer, BUFFER_SIZE, infile);
995
+ if (strncmp("[CDF]", linebuffer, 5) == 0){
996
+ fclose(infile);
997
+ return 1;
998
+ }
999
+ fclose(infile);
1000
+ return 0;
1001
+ }
1002
+
1003
+
1004
+
1005
+
1006
+
1007
+ /*******************************************************************
1008
+ **
1009
+ ** SEXP ReadtextCDFFileIntoRList(SEXP filename)
1010
+ **
1011
+ ** SEXP filename - name of cdffile. Should be full path to file.
1012
+ **
1013
+ ** this function should be called from R. When supplied the name
1014
+ ** of a text cdf file it first parses it into a C data structure.
1015
+ **
1016
+ ** An R list structure is then constructed from the C data structure
1017
+ **
1018
+ ** The R list is then returned.
1019
+ **
1020
+ ** Note no special effort is made to reduce down the information in
1021
+ ** the text CDF file. Instead almost everything is returned, even
1022
+ ** somewhat redundant information.
1023
+ **
1024
+ ******************************************************************/
1025
+
1026
+
1027
+ SEXP ReadtextCDFFileIntoRList(SEXP filename){
1028
+
1029
+ SEXP CDFInfo; /* this is the object that will be returned */
1030
+ SEXP CDFInfoNames;
1031
+ SEXP HEADER; /* The file header */
1032
+ SEXP HEADERNames;
1033
+ SEXP TEMPSXP;
1034
+ SEXP TEMPSXP2;
1035
+ SEXP TEMPSXP3;
1036
+ SEXP TEMPSXP4;
1037
+
1038
+ SEXP QCUNITS;
1039
+ SEXP UNITS;
1040
+
1041
+
1042
+ /* Basically fields (possible) for QC probes */
1043
+ SEXP QCUNITSProbeInfoX;
1044
+ SEXP QCUNITSProbeInfoY;
1045
+ SEXP QCUNITSProbeInfoPROBE;
1046
+ SEXP QCUNITSProbeInfoPL;
1047
+ SEXP QCUNITSProbeInfoATOM;
1048
+ SEXP QCUNITSProbeInfoINDEX;
1049
+ SEXP QCUNITSProbeInfoPMFLAG;
1050
+ SEXP QCUNITSProbeInfoBGFLAG;
1051
+ SEXP QCUNITSProbeInfoNames = R_NilValue;
1052
+ SEXP QCUNITSProbeInforow_names;
1053
+
1054
+ /* Basically fields (possible) for Unit Block probes */
1055
+
1056
+ SEXP UNITSProbeInfoX;
1057
+ SEXP UNITSProbeInfoY;
1058
+ SEXP UNITSProbeInfoPROBE;
1059
+ SEXP UNITSProbeInfoFEAT;
1060
+ SEXP UNITSProbeInfoQUAL;
1061
+ SEXP UNITSProbeInfoEXPOS;
1062
+ SEXP UNITSProbeInfoPOS;
1063
+ SEXP UNITSProbeInfoCBASE;
1064
+ SEXP UNITSProbeInfoPBASE;
1065
+ SEXP UNITSProbeInfoTBASE;
1066
+ SEXP UNITSProbeInfoATOM;
1067
+ SEXP UNITSProbeInfoINDEX;
1068
+ SEXP UNITSProbeInfoCODONIND;
1069
+ SEXP UNITSProbeInfoCODON;
1070
+ SEXP UNITSProbeInfoREGIONTYPE;
1071
+ SEXP UNITSProbeInfoNames;
1072
+ SEXP UNITSProbeInforow_names;
1073
+
1074
+ char buf[10]; /* temporary buffer for making names */
1075
+ int i,j,k,l;
1076
+ int tmpsum =0;
1077
+
1078
+
1079
+ cdf_text my_cdf;
1080
+
1081
+ const char *cur_file_name;
1082
+ cur_file_name = CHAR(STRING_ELT(filename,0));
1083
+
1084
+ if(!read_cdf_text(cur_file_name, &my_cdf)){
1085
+ error("Problem reading text cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
1086
+ }
1087
+
1088
+
1089
+ /* Now build the R list structure */
1090
+
1091
+
1092
+ /* return the full structure */
1093
+ PROTECT(CDFInfo = allocVector(VECSXP,3));
1094
+ PROTECT(CDFInfoNames = allocVector(STRSXP,3));
1095
+ SET_STRING_ELT(CDFInfoNames,0,mkChar("Chip"));
1096
+ SET_STRING_ELT(CDFInfoNames,1,mkChar("QC"));
1097
+ SET_STRING_ELT(CDFInfoNames,2,mkChar("Unit"));
1098
+
1099
+ setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
1100
+ UNPROTECT(1);
1101
+
1102
+ /* Deal with the HEADER */
1103
+ PROTECT(HEADER = allocVector(VECSXP,8));
1104
+ PROTECT(HEADERNames = allocVector(STRSXP,8));
1105
+ SET_STRING_ELT(HEADERNames,0,mkChar("Version"));
1106
+ SET_STRING_ELT(HEADERNames,1,mkChar("Name"));
1107
+ SET_STRING_ELT(HEADERNames,2,mkChar("Rows"));
1108
+ SET_STRING_ELT(HEADERNames,3,mkChar("Cols"));
1109
+ SET_STRING_ELT(HEADERNames,4,mkChar("NumberOfUnits"));
1110
+ SET_STRING_ELT(HEADERNames,5,mkChar("MaxUnit"));
1111
+ SET_STRING_ELT(HEADERNames,6,mkChar("NumQCUnits"));
1112
+ SET_STRING_ELT(HEADERNames,7,mkChar("ChipReference"));
1113
+ setAttrib(HEADER,R_NamesSymbol,HEADERNames);
1114
+ UNPROTECT(1);
1115
+
1116
+ PROTECT(TEMPSXP = allocVector(STRSXP,1));
1117
+ SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.version));
1118
+ SET_VECTOR_ELT(HEADER,0,TEMPSXP);
1119
+ UNPROTECT(1);
1120
+
1121
+ PROTECT(TEMPSXP = allocVector(STRSXP,1));
1122
+ SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.name));
1123
+ SET_VECTOR_ELT(HEADER,1,TEMPSXP);
1124
+ UNPROTECT(1);
1125
+
1126
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1127
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.rows;
1128
+ SET_VECTOR_ELT(HEADER,2,TEMPSXP);
1129
+ UNPROTECT(1);
1130
+
1131
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1132
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.cols;
1133
+ SET_VECTOR_ELT(HEADER,3,TEMPSXP);
1134
+ UNPROTECT(1);
1135
+
1136
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1137
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.numberofunits;
1138
+ SET_VECTOR_ELT(HEADER,4,TEMPSXP);
1139
+ UNPROTECT(1);
1140
+
1141
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1142
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.maxunit;
1143
+ SET_VECTOR_ELT(HEADER,5,TEMPSXP);
1144
+ UNPROTECT(1);
1145
+
1146
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1147
+ NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.NumQCUnits;
1148
+ SET_VECTOR_ELT(HEADER,6,TEMPSXP);
1149
+ UNPROTECT(1);
1150
+
1151
+ PROTECT(TEMPSXP = allocVector(REALSXP,1));
1152
+ if (my_cdf.header.chipreference !=NULL){
1153
+ SET_VECTOR_ELT(TEMPSXP,0,mkChar(my_cdf.header.chipreference));
1154
+ SET_VECTOR_ELT(HEADER,7,TEMPSXP);
1155
+ }
1156
+ UNPROTECT(1);
1157
+
1158
+ SET_VECTOR_ELT(CDFInfo,0,HEADER);
1159
+
1160
+ PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.NumQCUnits));
1161
+ for (i=0; i <my_cdf.header.NumQCUnits; i++){
1162
+ PROTECT(TEMPSXP=allocVector(VECSXP,3));
1163
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1164
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.qc_units[i].type;
1165
+ SET_VECTOR_ELT(TEMPSXP,0,TEMPSXP2);
1166
+ UNPROTECT(1);
1167
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1168
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.qc_units[i].n_probes;
1169
+ SET_VECTOR_ELT(TEMPSXP,1,TEMPSXP2);
1170
+ UNPROTECT(1);
1171
+ /*Figure out what the probe information is for this QC unit and then allocate the space */
1172
+ tmpsum=0;
1173
+ for (j=0; j < 8; j++){
1174
+ tmpsum+=my_cdf.qc_units[i].qccontains[j];
1175
+ }
1176
+
1177
+ if (tmpsum == 6){
1178
+ PROTECT(TEMPSXP2 = allocVector(VECSXP,6));
1179
+ } else if (tmpsum ==8){
1180
+ PROTECT(TEMPSXP2 = allocVector(VECSXP,8));
1181
+ }
1182
+
1183
+ if (tmpsum == 6){
1184
+ PROTECT(QCUNITSProbeInfoX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1185
+ PROTECT(QCUNITSProbeInfoY = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1186
+ PROTECT(QCUNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1187
+ PROTECT(QCUNITSProbeInfoPL = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1188
+ PROTECT(QCUNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1189
+ PROTECT(QCUNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1190
+ } else if (tmpsum == 8){
1191
+ PROTECT(QCUNITSProbeInfoX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1192
+ PROTECT(QCUNITSProbeInfoY = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1193
+ PROTECT(QCUNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1194
+ PROTECT(QCUNITSProbeInfoPL = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1195
+ PROTECT(QCUNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1196
+ PROTECT(QCUNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1197
+ PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1198
+ PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
1199
+
1200
+ }
1201
+
1202
+ /* store what was read in for the QC units in the R structure */
1203
+ if (tmpsum == 6){
1204
+ for (j =0; j < my_cdf.qc_units[i].n_probes; j++){
1205
+ INTEGER_POINTER(QCUNITSProbeInfoX)[j] = my_cdf.qc_units[i].qc_probes[j].x;
1206
+ INTEGER_POINTER(QCUNITSProbeInfoY)[j] = my_cdf.qc_units[i].qc_probes[j].y;
1207
+ SET_STRING_ELT(QCUNITSProbeInfoPROBE,j,mkChar( my_cdf.qc_units[i].qc_probes[j].probe));
1208
+ INTEGER_POINTER(QCUNITSProbeInfoPL)[j] = my_cdf.qc_units[i].qc_probes[j].plen;
1209
+ INTEGER_POINTER(QCUNITSProbeInfoATOM)[j] = my_cdf.qc_units[i].qc_probes[j].atom;
1210
+ INTEGER_POINTER(QCUNITSProbeInfoINDEX)[j] = my_cdf.qc_units[i].qc_probes[j].index;
1211
+
1212
+ }
1213
+ } else if (tmpsum == 8){
1214
+ for (j =0; j < my_cdf.qc_units[i].n_probes; j++){
1215
+ INTEGER_POINTER(QCUNITSProbeInfoX)[j] = my_cdf.qc_units[i].qc_probes[j].x;
1216
+ INTEGER_POINTER(QCUNITSProbeInfoY)[j] = my_cdf.qc_units[i].qc_probes[j].y;
1217
+ SET_STRING_ELT(QCUNITSProbeInfoPROBE,j,mkChar( my_cdf.qc_units[i].qc_probes[j].probe));
1218
+ INTEGER_POINTER(QCUNITSProbeInfoPL)[j] = my_cdf.qc_units[i].qc_probes[j].plen;
1219
+ INTEGER_POINTER(QCUNITSProbeInfoATOM)[j] = my_cdf.qc_units[i].qc_probes[j].atom;
1220
+ INTEGER_POINTER(QCUNITSProbeInfoINDEX)[j] = my_cdf.qc_units[i].qc_probes[j].index;
1221
+ INTEGER_POINTER(QCUNITSProbeInfoPMFLAG)[j] = my_cdf.qc_units[i].qc_probes[j].match;
1222
+ INTEGER_POINTER(QCUNITSProbeInfoBGFLAG)[j] = my_cdf.qc_units[i].qc_probes[j].bg;
1223
+ }
1224
+ }
1225
+
1226
+ if (tmpsum == 6){
1227
+ SET_VECTOR_ELT(TEMPSXP2,0,QCUNITSProbeInfoX);
1228
+ SET_VECTOR_ELT(TEMPSXP2,1,QCUNITSProbeInfoY);
1229
+ SET_VECTOR_ELT(TEMPSXP2,2,QCUNITSProbeInfoPROBE);
1230
+ SET_VECTOR_ELT(TEMPSXP2,3,QCUNITSProbeInfoPL);
1231
+ SET_VECTOR_ELT(TEMPSXP2,4,QCUNITSProbeInfoATOM);
1232
+ SET_VECTOR_ELT(TEMPSXP2,5,QCUNITSProbeInfoINDEX);
1233
+ } else if (tmpsum ==8){
1234
+ SET_VECTOR_ELT(TEMPSXP2,0,QCUNITSProbeInfoX);
1235
+ SET_VECTOR_ELT(TEMPSXP2,1,QCUNITSProbeInfoY);
1236
+ SET_VECTOR_ELT(TEMPSXP2,2,QCUNITSProbeInfoPROBE);
1237
+ SET_VECTOR_ELT(TEMPSXP2,3,QCUNITSProbeInfoPL);
1238
+ SET_VECTOR_ELT(TEMPSXP2,4,QCUNITSProbeInfoATOM);
1239
+ SET_VECTOR_ELT(TEMPSXP2,5,QCUNITSProbeInfoINDEX);
1240
+ SET_VECTOR_ELT(TEMPSXP2,6,QCUNITSProbeInfoPMFLAG);
1241
+ SET_VECTOR_ELT(TEMPSXP2,7,QCUNITSProbeInfoBGFLAG);
1242
+
1243
+ }
1244
+
1245
+
1246
+ if (tmpsum == 6){
1247
+ UNPROTECT(6);
1248
+ } else if (tmpsum == 8){
1249
+ UNPROTECT(8);
1250
+ }
1251
+
1252
+
1253
+ if (tmpsum == 6){
1254
+ PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,6));
1255
+ SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
1256
+ SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
1257
+ SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("Probe"));
1258
+ SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("ProbeLength"));
1259
+ SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("Atom"));
1260
+ SET_STRING_ELT(QCUNITSProbeInfoNames,5,mkChar("Index"));
1261
+ } else if (tmpsum == 8){
1262
+ PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,8));
1263
+ SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
1264
+ SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
1265
+ SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("Probe"));
1266
+ SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("ProbeLength"));
1267
+ SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("Atom"));
1268
+ SET_STRING_ELT(QCUNITSProbeInfoNames,5,mkChar("Index"));
1269
+ SET_STRING_ELT(QCUNITSProbeInfoNames,6,mkChar("PMFlag"));
1270
+ SET_STRING_ELT(QCUNITSProbeInfoNames,7,mkChar("BGProbeFlag"));
1271
+ }
1272
+ setAttrib(TEMPSXP2,R_NamesSymbol,QCUNITSProbeInfoNames);
1273
+ UNPROTECT(1);
1274
+ PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
1275
+
1276
+ for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
1277
+ sprintf(buf, "%d", j+1);
1278
+ SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
1279
+ }
1280
+ setAttrib(TEMPSXP2, R_RowNamesSymbol, QCUNITSProbeInforow_names);
1281
+ UNPROTECT(1);
1282
+
1283
+ setAttrib(TEMPSXP2,R_ClassSymbol,mkString("data.frame"));
1284
+ SET_VECTOR_ELT(TEMPSXP,2,TEMPSXP2);
1285
+ UNPROTECT(1);
1286
+
1287
+ PROTECT(TEMPSXP2=allocVector(STRSXP,3));
1288
+ SET_STRING_ELT(TEMPSXP2,0,mkChar("Type"));
1289
+ SET_STRING_ELT(TEMPSXP2,1,mkChar("NumberCells"));
1290
+ SET_STRING_ELT(TEMPSXP2,2,mkChar("QCCells"));
1291
+ setAttrib(TEMPSXP,R_NamesSymbol,TEMPSXP2);
1292
+ UNPROTECT(1);
1293
+ SET_VECTOR_ELT(QCUNITS,i,TEMPSXP);
1294
+
1295
+ UNPROTECT(1);
1296
+ }
1297
+ SET_VECTOR_ELT(CDFInfo,1,QCUNITS);
1298
+ UNPROTECT(1);
1299
+
1300
+
1301
+ PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.numberofunits));
1302
+ for (i=0; i < my_cdf.header.numberofunits; i++){
1303
+ PROTECT(TEMPSXP=allocVector(VECSXP,8));
1304
+ PROTECT(TEMPSXP2=allocVector(STRSXP,1));
1305
+
1306
+ SET_STRING_ELT(TEMPSXP2,0,mkChar(my_cdf.units[i].name));
1307
+ SET_VECTOR_ELT(TEMPSXP,0,TEMPSXP2);
1308
+ UNPROTECT(1);
1309
+
1310
+
1311
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1312
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].direction;
1313
+ SET_VECTOR_ELT(TEMPSXP,1,TEMPSXP2);
1314
+ UNPROTECT(1);
1315
+
1316
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1317
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].num_atoms;
1318
+ SET_VECTOR_ELT(TEMPSXP,2,TEMPSXP2);
1319
+ UNPROTECT(1);
1320
+
1321
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1322
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].num_cells;
1323
+ SET_VECTOR_ELT(TEMPSXP,3,TEMPSXP2);
1324
+ UNPROTECT(1);
1325
+
1326
+
1327
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1328
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].unit_number;
1329
+ SET_VECTOR_ELT(TEMPSXP,4,TEMPSXP2);
1330
+ UNPROTECT(1);
1331
+
1332
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1333
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].unit_type;
1334
+ SET_VECTOR_ELT(TEMPSXP,5,TEMPSXP2);
1335
+ UNPROTECT(1);
1336
+
1337
+ PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
1338
+ NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].numberblocks;
1339
+ SET_VECTOR_ELT(TEMPSXP,6,TEMPSXP2);
1340
+ UNPROTECT(1);
1341
+
1342
+ PROTECT(TEMPSXP2 = allocVector(VECSXP,my_cdf.units[i].numberblocks));
1343
+
1344
+ for (j=0; j <my_cdf.units[i].numberblocks; j++){
1345
+ PROTECT(TEMPSXP3 = allocVector(VECSXP,8));
1346
+
1347
+
1348
+ PROTECT(TEMPSXP4=allocVector(STRSXP,1));
1349
+
1350
+ SET_STRING_ELT(TEMPSXP4,0,mkChar(my_cdf.units[i].blocks[j].name));
1351
+ SET_VECTOR_ELT(TEMPSXP3,0,TEMPSXP4);
1352
+ UNPROTECT(1);
1353
+
1354
+
1355
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1356
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].blocknumber;
1357
+ SET_VECTOR_ELT(TEMPSXP3,1,TEMPSXP4);
1358
+ UNPROTECT(1);
1359
+
1360
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1361
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].num_atoms;
1362
+ SET_VECTOR_ELT(TEMPSXP3,2,TEMPSXP4);
1363
+ UNPROTECT(1);
1364
+
1365
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1366
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].num_cells;
1367
+ SET_VECTOR_ELT(TEMPSXP3,3,TEMPSXP4);
1368
+ UNPROTECT(1);
1369
+
1370
+
1371
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1372
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].start_position;
1373
+ SET_VECTOR_ELT(TEMPSXP3,4,TEMPSXP4);
1374
+ UNPROTECT(1);
1375
+
1376
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1377
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].stop_position;
1378
+ SET_VECTOR_ELT(TEMPSXP3,5,TEMPSXP4);
1379
+ UNPROTECT(1);
1380
+
1381
+
1382
+ PROTECT(TEMPSXP4=allocVector(REALSXP,1));
1383
+ NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].direction;
1384
+ SET_VECTOR_ELT(TEMPSXP3,6,TEMPSXP4);
1385
+ UNPROTECT(1);
1386
+
1387
+ PROTECT(TEMPSXP4=allocVector(VECSXP,15));
1388
+
1389
+
1390
+ PROTECT(UNITSProbeInfoX = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1391
+ PROTECT(UNITSProbeInfoY = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1392
+ PROTECT(UNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1393
+ PROTECT(UNITSProbeInfoFEAT = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1394
+ PROTECT(UNITSProbeInfoQUAL = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1395
+ PROTECT(UNITSProbeInfoEXPOS = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1396
+ PROTECT(UNITSProbeInfoPOS = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1397
+ PROTECT(UNITSProbeInfoCBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1398
+ PROTECT(UNITSProbeInfoPBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1399
+ PROTECT(UNITSProbeInfoTBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1400
+ PROTECT(UNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1401
+ PROTECT(UNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1402
+ PROTECT(UNITSProbeInfoCODONIND = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1403
+ PROTECT(UNITSProbeInfoCODON = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1404
+ PROTECT(UNITSProbeInfoREGIONTYPE = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
1405
+ for (k=0; k < my_cdf.units[i].blocks[j].num_cells; k++){
1406
+ INTEGER_POINTER(UNITSProbeInfoX)[k]=my_cdf.units[i].blocks[j].probes[k].x;
1407
+ INTEGER_POINTER(UNITSProbeInfoY)[k]=my_cdf.units[i].blocks[j].probes[k].y;
1408
+ INTEGER_POINTER(UNITSProbeInfoEXPOS)[k]=my_cdf.units[i].blocks[j].probes[k].expos;
1409
+ INTEGER_POINTER(UNITSProbeInfoPOS)[k]=my_cdf.units[i].blocks[j].probes[k].pos;
1410
+ INTEGER_POINTER(UNITSProbeInfoATOM)[k]=my_cdf.units[i].blocks[j].probes[k].atom;
1411
+ INTEGER_POINTER(UNITSProbeInfoINDEX)[k]=my_cdf.units[i].blocks[j].probes[k].index;
1412
+ INTEGER_POINTER(UNITSProbeInfoCODONIND)[k]=my_cdf.units[i].blocks[j].probes[k].codonid;
1413
+ INTEGER_POINTER(UNITSProbeInfoCODON)[k]=my_cdf.units[i].blocks[j].probes[k].codon;
1414
+ INTEGER_POINTER(UNITSProbeInfoREGIONTYPE)[k]=my_cdf.units[i].blocks[j].probes[k].regiontype;
1415
+ SET_VECTOR_ELT(UNITSProbeInfoPROBE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].probe));
1416
+ SET_STRING_ELT(UNITSProbeInfoFEAT,k,mkChar(my_cdf.units[i].blocks[j].probes[k].feat));
1417
+ SET_STRING_ELT(UNITSProbeInfoQUAL,k,mkChar(my_cdf.units[i].blocks[j].probes[k].qual));
1418
+ SET_STRING_ELT(UNITSProbeInfoCBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].cbase));
1419
+ SET_STRING_ELT(UNITSProbeInfoPBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].pbase));
1420
+ SET_STRING_ELT(UNITSProbeInfoTBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].tbase));
1421
+ }
1422
+
1423
+
1424
+ SET_VECTOR_ELT(TEMPSXP4,0,UNITSProbeInfoX);
1425
+ SET_VECTOR_ELT(TEMPSXP4,1,UNITSProbeInfoY);
1426
+ SET_VECTOR_ELT(TEMPSXP4,2,UNITSProbeInfoPROBE);
1427
+ SET_VECTOR_ELT(TEMPSXP4,3,UNITSProbeInfoFEAT);
1428
+ SET_VECTOR_ELT(TEMPSXP4,4,UNITSProbeInfoQUAL);
1429
+ SET_VECTOR_ELT(TEMPSXP4,5,UNITSProbeInfoEXPOS);
1430
+ SET_VECTOR_ELT(TEMPSXP4,6,UNITSProbeInfoPOS);
1431
+ SET_VECTOR_ELT(TEMPSXP4,7,UNITSProbeInfoCBASE);
1432
+ SET_VECTOR_ELT(TEMPSXP4,8,UNITSProbeInfoPBASE);
1433
+ SET_VECTOR_ELT(TEMPSXP4,9,UNITSProbeInfoTBASE);
1434
+ SET_VECTOR_ELT(TEMPSXP4,10,UNITSProbeInfoATOM);
1435
+ SET_VECTOR_ELT(TEMPSXP4,11,UNITSProbeInfoINDEX);
1436
+ SET_VECTOR_ELT(TEMPSXP4,12,UNITSProbeInfoCODONIND);
1437
+ SET_VECTOR_ELT(TEMPSXP4,13,UNITSProbeInfoCODON);
1438
+ SET_VECTOR_ELT(TEMPSXP4,14,UNITSProbeInfoREGIONTYPE);
1439
+
1440
+
1441
+
1442
+
1443
+
1444
+
1445
+
1446
+ UNPROTECT(15);
1447
+
1448
+ PROTECT(UNITSProbeInfoNames =allocVector(STRSXP,15));
1449
+ SET_STRING_ELT(UNITSProbeInfoNames,0,mkChar("x"));
1450
+ SET_STRING_ELT(UNITSProbeInfoNames,1,mkChar("y"));
1451
+ SET_STRING_ELT(UNITSProbeInfoNames,2,mkChar("Probe"));
1452
+ SET_STRING_ELT(UNITSProbeInfoNames,3,mkChar("Feat"));
1453
+ SET_STRING_ELT(UNITSProbeInfoNames,4,mkChar("Qual"));
1454
+ SET_STRING_ELT(UNITSProbeInfoNames,5,mkChar("Expos"));
1455
+ SET_STRING_ELT(UNITSProbeInfoNames,6,mkChar("Pos"));
1456
+ SET_STRING_ELT(UNITSProbeInfoNames,7,mkChar("cbase"));
1457
+ SET_STRING_ELT(UNITSProbeInfoNames,8,mkChar("pbase"));
1458
+ SET_STRING_ELT(UNITSProbeInfoNames,9,mkChar("tbase"));
1459
+ SET_STRING_ELT(UNITSProbeInfoNames,10,mkChar("Atom"));
1460
+ SET_STRING_ELT(UNITSProbeInfoNames,11,mkChar("Index"));
1461
+ SET_STRING_ELT(UNITSProbeInfoNames,12,mkChar("CodonInd"));
1462
+ SET_STRING_ELT(UNITSProbeInfoNames,13,mkChar("Codon"));
1463
+ SET_STRING_ELT(UNITSProbeInfoNames,14,mkChar("Regiontype"));
1464
+
1465
+
1466
+ setAttrib(TEMPSXP4,R_NamesSymbol,UNITSProbeInfoNames);
1467
+ UNPROTECT(1);
1468
+
1469
+ PROTECT(UNITSProbeInforow_names= allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
1470
+
1471
+ for (l=0; l < my_cdf.units[i].blocks[j].num_cells; l++){
1472
+ sprintf(buf, "%d", l+1);
1473
+ SET_STRING_ELT(UNITSProbeInforow_names,l,mkChar(buf));
1474
+ }
1475
+ setAttrib(TEMPSXP4, R_RowNamesSymbol, UNITSProbeInforow_names);
1476
+ UNPROTECT(1);
1477
+
1478
+
1479
+
1480
+ setAttrib(TEMPSXP4,R_ClassSymbol,mkString("data.frame"));
1481
+ SET_VECTOR_ELT(TEMPSXP3,7,TEMPSXP4);
1482
+ UNPROTECT(1);
1483
+
1484
+
1485
+
1486
+ PROTECT(TEMPSXP4=allocVector(STRSXP,8));
1487
+ SET_STRING_ELT(TEMPSXP4,0,mkChar("Name"));
1488
+ SET_STRING_ELT(TEMPSXP4,1,mkChar("BlockNumber"));
1489
+ SET_STRING_ELT(TEMPSXP4,2,mkChar("NumAtoms"));
1490
+ SET_STRING_ELT(TEMPSXP4,3,mkChar("NumCells"));
1491
+ SET_STRING_ELT(TEMPSXP4,4,mkChar("StartPosition"));
1492
+ SET_STRING_ELT(TEMPSXP4,5,mkChar("StopPosition"));
1493
+ SET_STRING_ELT(TEMPSXP4,6,mkChar("Direction"));
1494
+ SET_STRING_ELT(TEMPSXP4,7,mkChar("Unit_Block_Cells"));
1495
+ setAttrib(TEMPSXP3,R_NamesSymbol,TEMPSXP4);
1496
+ UNPROTECT(1);
1497
+
1498
+ SET_VECTOR_ELT(TEMPSXP2,j,TEMPSXP3);
1499
+ UNPROTECT(1);
1500
+ }
1501
+
1502
+
1503
+
1504
+
1505
+
1506
+
1507
+ SET_VECTOR_ELT(TEMPSXP,7,TEMPSXP2);
1508
+ UNPROTECT(1);
1509
+
1510
+
1511
+
1512
+
1513
+ PROTECT(TEMPSXP2 = allocVector(STRSXP,8));
1514
+ SET_STRING_ELT(TEMPSXP2,0,mkChar("Name"));
1515
+ SET_STRING_ELT(TEMPSXP2,1,mkChar("Direction"));
1516
+ SET_STRING_ELT(TEMPSXP2,2,mkChar("NumAtoms"));
1517
+ SET_STRING_ELT(TEMPSXP2,3,mkChar("NumCells"));
1518
+ SET_STRING_ELT(TEMPSXP2,4,mkChar("UnitNumber"));
1519
+ SET_STRING_ELT(TEMPSXP2,5,mkChar("UnitType"));
1520
+ SET_STRING_ELT(TEMPSXP2,6,mkChar("NumberBlocks"));
1521
+ SET_STRING_ELT(TEMPSXP2,7,mkChar("Unit_Block"));
1522
+ setAttrib(TEMPSXP,R_NamesSymbol,TEMPSXP2);
1523
+ UNPROTECT(1);
1524
+
1525
+
1526
+
1527
+
1528
+
1529
+ SET_VECTOR_ELT(UNITS,i,TEMPSXP);
1530
+ UNPROTECT(1);
1531
+
1532
+
1533
+
1534
+ }
1535
+ SET_VECTOR_ELT(CDFInfo,2,UNITS);
1536
+ UNPROTECT(1);
1537
+
1538
+
1539
+
1540
+ dealloc_cdf_text(&my_cdf);
1541
+ UNPROTECT(2);
1542
+ return CDFInfo;
1543
+ }
1544
+
1545
+
1546
+
1547
+
1548
+
1549
+ /*************************************************************
1550
+ **
1551
+ ** SEXP CheckCDFtext(SEXP filename)
1552
+ **
1553
+ ** Takes a given file name and returns 1 if it is a text format CDF file
1554
+ ** otherwise it returns 0
1555
+ **
1556
+ *************************************************************/
1557
+
1558
+
1559
+
1560
+ SEXP CheckCDFtext(SEXP filename){
1561
+ SEXP tmp;
1562
+ int good;
1563
+ const char *cur_file_name;
1564
+
1565
+ cur_file_name = CHAR(STRING_ELT(filename,0));
1566
+
1567
+ good = isTextCDFFile(cur_file_name);
1568
+
1569
+ PROTECT(tmp= allocVector(INTSXP,1));
1570
+
1571
+ INTEGER(tmp)[0] = good;
1572
+
1573
+ UNPROTECT(1);
1574
+ return tmp;
1575
+ }
1576
+