bio-affy 0.1.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/bin/bio-affy +80 -0
- data/bio-affy.gemspec +128 -0
- data/ext/DESCRIPTION +11 -0
- data/ext/HISTORY +3 -0
- data/ext/LICENSE +456 -0
- data/ext/NAMESPACE +2 -0
- data/ext/R/check.cdf.type.R +18 -0
- data/ext/R/read.cdffile.list.R +23 -0
- data/ext/R/read.celfile.R +11 -0
- data/ext/R/read.celfile.header.R +37 -0
- data/ext/R/read.probematrices.R +29 -0
- data/ext/README_BIOLIB +36 -0
- data/ext/aclocal.m4 +32 -0
- data/ext/configure +4898 -0
- data/ext/configure.in +51 -0
- data/ext/man/check.cdf.type.Rd +22 -0
- data/ext/man/read.cdffile.list.Rd +20 -0
- data/ext/man/read.celfile.Rd +23 -0
- data/ext/man/read.celfile.header.Rd +22 -0
- data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
- data/ext/src/CMakeLists.txt +39 -0
- data/ext/src/Makevars.in +3 -0
- data/ext/src/Makevars.win +2 -0
- data/ext/src/Rakefile +43 -0
- data/ext/src/biolib_affyio.c +416 -0
- data/ext/src/biolib_affyio.h +132 -0
- data/ext/src/biolib_affyio.o +0 -0
- data/ext/src/fread_functions.c +871 -0
- data/ext/src/fread_functions.h +60 -0
- data/ext/src/fread_functions.o +0 -0
- data/ext/src/libaffyext.so +0 -0
- data/ext/src/mkrf.log +11 -0
- data/ext/src/mkrf_conf.rb +6 -0
- data/ext/src/read_abatch.c +5484 -0
- data/ext/src/read_abatch.h +63 -0
- data/ext/src/read_abatch.o +0 -0
- data/ext/src/read_bpmap.c +888 -0
- data/ext/src/read_bpmap.o +0 -0
- data/ext/src/read_cdf.h +347 -0
- data/ext/src/read_cdf_xda.c +1342 -0
- data/ext/src/read_cdf_xda.o +0 -0
- data/ext/src/read_cdffile2.c +1576 -0
- data/ext/src/read_cdffile2.o +0 -0
- data/ext/src/read_celfile_generic.c +2061 -0
- data/ext/src/read_celfile_generic.h +33 -0
- data/ext/src/read_celfile_generic.o +0 -0
- data/ext/src/read_clf.c +870 -0
- data/ext/src/read_clf.o +0 -0
- data/ext/src/read_generic.c +1446 -0
- data/ext/src/read_generic.h +144 -0
- data/ext/src/read_generic.o +0 -0
- data/ext/src/read_pgf.c +1337 -0
- data/ext/src/read_pgf.o +0 -0
- data/lib/bio-affy.rb +5 -0
- data/lib/bio/affy.rb +7 -0
- data/lib/bio/affyext.rb +23 -0
- data/lib/bio/libaffyext.so +0 -0
- data/spec/bio-affy_spec.rb +22 -0
- data/spec/spec_helper.rb +13 -0
- data/test/data/affy/GSM103328.CEL.gz +0 -0
- data/test/data/affy/GSM103329.CEL.gz +0 -0
- data/test/data/affy/GSM103330.CEL.gz +0 -0
- data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
- metadata +190 -0
Binary file
|
@@ -0,0 +1,1576 @@
|
|
1
|
+
/****************************************************************
|
2
|
+
**
|
3
|
+
** File: read_cdffile2.c
|
4
|
+
**
|
5
|
+
** Implementation by: B. M. Bolstad
|
6
|
+
**
|
7
|
+
** Copyright (C) B. M. Bolstad 2005
|
8
|
+
**
|
9
|
+
** A parser designed to read text CDF files into an R List structure
|
10
|
+
**
|
11
|
+
** Note this version only parses GC3.0 version text files (which should
|
12
|
+
** be almost all text CDF files currently used)
|
13
|
+
**
|
14
|
+
** Note that the original text CDF parser (from which this file is not in
|
15
|
+
** any way based) was written by Laurent Gautier. That file was named
|
16
|
+
** read_cdffile.c (originally part of affy and then later makecdfenv)
|
17
|
+
**
|
18
|
+
** Implemented based on documentation available from Affymetrix
|
19
|
+
**
|
20
|
+
** Implementation begun 2005.
|
21
|
+
**
|
22
|
+
** Modification Dates
|
23
|
+
** Jul 24 - Initial version
|
24
|
+
** Sep 20 - Continued Implementation
|
25
|
+
** Sep 21 - Continued Implementation and debugging
|
26
|
+
** Sep 22 - Continued Implementation and testing
|
27
|
+
** Sep 24 - QCunit probes, Unit Block probes, Finish and tested.
|
28
|
+
** Dec 1, 2005 - Some comment cleaning. Added isTextCDFFile,CheckCDFtext
|
29
|
+
** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
|
30
|
+
** May 31, 2006 - fix some compiler warnings
|
31
|
+
** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
|
32
|
+
**
|
33
|
+
**
|
34
|
+
*******************************************************************/
|
35
|
+
|
36
|
+
#include <R.h>
|
37
|
+
#include <Rdefines.h>
|
38
|
+
|
39
|
+
#include "stdlib.h"
|
40
|
+
#include "stdio.h"
|
41
|
+
|
42
|
+
#ifdef BIOLIB
|
43
|
+
#include <biolib_R_map.h>
|
44
|
+
#endif
|
45
|
+
|
46
|
+
#define BUFFER_SIZE 1024
|
47
|
+
|
48
|
+
|
49
|
+
/*****************************************************************
|
50
|
+
**
|
51
|
+
**
|
52
|
+
** A structure for holding information in the
|
53
|
+
** "CDF" and "Chip" sections (basically header information)
|
54
|
+
**
|
55
|
+
******************************************************************/
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
typedef struct {
  char *version;        /* version string; read_cdf_header() does not fill this in */
  char *name;           /* copy of the value on the "Name" line of the [Chip] section */
  int rows;             /* value of the "Rows" line */
  int cols;             /* value of the "Cols" line */
  int numberofunits;    /* value of the "NumberOfUnits" line; sizes the units array */
  int maxunit;          /* value of the "MaxUnit" line */
  int NumQCUnits;       /* value of the "NumQCUnits" line; sizes the qc_units array */
  char *chipreference;  /* copy of the "ChipReference" value, NULL when the line has no value */
} cdf_text_header;
|
69
|
+
|
70
|
+
|
71
|
+
/*****************************************************************
|
72
|
+
**
|
73
|
+
**
|
74
|
+
** A structure for holding QC probe information
|
75
|
+
** Note the "CYCLES" item is ignored and never parsed
|
76
|
+
**
|
77
|
+
******************************************************************/
|
78
|
+
|
79
|
+
|
80
|
+
typedef struct {
  int x;        /* "X" column of the QC cell line */
  int y;        /* "Y" column */
  char *probe;  /* heap copy of the "PROBE" column text */
  int plen;     /* "PLEN" column */
  int atom;     /* "ATOM" column */
  int index;    /* "INDEX" column */
  int match;    /* "MATCH" column */
  int bg;       /* "BG" column */
} cdf_text_qc_probe;
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
/*******************************************************************
|
98
|
+
**
|
99
|
+
** A structure for holding QC units information. These are
|
100
|
+
** areas of the chip that contain probes that may or may not be useful
|
101
|
+
** for QC and other purposes.
|
102
|
+
**
|
103
|
+
**
|
104
|
+
*******************************************************************/
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
typedef struct{
|
109
|
+
int type;
|
110
|
+
unsigned int n_probes;
|
111
|
+
int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
|
112
|
+
cdf_text_qc_probe *qc_probes;
|
113
|
+
|
114
|
+
} cdf_text_qc_unit;
|
115
|
+
|
116
|
+
|
117
|
+
/*******************************************************************
|
118
|
+
**
|
119
|
+
** A structure for holding probe information for unit_blocks_probes
|
120
|
+
**
|
121
|
+
** probes are stored within blocks
|
122
|
+
**
|
123
|
+
*******************************************************************/
|
124
|
+
|
125
|
+
typedef struct {
  /* Members appear in the same order as the fields of a cell line;
     read_cdf_unit_block_probes() fills x .. regiontype from
     tokens 1 .. 15 of that line. */
  int x;
  int y;
  char *probe;      /* heap copy of token 3 */
  char *feat;       /* heap copy of token 4 */
  char *qual;       /* heap copy of token 5 */
  int expos;
  int pos;
  char *cbase;      /* heap copy of token 8 */
  char *pbase;      /* heap copy of token 9 */
  char *tbase;      /* heap copy of token 10 */
  int atom;
  int index;
  int codonid;
  int codon;
  int regiontype;
  char *region;     /* not assigned by read_cdf_unit_block_probes() */
} cdf_text_unit_block_probe;
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
/*******************************************************************
|
148
|
+
**
|
149
|
+
** A structure holding Unit_blocks
|
150
|
+
**
|
151
|
+
** blocks are stored within units.
|
152
|
+
** blocks contain many probes
|
153
|
+
**
|
154
|
+
*******************************************************************/
|
155
|
+
|
156
|
+
typedef struct{
|
157
|
+
char *name;
|
158
|
+
int blocknumber;
|
159
|
+
int num_atoms;
|
160
|
+
int num_cells;
|
161
|
+
int start_position;
|
162
|
+
int stop_position;
|
163
|
+
int direction;
|
164
|
+
cdf_text_unit_block_probe *probes;
|
165
|
+
|
166
|
+
} cdf_text_unit_block;
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
/*******************************************************************
|
174
|
+
**
|
175
|
+
** A structure for holding "Units", also known as probesets
|
176
|
+
**
|
177
|
+
** Each unit contains one or more blocks. Each block contains one or
|
178
|
+
** more probes
|
179
|
+
**
|
180
|
+
*******************************************************************/
|
181
|
+
|
182
|
+
|
183
|
+
typedef struct{
|
184
|
+
char *name;
|
185
|
+
int direction;
|
186
|
+
int num_atoms;
|
187
|
+
int num_cells;
|
188
|
+
int unit_number;
|
189
|
+
int unit_type;
|
190
|
+
int numberblocks;
|
191
|
+
int MutationType;
|
192
|
+
cdf_text_unit_block *blocks;
|
193
|
+
} cdf_text_unit;
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
/*******************************************************************
|
198
|
+
**
|
199
|
+
** A structure for holding a text CDF file
|
200
|
+
**
|
201
|
+
** text cdf files consist of
|
202
|
+
** basic header information
|
203
|
+
** qcunits
|
204
|
+
** - qc probes
|
205
|
+
** units (aka probesets)
|
206
|
+
** - blocks
|
207
|
+
** - probes
|
208
|
+
**
|
209
|
+
**
|
210
|
+
*******************************************************************/
|
211
|
+
|
212
|
+
typedef struct{
|
213
|
+
cdf_text_header header;
|
214
|
+
cdf_text_qc_unit *qc_units;
|
215
|
+
cdf_text_unit *units;
|
216
|
+
} cdf_text;
|
217
|
+
|
218
|
+
|
219
|
+
/**************************************************************
|
220
|
+
**
|
221
|
+
** The following code is for tokenizing strings
|
222
|
+
** originally included in read_abatch.c from the affy package.
|
223
|
+
**
|
224
|
+
*************************************************************/
|
225
|
+
|
226
|
+
/***************************************************************
|
227
|
+
**
|
228
|
+
** tokenset
|
229
|
+
**
|
230
|
+
** char **tokens - an array of token strings
|
231
|
+
** int n - number of tokens in this set.
|
232
|
+
**
|
233
|
+
** a structure to hold a set of tokens. Typically a tokenset is
|
234
|
+
** created by breaking a character string based upon a set of
|
235
|
+
** delimiters.
|
236
|
+
**
|
237
|
+
**
|
238
|
+
**************************************************************/
|
239
|
+
|
240
|
+
typedef struct {
  char **tokens;  /* the token strings, each one separately allocated */
  int n;          /* how many entries tokens holds */
} tokenset;
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
/******************************************************************
|
248
|
+
**
|
249
|
+
** tokenset *tokenize(char *str, char *delimiters)
|
250
|
+
**
|
251
|
+
** char *str - a string to break into tokens
|
252
|
+
** char *delimiters - delimiters to use in breaking up the line
|
253
|
+
**
|
254
|
+
**
|
255
|
+
** RETURNS a new tokenset
|
256
|
+
**
|
257
|
+
** Given a string, split into tokens based on a set of delimiters
|
258
|
+
**
|
259
|
+
*****************************************************************/
|
260
|
+
|
261
|
+
static tokenset *tokenize(char *str, char *delimiters){
|
262
|
+
|
263
|
+
int i=0;
|
264
|
+
|
265
|
+
char *current_token;
|
266
|
+
tokenset *my_tokenset = Calloc(1,tokenset);
|
267
|
+
my_tokenset->n=0;
|
268
|
+
|
269
|
+
my_tokenset->tokens = NULL;
|
270
|
+
|
271
|
+
current_token = strtok(str,delimiters);
|
272
|
+
while (current_token != NULL){
|
273
|
+
my_tokenset->n++;
|
274
|
+
my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
|
275
|
+
my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
|
276
|
+
strcpy(my_tokenset->tokens[i],current_token);
|
277
|
+
i++;
|
278
|
+
current_token = strtok(NULL,delimiters);
|
279
|
+
}
|
280
|
+
|
281
|
+
return my_tokenset;
|
282
|
+
}
|
283
|
+
|
284
|
+
|
285
|
+
/******************************************************************
|
286
|
+
**
|
287
|
+
** int tokenset_size(tokenset *x)
|
288
|
+
**
|
289
|
+
** tokenset *x - a tokenset
|
290
|
+
**
|
291
|
+
** RETURNS the number of tokens in the tokenset
|
292
|
+
**
|
293
|
+
******************************************************************/
|
294
|
+
|
295
|
+
static int tokenset_size(tokenset *x){
  /* the count is maintained by tokenize() as tokens are appended */
  int how_many = x->n;

  return how_many;
}
|
298
|
+
|
299
|
+
|
300
|
+
/******************************************************************
|
301
|
+
**
|
302
|
+
** char *get_token(tokenset *x, int i)
|
303
|
+
**
|
304
|
+
** tokenset *x - a tokenset
|
305
|
+
** int i - index of the token to return
|
306
|
+
**
|
307
|
+
** RETURNS pointer to the i'th token
|
308
|
+
**
|
309
|
+
******************************************************************/
|
310
|
+
|
311
|
+
static char *get_token(tokenset *x,int i){
  /* Returns the i'th token (0 based) of the tokenset.  The pointer
     refers to storage owned by the tokenset and is only valid until
     delete_tokens() is called on it.

     The parsers in this file ask for fixed token positions on lines
     that come straight from the file, so a line with fewer fields
     than expected would otherwise read past the end of the token
     array.  Raise an R error instead of doing that. */
  if (i < 0 || i >= x->n){
    error("Expected at least %d fields on a line but found only %d. Perhaps this file is corrupted.\n", i+1, x->n);
  }
  return x->tokens[i];
}
|
314
|
+
|
315
|
+
/******************************************************************
|
316
|
+
**
|
317
|
+
** void delete_tokens(tokenset *x)
|
318
|
+
**
|
319
|
+
** tokenset *x - a tokenset
|
320
|
+
**
|
321
|
+
** Deallocates all the space allocated for a tokenset
|
322
|
+
**
|
323
|
+
******************************************************************/
|
324
|
+
|
325
|
+
static void delete_tokens(tokenset *x){

  /* Releases, in this order: every token string, the array of
     token pointers, and finally the tokenset itself. */

  int which = 0;

  while (which < x->n){
    Free(x->tokens[which]);
    which++;
  }

  Free(x->tokens);
  Free(x);
}
|
335
|
+
|
336
|
+
/*******************************************************************
|
337
|
+
**
|
338
|
+
** int token_ends_with(char *token, char *ends)
|
339
|
+
**
|
340
|
+
** char *token - a string to check
|
341
|
+
** char *ends_in - we are looking for this string at the end of token
|
342
|
+
**
|
343
|
+
**
|
344
|
+
** returns 0 if no match, otherwise it returns the index of the first character
|
345
|
+
** which matches the start of *ends.
|
346
|
+
**
|
347
|
+
** Note that there must be one additional character in "token" beyond
|
348
|
+
** the characters in "ends". So
|
349
|
+
**
|
350
|
+
** *token = "TestStr"
|
351
|
+
** *ends = "TestStr"
|
352
|
+
**
|
353
|
+
** would return 0 but if
|
354
|
+
**
|
355
|
+
** ends = "estStr"
|
356
|
+
**
|
357
|
+
** we would return 1.
|
358
|
+
**
|
359
|
+
** and if
|
360
|
+
**
|
361
|
+
** ends= "stStr"
|
362
|
+
** we would return 2 .....etc
|
363
|
+
**
|
364
|
+
**
|
365
|
+
******************************************************************/
|
366
|
+
|
367
|
+
static int token_ends_with(char *token, char *ends_in){

  /* Returns the position in token at which the suffix ends_in
     begins, or 0 when token does not end with ends_in.  token must
     be strictly longer than ends_in, so a match at position 0 is
     never reported. */

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;   /* where the suffix would have to start */

  if (offset <= 0){
    /* token is not longer than ends_in, so it cannot qualify */
    return 0;
  }

  return (strcmp(token + offset, ends_in) == 0) ? offset : 0;
}
|
389
|
+
|
390
|
+
|
391
|
+
/******************************************************************
|
392
|
+
**
|
393
|
+
** The following code, also from read_abatch.c is more about locating
|
394
|
+
** sections in the file and reading it in.
|
395
|
+
**
|
396
|
+
******************************************************************/
|
397
|
+
|
398
|
+
|
399
|
+
/**
|
400
|
+
** This reads a line from the specified file stream
|
401
|
+
**
|
402
|
+
**
|
403
|
+
**/
|
404
|
+
|
405
|
+
|
406
|
+
static void ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  /* Reads the next line (at most buffersize-1 characters) into
     buffer.  When fgets() yields nothing an R error is raised. */
  char *got = fgets(buffer, buffersize, currentFile);

  if (!got){
    error("End of file reached unexpectedly. Perhaps this file is truncated.\n");
  }
}
|
411
|
+
|
412
|
+
|
413
|
+
|
414
|
+
/******************************************************************
|
415
|
+
**
|
416
|
+
** void findStartsWith(FILE *my_file,char *starts, char *buffer)
|
417
|
+
**
|
418
|
+
** FILE *my_file - an open file to read from
|
419
|
+
** char *starts - the string to search for at the start of each line
|
420
|
+
** char *buffer - where to place the line that has been read.
|
421
|
+
**
|
422
|
+
**
|
423
|
+
** Find a line that starts with the specified character string.
|
424
|
+
** At exit buffer should contain that line
|
425
|
+
**
|
426
|
+
*****************************************************************/
|
427
|
+
|
428
|
+
|
429
|
+
static void findStartsWith(FILE *my_file,char *starts, char *buffer){
|
430
|
+
|
431
|
+
int starts_len = strlen(starts);
|
432
|
+
int match = 1;
|
433
|
+
|
434
|
+
do {
|
435
|
+
ReadFileLine(buffer, BUFFER_SIZE, my_file);
|
436
|
+
match = strncmp(starts, buffer, starts_len);
|
437
|
+
} while (match != 0);
|
438
|
+
}
|
439
|
+
|
440
|
+
|
441
|
+
/******************************************************************
|
442
|
+
**
|
443
|
+
** void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer)
|
444
|
+
**
|
445
|
+
** FILE *my_file - an open file
|
446
|
+
** char *sectiontitle - string we are searching for
|
447
|
+
** char *buffer - returns with the line starting with sectiontitle
|
448
|
+
**
|
449
|
+
**
|
450
|
+
*****************************************************************/
|
451
|
+
|
452
|
+
static void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer){
  /* A section title is simply a line starting with sectiontitle,
     so the search is handed straight to findStartsWith().  On
     return buffer holds the matching line. */
  findStartsWith(my_file, sectiontitle, buffer);
}
|
455
|
+
|
456
|
+
|
457
|
+
/*******************************************************************
|
458
|
+
**
|
459
|
+
** void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer)
|
460
|
+
**
|
461
|
+
** FILE *infile - pointer to open file presumed to be a CDF file
|
462
|
+
** cdf_text *mycdf - structure for holding cdf file
|
463
|
+
** char *linebuffer - a place to store strings that are read in. Length
|
464
|
+
** is given by BUFFER_SIZE
|
465
|
+
**
|
466
|
+
*******************************************************************/
|
467
|
+
|
468
|
+
static void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer){

  /* Fills mycdf->header from the [Chip] section.  The fields are
     searched for in the fixed order Name, Rows, Cols, NumberOfUnits,
     MaxUnit, NumQCUnits, ChipReference; each search continues
     forward from where the previous one stopped. */

  cdf_text_header *hdr = &mycdf->header;
  tokenset *fields;
  char *value;

  /* position the stream just after the [Chip] title line */
  AdvanceToSection(infile,"[Chip]",linebuffer);

  /* Name: keep a private copy of the text that follows the '=' */
  findStartsWith(infile,"Name",linebuffer);
  fields = tokenize(linebuffer,"=\r\n");
  value = get_token(fields,1);
  hdr->name = Calloc(strlen(value)+1,char);
  strcpy(hdr->name,value);
  delete_tokens(fields);

  /* the numeric fields: token 1 of "Key=Value" converted with atoi */
  findStartsWith(infile,"Rows",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->rows = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"Cols",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->cols = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"NumberOfUnits",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->numberofunits = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"MaxUnit",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->maxunit = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"NumQCUnits",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->NumQCUnits = atoi(get_token(fields,1));
  delete_tokens(fields);

  /* ChipReference may have nothing after the '='; store NULL then */
  findStartsWith(infile,"ChipReference",linebuffer);
  fields = tokenize(linebuffer,"=\r\n");
  if (tokenset_size(fields) <= 1){
    hdr->chipreference = NULL;
  } else {
    value = get_token(fields,1);
    hdr->chipreference = Calloc(strlen(value)+1,char);
    strcpy(hdr->chipreference,value);
  }
  delete_tokens(fields);
}
|
525
|
+
|
526
|
+
|
527
|
+
|
528
|
+
/*******************************************************************
|
529
|
+
**
|
530
|
+
** void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index)
|
531
|
+
**
|
532
|
+
** FILE *infile - an opened CDF file
|
533
|
+
** cdf_text *mycdf - a structure for holding cdf file
|
534
|
+
** char *linebuffer - temporary place to store lines of text read in
|
535
|
+
** int index - which QCunit.
|
536
|
+
**
|
537
|
+
** This function reads in the QC unit probes from the cdf file. It is assumed that the space to
|
538
|
+
** store them is already allocated.
|
539
|
+
**
|
540
|
+
*******************************************************************/
|
541
|
+
|
542
|
+
|
543
|
+
static void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index){

  /* Reads one line per probe of QC unit number index and stores the
     columns flagged in that unit's qccontains[].  Column k (0 based)
     is always taken from token k+1 of the line.  The qc_probes array
     must already be allocated. */

  cdf_text_qc_unit *qc = &mycdf->qc_units[index];
  tokenset *fields;
  char *text;
  int row;

  for (row = 0; row < qc->n_probes; row++){
    cdf_text_qc_probe *probe;

    ReadFileLine(linebuffer, BUFFER_SIZE, infile);
    fields = tokenize(linebuffer,"=\t\r\n");
    probe = &qc->qc_probes[row];

    if (qc->qccontains[0]){
      probe->x = atoi(get_token(fields,1));
    }
    if (qc->qccontains[1]){
      probe->y = atoi(get_token(fields,2));
    }
    if (qc->qccontains[2]){
      /* the probe text is kept as a separately allocated copy */
      text = get_token(fields,3);
      probe->probe = Calloc(strlen(text)+1,char);
      strcpy(probe->probe,text);
    }
    if (qc->qccontains[3]){
      probe->plen = atoi(get_token(fields,4));
    }
    if (qc->qccontains[4]){
      probe->atom = atoi(get_token(fields,5));
    }
    if (qc->qccontains[5]){
      probe->index = atoi(get_token(fields,6));
    }
    if (qc->qccontains[6]){
      probe->match = atoi(get_token(fields,7));
    }
    if (qc->qccontains[7]){
      probe->bg = atoi(get_token(fields,8));
    }

    delete_tokens(fields);
  }
}
|
581
|
+
|
582
|
+
/*******************************************************************
|
583
|
+
**
|
584
|
+
** void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer)
|
585
|
+
**
|
586
|
+
** FILE *infile - an opened CDF file
|
587
|
+
** cdf_text *mycdf - a structure for holding cdf file
|
588
|
+
** char *linebuffer - temporary place to store lines of text read in
|
589
|
+
**
|
590
|
+
** Reads all the QC units. Note that it allocates the space for the probes
|
591
|
+
** it is assumed that the space for the actual QC units are already allocated
|
592
|
+
**
|
593
|
+
*******************************************************************/
|
594
|
+
|
595
|
+
static void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer){

  /* Allocates mycdf->qc_units (header.NumQCUnits entries) and reads
     every QC section in file order: its Type, its NumberCells, the
     set of columns named on its CellHeader line, and finally the
     probe lines themselves. */

  /* Column titles in qccontains[] order.  A CellHeader token is
     compared against the titles in this order and the first title
     that is a prefix of the token decides which flag is set. */
  static const char *const qc_columns[8] = {
    "X", "Y", "PROBE", "PLEN", "ATOM", "INDEX", "MATCH", "BG"
  };

  tokenset *fields;
  int u, col, k;

  mycdf->qc_units = Calloc(mycdf->header.NumQCUnits,cdf_text_qc_unit);

  for (u = 0; u < mycdf->header.NumQCUnits; u++){
    cdf_text_qc_unit *qc = &mycdf->qc_units[u];

    /* move to the next QC section */
    AdvanceToSection(infile,"[QC",linebuffer);

    findStartsWith(infile,"Type",linebuffer);
    fields = tokenize(linebuffer,"=");
    qc->type = (unsigned short)atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumberCells",linebuffer);
    fields = tokenize(linebuffer,"=");
    qc->n_probes = atoi(get_token(fields,1));
    delete_tokens(fields);

    qc->qc_probes = Calloc(qc->n_probes,cdf_text_qc_probe);

    /* work out which columns this QC unit has; token 0 is the
       "CellHeader" key itself and is skipped */
    findStartsWith(infile,"CellHeader",linebuffer);
    fields = tokenize(linebuffer,"=\t\r\n");
    for (col = 1; col < tokenset_size(fields); col++){
      char *title = get_token(fields,col);

      for (k = 0; k < 8; k++){
        if (strncmp(qc_columns[k], title, strlen(qc_columns[k])) == 0){
          qc->qccontains[k] = 1;
          break;
        }
      }
    }
    delete_tokens(fields);

    read_cdf_QCUnits_probes(infile,mycdf,linebuffer,u);
  }
}
|
647
|
+
|
648
|
+
/*******************************************************************
|
649
|
+
**
|
650
|
+
** void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block)
|
651
|
+
**
|
652
|
+
** FILE *infile - an opened CDF file
|
653
|
+
** cdf_text *mycdf - a structure for holding cdf file
|
654
|
+
** char *linebuffer - temporary place to store lines of text read in from the file
|
655
|
+
** int unit - which unit
|
656
|
+
** int block - which block
|
657
|
+
**
|
658
|
+
** Reads in the probes for each unit. Note that it is assumed that the
|
659
|
+
** space for the probes has actually been allocated.
|
660
|
+
**
|
661
|
+
*******************************************************************/
|
662
|
+
|
663
|
+
|
664
|
+
|
665
|
+
static void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block){

  /* Reads the cell lines of one block: one line is read and thrown
     away first (the cell header), then num_cells lines are parsed.
     Tokens 1 to 15 of each line fill the probe entry; text fields
     are stored as separately allocated copies.  The probes array
     must already be allocated. */

  cdf_text_unit_block *blk;
  tokenset *fields;
  char *text;
  int row;

  /* Read the Cell Header for the unit block */
  ReadFileLine(linebuffer, BUFFER_SIZE, infile);

  blk = &mycdf->units[unit].blocks[block];

  for (row = 0; row < blk->num_cells; row++){
    cdf_text_unit_block_probe *cell;

    ReadFileLine(linebuffer, BUFFER_SIZE, infile);
    fields = tokenize(linebuffer,"=\t\r\n");
    cell = &blk->probes[row];

    cell->x = atoi(get_token(fields,1));
    cell->y = atoi(get_token(fields,2));

    text = get_token(fields,3);
    cell->probe = Calloc(strlen(text)+1,char);
    strcpy(cell->probe,text);

    text = get_token(fields,4);
    cell->feat = Calloc(strlen(text)+1,char);
    strcpy(cell->feat,text);

    text = get_token(fields,5);
    cell->qual = Calloc(strlen(text)+1,char);
    strcpy(cell->qual,text);

    cell->expos = atoi(get_token(fields,6));
    cell->pos = atoi(get_token(fields,7));

    text = get_token(fields,8);
    cell->cbase = Calloc(strlen(text)+1,char);
    strcpy(cell->cbase,text);

    text = get_token(fields,9);
    cell->pbase = Calloc(strlen(text)+1,char);
    strcpy(cell->pbase,text);

    text = get_token(fields,10);
    cell->tbase = Calloc(strlen(text)+1,char);
    strcpy(cell->tbase,text);

    cell->atom = atoi(get_token(fields,11));
    cell->index = atoi(get_token(fields,12));
    cell->codonid = atoi(get_token(fields,13));
    cell->codon = atoi(get_token(fields,14));
    cell->regiontype = atoi(get_token(fields,15));

    delete_tokens(fields);
  }
}
|
700
|
+
|
701
|
+
|
702
|
+
/*******************************************************************
|
703
|
+
**
|
704
|
+
** void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit)
|
705
|
+
**
|
706
|
+
** FILE *infile - an opened CDF file
|
707
|
+
** cdf_text *mycdf - a structure for holding cdf file
|
708
|
+
** char *linebuffer - temporary place to store lines of text read in from the file
|
709
|
+
** int unit - which unit
|
710
|
+
**
|
711
|
+
** Reads in all the blocks for the unit. Assumes that space for the blocks are allocated
|
712
|
+
** already. Allocates the space for the probes and calls a function to read them in.
|
713
|
+
**
|
714
|
+
*******************************************************************/
|
715
|
+
|
716
|
+
|
717
|
+
static void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit){

  /* Reads the numberblocks blocks of unit number unit.  For each
     block the descriptive lines are read in file order, the probes
     array is allocated (num_cells entries) and the probes are read
     by read_cdf_unit_block_probes().  The blocks array itself must
     already be allocated. */

  tokenset *fields;
  char *value;
  int b;

  for (b = 0; b < mycdf->units[unit].numberblocks; b++){
    cdf_text_unit *owner = &mycdf->units[unit];
    cdf_text_unit_block *blk = &owner->blocks[b];

    /* Name: stored as a private copy */
    findStartsWith(infile,"Name",linebuffer);
    fields = tokenize(linebuffer,"=\r\n");
    value = get_token(fields,1);
    blk->name = Calloc(strlen(value)+1,char);
    strcpy(blk->name,value);
    delete_tokens(fields);

    findStartsWith(infile,"BlockNumber",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->blocknumber = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumAtoms",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->num_atoms = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumCells",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->num_cells = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"StartPosition",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->start_position = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"StopPosition",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->stop_position = atoi(get_token(fields,1));
    delete_tokens(fields);

    /* only units of type 2 have a Direction line per block; every
       other block takes the direction of its unit */
    if (owner->unit_type != 2){
      blk->direction = owner->direction;
    } else {
      findStartsWith(infile,"Direction",linebuffer);
      fields = tokenize(linebuffer,"=");
      blk->direction = atoi(get_token(fields,1));
      delete_tokens(fields);
    }

    blk->probes = Calloc(blk->num_cells,cdf_text_unit_block_probe);

    read_cdf_unit_block_probes(infile,mycdf,linebuffer,unit,b);
  }
}
|
778
|
+
|
779
|
+
|
780
|
+
/*******************************************************************
|
781
|
+
**
|
782
|
+
** void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer)
|
783
|
+
**
|
784
|
+
** FILE *infile - an opened CDF file
|
785
|
+
** cdf_text *mycdf - a structure for holding cdf file
|
786
|
+
** char *linebuffer - temporary place to store lines of text read in from the file
|
787
|
+
**
|
788
|
+
** Reads in all the units allocating the space for them and then calling sub functions
|
789
|
+
** to read each block and probes within the blocks
|
790
|
+
**
|
791
|
+
*******************************************************************/
|
792
|
+
|
793
|
+
static void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer){
|
794
|
+
tokenset *cur_tokenset;
|
795
|
+
int i;
|
796
|
+
|
797
|
+
mycdf->units = Calloc(mycdf->header.numberofunits,cdf_text_unit);
|
798
|
+
|
799
|
+
for (i =0; i < mycdf->header.numberofunits; i++){
|
800
|
+
/* move to the next Unit section */
|
801
|
+
AdvanceToSection(infile,"[Unit",linebuffer);
|
802
|
+
findStartsWith(infile,"Name",linebuffer);
|
803
|
+
cur_tokenset = tokenize(linebuffer,"=\r\n");
|
804
|
+
mycdf->units[i].name = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
|
805
|
+
strcpy(mycdf->units[i].name,get_token(cur_tokenset,1));
|
806
|
+
|
807
|
+
delete_tokens(cur_tokenset);
|
808
|
+
|
809
|
+
|
810
|
+
|
811
|
+
findStartsWith(infile,"Direction",linebuffer);
|
812
|
+
cur_tokenset = tokenize(linebuffer,"=");
|
813
|
+
mycdf->units[i].direction = atoi(get_token(cur_tokenset,1));
|
814
|
+
delete_tokens(cur_tokenset);
|
815
|
+
|
816
|
+
findStartsWith(infile,"NumAtoms",linebuffer);
|
817
|
+
cur_tokenset = tokenize(linebuffer,"=");
|
818
|
+
mycdf->units[i].num_atoms = atoi(get_token(cur_tokenset,1));
|
819
|
+
delete_tokens(cur_tokenset);
|
820
|
+
|
821
|
+
findStartsWith(infile,"NumCells",linebuffer);
|
822
|
+
cur_tokenset = tokenize(linebuffer,"=");
|
823
|
+
mycdf->units[i].num_cells = atoi(get_token(cur_tokenset,1));
|
824
|
+
delete_tokens(cur_tokenset);
|
825
|
+
|
826
|
+
findStartsWith(infile,"UnitNumber",linebuffer);
|
827
|
+
cur_tokenset = tokenize(linebuffer,"=");
|
828
|
+
mycdf->units[i].unit_number = atoi(get_token(cur_tokenset,1));
|
829
|
+
delete_tokens(cur_tokenset);
|
830
|
+
|
831
|
+
findStartsWith(infile,"UnitType",linebuffer);
|
832
|
+
cur_tokenset = tokenize(linebuffer,"=");
|
833
|
+
mycdf->units[i].unit_type = atoi(get_token(cur_tokenset,1));
|
834
|
+
delete_tokens(cur_tokenset);
|
835
|
+
|
836
|
+
findStartsWith(infile,"NumberBlocks",linebuffer);
|
837
|
+
cur_tokenset = tokenize(linebuffer,"=");
|
838
|
+
mycdf->units[i].numberblocks = atoi(get_token(cur_tokenset,1));
|
839
|
+
delete_tokens(cur_tokenset);
|
840
|
+
|
841
|
+
/*Skip MutationType since only appears on one type of array */
|
842
|
+
|
843
|
+
mycdf->units[i].blocks = Calloc(mycdf->units[i].numberblocks,cdf_text_unit_block);
|
844
|
+
|
845
|
+
|
846
|
+
read_cdf_unit_block(infile,mycdf,linebuffer,i);
|
847
|
+
/* AdvanceToSection(infile,"[Unit",linebuffer);
|
848
|
+
Rprintf("%d\n",i); */
|
849
|
+
}
|
850
|
+
|
851
|
+
|
852
|
+
|
853
|
+
}
|
854
|
+
|
855
|
+
|
856
|
+
/*******************************************************************
|
857
|
+
**
|
858
|
+
** int read_cdf_text(const char *filename, cdf_text *mycdf)
|
859
|
+
**
|
860
|
+
** const char *filename - name of text file
|
861
|
+
** cdf_text *mycdf - pointer to root of structure that will contain
|
862
|
+
** the contents of the CDF file at the conclusion
|
863
|
+
** of the function.
|
864
|
+
**
|
865
|
+
** RETURNS 0 if the function failed, otherwise returns 1
|
866
|
+
**
|
867
|
+
** this function reads a text CDF file into C data structure.
|
868
|
+
**
|
869
|
+
*******************************************************************/
|
870
|
+
|
871
|
+
|
872
|
+
int read_cdf_text(const char *filename, cdf_text *mycdf){
|
873
|
+
|
874
|
+
FILE *infile;
|
875
|
+
|
876
|
+
char linebuffer[BUFFER_SIZE]; /* a character buffer */
|
877
|
+
tokenset *cur_tokenset;
|
878
|
+
|
879
|
+
if ((infile = fopen(filename, "r")) == NULL)
|
880
|
+
{
|
881
|
+
error("Unable to open the file %s",filename);
|
882
|
+
return 0;
|
883
|
+
}
|
884
|
+
|
885
|
+
|
886
|
+
|
887
|
+
/* Check that is is a text CDF file */
|
888
|
+
ReadFileLine(linebuffer, BUFFER_SIZE, infile);
|
889
|
+
if (strncmp("[CDF]", linebuffer, 5) != 0){
|
890
|
+
error("The file %s does not look like a text CDF file",filename);
|
891
|
+
}
|
892
|
+
|
893
|
+
/* Read the version number */
|
894
|
+
ReadFileLine(linebuffer, BUFFER_SIZE, infile);
|
895
|
+
|
896
|
+
cur_tokenset = tokenize(linebuffer,"=\r\n");
|
897
|
+
if (strncmp("GC3.0", get_token(cur_tokenset,1), 5) != 0){
|
898
|
+
error("The file %s does not look like a version GC3.0 CDF file",filename);
|
899
|
+
} else {
|
900
|
+
mycdf->header.version = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
|
901
|
+
strcpy(mycdf->header.version,get_token(cur_tokenset,1));
|
902
|
+
}
|
903
|
+
delete_tokens(cur_tokenset);
|
904
|
+
|
905
|
+
|
906
|
+
read_cdf_header(infile,mycdf,linebuffer);
|
907
|
+
read_cdf_QCUnits(infile,mycdf,linebuffer);
|
908
|
+
read_cdf_Units(infile,mycdf,linebuffer);
|
909
|
+
|
910
|
+
|
911
|
+
return 1;
|
912
|
+
}
|
913
|
+
|
914
|
+
/*******************************************************************
|
915
|
+
**
|
916
|
+
** void dealloc_cdf_text(cdf_text *my_cdf)
|
917
|
+
**
|
918
|
+
** Frees memory allocated
|
919
|
+
**
|
920
|
+
******************************************************************/
|
921
|
+
|
922
|
+
|
923
|
+
|
924
|
+
void dealloc_cdf_text(cdf_text *my_cdf){
|
925
|
+
int i,j,k;
|
926
|
+
|
927
|
+
|
928
|
+
Free(my_cdf->header.version);
|
929
|
+
Free(my_cdf->header.name);
|
930
|
+
if (my_cdf->header.chipreference != NULL)
|
931
|
+
Free(my_cdf->header.chipreference);
|
932
|
+
|
933
|
+
for (i =0; i < my_cdf->header.NumQCUnits; i++){
|
934
|
+
for (j=0; j < my_cdf->qc_units[i].n_probes; j++){
|
935
|
+
Free(my_cdf->qc_units[i].qc_probes[j].probe);
|
936
|
+
}
|
937
|
+
Free(my_cdf->qc_units[i].qc_probes);
|
938
|
+
}
|
939
|
+
|
940
|
+
|
941
|
+
for (i =0; i < my_cdf->header.numberofunits; i++){
|
942
|
+
for (j=0; j < my_cdf->units[i].numberblocks; j++){
|
943
|
+
for (k=0; k < my_cdf->units[i].blocks[j].num_cells;k++){
|
944
|
+
Free(my_cdf->units[i].blocks[j].probes[k].probe);
|
945
|
+
Free(my_cdf->units[i].blocks[j].probes[k].feat);
|
946
|
+
Free(my_cdf->units[i].blocks[j].probes[k].qual);
|
947
|
+
Free(my_cdf->units[i].blocks[j].probes[k].cbase);
|
948
|
+
Free(my_cdf->units[i].blocks[j].probes[k].pbase);
|
949
|
+
Free(my_cdf->units[i].blocks[j].probes[k].tbase);
|
950
|
+
}
|
951
|
+
Free(my_cdf->units[i].blocks[j].probes);
|
952
|
+
Free(my_cdf->units[i].blocks[j].name);
|
953
|
+
}
|
954
|
+
Free(my_cdf->units[i].blocks);
|
955
|
+
Free(my_cdf->units[i].name);
|
956
|
+
}
|
957
|
+
|
958
|
+
|
959
|
+
}
|
960
|
+
|
961
|
+
|
962
|
+
|
963
|
+
/*******************************************************************
|
964
|
+
**
|
965
|
+
** static int isTextCDFFile(const char *filename)
|
966
|
+
**
|
967
|
+
** const char *filename - name of file to check
|
968
|
+
**
|
969
|
+
** checks whether the supplied file is a text CDF file or not.
|
970
|
+
** uses a very simple test.
|
971
|
+
**
|
972
|
+
** Attempts to open the supplied filename. Then checks to see if the first
|
973
|
+
** 5 characters are "[CDF]" if so returns 1, otherwise 0.
|
974
|
+
**
|
975
|
+
**
|
976
|
+
******************************************************************/
|
977
|
+
|
978
|
+
int isTextCDFFile(const char *filename){
|
979
|
+
|
980
|
+
|
981
|
+
FILE *infile;
|
982
|
+
|
983
|
+
char linebuffer[BUFFER_SIZE]; /* a character buffer */
|
984
|
+
|
985
|
+
|
986
|
+
if ((infile = fopen(filename, "r")) == NULL)
|
987
|
+
{
|
988
|
+
error("Unable to open the file %s",filename);
|
989
|
+
}
|
990
|
+
|
991
|
+
|
992
|
+
|
993
|
+
/* Check that is is a text CDF file */
|
994
|
+
ReadFileLine(linebuffer, BUFFER_SIZE, infile);
|
995
|
+
if (strncmp("[CDF]", linebuffer, 5) == 0){
|
996
|
+
fclose(infile);
|
997
|
+
return 1;
|
998
|
+
}
|
999
|
+
fclose(infile);
|
1000
|
+
return 0;
|
1001
|
+
}
|
1002
|
+
|
1003
|
+
|
1004
|
+
|
1005
|
+
|
1006
|
+
|
1007
|
+
/*******************************************************************
|
1008
|
+
**
|
1009
|
+
** SEXP ReadtextCDFFileIntoRList(SEXP filename)
|
1010
|
+
**
|
1011
|
+
** SEXP filename - name of cdffile. Should be full path to file.
|
1012
|
+
**
|
1013
|
+
** this function should be called from R. When supplied the name
|
1014
|
+
** of a text cdf file it first parses it into a C data structure.
|
1015
|
+
**
|
1016
|
+
** An R list structure is then constructed from the C data structure
|
1017
|
+
**
|
1018
|
+
** The R list is then returned.
|
1019
|
+
**
|
1020
|
+
** Note no special effort is made to reduce down the information in
|
1021
|
+
** the text CDF file. Instead almost everything is returned, even
|
1022
|
+
** somewhat redundant information.
|
1023
|
+
**
|
1024
|
+
******************************************************************/
|
1025
|
+
|
1026
|
+
|
1027
|
+
SEXP ReadtextCDFFileIntoRList(SEXP filename){
|
1028
|
+
|
1029
|
+
SEXP CDFInfo; /* this is the object that will be returned */
|
1030
|
+
SEXP CDFInfoNames;
|
1031
|
+
SEXP HEADER; /* The file header */
|
1032
|
+
SEXP HEADERNames;
|
1033
|
+
SEXP TEMPSXP;
|
1034
|
+
SEXP TEMPSXP2;
|
1035
|
+
SEXP TEMPSXP3;
|
1036
|
+
SEXP TEMPSXP4;
|
1037
|
+
|
1038
|
+
SEXP QCUNITS;
|
1039
|
+
SEXP UNITS;
|
1040
|
+
|
1041
|
+
|
1042
|
+
/* Basically fields (possible) for QC probes */
|
1043
|
+
SEXP QCUNITSProbeInfoX;
|
1044
|
+
SEXP QCUNITSProbeInfoY;
|
1045
|
+
SEXP QCUNITSProbeInfoPROBE;
|
1046
|
+
SEXP QCUNITSProbeInfoPL;
|
1047
|
+
SEXP QCUNITSProbeInfoATOM;
|
1048
|
+
SEXP QCUNITSProbeInfoINDEX;
|
1049
|
+
SEXP QCUNITSProbeInfoPMFLAG;
|
1050
|
+
SEXP QCUNITSProbeInfoBGFLAG;
|
1051
|
+
SEXP QCUNITSProbeInfoNames = R_NilValue;
|
1052
|
+
SEXP QCUNITSProbeInforow_names;
|
1053
|
+
|
1054
|
+
/* Basically fields (possible) for Unit Block probes */
|
1055
|
+
|
1056
|
+
SEXP UNITSProbeInfoX;
|
1057
|
+
SEXP UNITSProbeInfoY;
|
1058
|
+
SEXP UNITSProbeInfoPROBE;
|
1059
|
+
SEXP UNITSProbeInfoFEAT;
|
1060
|
+
SEXP UNITSProbeInfoQUAL;
|
1061
|
+
SEXP UNITSProbeInfoEXPOS;
|
1062
|
+
SEXP UNITSProbeInfoPOS;
|
1063
|
+
SEXP UNITSProbeInfoCBASE;
|
1064
|
+
SEXP UNITSProbeInfoPBASE;
|
1065
|
+
SEXP UNITSProbeInfoTBASE;
|
1066
|
+
SEXP UNITSProbeInfoATOM;
|
1067
|
+
SEXP UNITSProbeInfoINDEX;
|
1068
|
+
SEXP UNITSProbeInfoCODONIND;
|
1069
|
+
SEXP UNITSProbeInfoCODON;
|
1070
|
+
SEXP UNITSProbeInfoREGIONTYPE;
|
1071
|
+
SEXP UNITSProbeInfoNames;
|
1072
|
+
SEXP UNITSProbeInforow_names;
|
1073
|
+
|
1074
|
+
char buf[10]; /* temporary buffer for making names */
|
1075
|
+
int i,j,k,l;
|
1076
|
+
int tmpsum =0;
|
1077
|
+
|
1078
|
+
|
1079
|
+
cdf_text my_cdf;
|
1080
|
+
|
1081
|
+
const char *cur_file_name;
|
1082
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
1083
|
+
|
1084
|
+
if(!read_cdf_text(cur_file_name, &my_cdf)){
|
1085
|
+
error("Problem reading text cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
|
1086
|
+
}
|
1087
|
+
|
1088
|
+
|
1089
|
+
/* Now build the R list structure */
|
1090
|
+
|
1091
|
+
|
1092
|
+
/* return the full structure */
|
1093
|
+
PROTECT(CDFInfo = allocVector(VECSXP,3));
|
1094
|
+
PROTECT(CDFInfoNames = allocVector(STRSXP,3));
|
1095
|
+
SET_STRING_ELT(CDFInfoNames,0,mkChar("Chip"));
|
1096
|
+
SET_STRING_ELT(CDFInfoNames,1,mkChar("QC"));
|
1097
|
+
SET_STRING_ELT(CDFInfoNames,2,mkChar("Unit"));
|
1098
|
+
|
1099
|
+
setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames);
|
1100
|
+
UNPROTECT(1);
|
1101
|
+
|
1102
|
+
/* Deal with the HEADER */
|
1103
|
+
PROTECT(HEADER = allocVector(VECSXP,8));
|
1104
|
+
PROTECT(HEADERNames = allocVector(STRSXP,8));
|
1105
|
+
SET_STRING_ELT(HEADERNames,0,mkChar("Version"));
|
1106
|
+
SET_STRING_ELT(HEADERNames,1,mkChar("Name"));
|
1107
|
+
SET_STRING_ELT(HEADERNames,2,mkChar("Rows"));
|
1108
|
+
SET_STRING_ELT(HEADERNames,3,mkChar("Cols"));
|
1109
|
+
SET_STRING_ELT(HEADERNames,4,mkChar("NumberOfUnits"));
|
1110
|
+
SET_STRING_ELT(HEADERNames,5,mkChar("MaxUnit"));
|
1111
|
+
SET_STRING_ELT(HEADERNames,6,mkChar("NumQCUnits"));
|
1112
|
+
SET_STRING_ELT(HEADERNames,7,mkChar("ChipReference"));
|
1113
|
+
setAttrib(HEADER,R_NamesSymbol,HEADERNames);
|
1114
|
+
UNPROTECT(1);
|
1115
|
+
|
1116
|
+
PROTECT(TEMPSXP = allocVector(STRSXP,1));
|
1117
|
+
SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.version));
|
1118
|
+
SET_VECTOR_ELT(HEADER,0,TEMPSXP);
|
1119
|
+
UNPROTECT(1);
|
1120
|
+
|
1121
|
+
PROTECT(TEMPSXP = allocVector(STRSXP,1));
|
1122
|
+
SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.name));
|
1123
|
+
SET_VECTOR_ELT(HEADER,1,TEMPSXP);
|
1124
|
+
UNPROTECT(1);
|
1125
|
+
|
1126
|
+
PROTECT(TEMPSXP = allocVector(REALSXP,1));
|
1127
|
+
NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.rows;
|
1128
|
+
SET_VECTOR_ELT(HEADER,2,TEMPSXP);
|
1129
|
+
UNPROTECT(1);
|
1130
|
+
|
1131
|
+
PROTECT(TEMPSXP = allocVector(REALSXP,1));
|
1132
|
+
NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.cols;
|
1133
|
+
SET_VECTOR_ELT(HEADER,3,TEMPSXP);
|
1134
|
+
UNPROTECT(1);
|
1135
|
+
|
1136
|
+
PROTECT(TEMPSXP = allocVector(REALSXP,1));
|
1137
|
+
NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.numberofunits;
|
1138
|
+
SET_VECTOR_ELT(HEADER,4,TEMPSXP);
|
1139
|
+
UNPROTECT(1);
|
1140
|
+
|
1141
|
+
PROTECT(TEMPSXP = allocVector(REALSXP,1));
|
1142
|
+
NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.maxunit;
|
1143
|
+
SET_VECTOR_ELT(HEADER,5,TEMPSXP);
|
1144
|
+
UNPROTECT(1);
|
1145
|
+
|
1146
|
+
PROTECT(TEMPSXP = allocVector(REALSXP,1));
|
1147
|
+
NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.NumQCUnits;
|
1148
|
+
SET_VECTOR_ELT(HEADER,6,TEMPSXP);
|
1149
|
+
UNPROTECT(1);
|
1150
|
+
|
1151
|
+
PROTECT(TEMPSXP = allocVector(REALSXP,1));
|
1152
|
+
if (my_cdf.header.chipreference !=NULL){
|
1153
|
+
SET_VECTOR_ELT(TEMPSXP,0,mkChar(my_cdf.header.chipreference));
|
1154
|
+
SET_VECTOR_ELT(HEADER,7,TEMPSXP);
|
1155
|
+
}
|
1156
|
+
UNPROTECT(1);
|
1157
|
+
|
1158
|
+
SET_VECTOR_ELT(CDFInfo,0,HEADER);
|
1159
|
+
|
1160
|
+
PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.NumQCUnits));
|
1161
|
+
for (i=0; i <my_cdf.header.NumQCUnits; i++){
|
1162
|
+
PROTECT(TEMPSXP=allocVector(VECSXP,3));
|
1163
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1164
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.qc_units[i].type;
|
1165
|
+
SET_VECTOR_ELT(TEMPSXP,0,TEMPSXP2);
|
1166
|
+
UNPROTECT(1);
|
1167
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1168
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.qc_units[i].n_probes;
|
1169
|
+
SET_VECTOR_ELT(TEMPSXP,1,TEMPSXP2);
|
1170
|
+
UNPROTECT(1);
|
1171
|
+
/*Figure out what the probe information is for this QC unit and then allocate the space */
|
1172
|
+
tmpsum=0;
|
1173
|
+
for (j=0; j < 8; j++){
|
1174
|
+
tmpsum+=my_cdf.qc_units[i].qccontains[j];
|
1175
|
+
}
|
1176
|
+
|
1177
|
+
if (tmpsum == 6){
|
1178
|
+
PROTECT(TEMPSXP2 = allocVector(VECSXP,6));
|
1179
|
+
} else if (tmpsum ==8){
|
1180
|
+
PROTECT(TEMPSXP2 = allocVector(VECSXP,8));
|
1181
|
+
}
|
1182
|
+
|
1183
|
+
if (tmpsum == 6){
|
1184
|
+
PROTECT(QCUNITSProbeInfoX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1185
|
+
PROTECT(QCUNITSProbeInfoY = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1186
|
+
PROTECT(QCUNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
|
1187
|
+
PROTECT(QCUNITSProbeInfoPL = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1188
|
+
PROTECT(QCUNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1189
|
+
PROTECT(QCUNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1190
|
+
} else if (tmpsum == 8){
|
1191
|
+
PROTECT(QCUNITSProbeInfoX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1192
|
+
PROTECT(QCUNITSProbeInfoY = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1193
|
+
PROTECT(QCUNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
|
1194
|
+
PROTECT(QCUNITSProbeInfoPL = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1195
|
+
PROTECT(QCUNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1196
|
+
PROTECT(QCUNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1197
|
+
PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1198
|
+
PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(INTSXP,my_cdf.qc_units[i].n_probes));
|
1199
|
+
|
1200
|
+
}
|
1201
|
+
|
1202
|
+
/* store what was read in for the QC units in the R structure */
|
1203
|
+
if (tmpsum == 6){
|
1204
|
+
for (j =0; j < my_cdf.qc_units[i].n_probes; j++){
|
1205
|
+
INTEGER_POINTER(QCUNITSProbeInfoX)[j] = my_cdf.qc_units[i].qc_probes[j].x;
|
1206
|
+
INTEGER_POINTER(QCUNITSProbeInfoY)[j] = my_cdf.qc_units[i].qc_probes[j].y;
|
1207
|
+
SET_STRING_ELT(QCUNITSProbeInfoPROBE,j,mkChar( my_cdf.qc_units[i].qc_probes[j].probe));
|
1208
|
+
INTEGER_POINTER(QCUNITSProbeInfoPL)[j] = my_cdf.qc_units[i].qc_probes[j].plen;
|
1209
|
+
INTEGER_POINTER(QCUNITSProbeInfoATOM)[j] = my_cdf.qc_units[i].qc_probes[j].atom;
|
1210
|
+
INTEGER_POINTER(QCUNITSProbeInfoINDEX)[j] = my_cdf.qc_units[i].qc_probes[j].index;
|
1211
|
+
|
1212
|
+
}
|
1213
|
+
} else if (tmpsum == 8){
|
1214
|
+
for (j =0; j < my_cdf.qc_units[i].n_probes; j++){
|
1215
|
+
INTEGER_POINTER(QCUNITSProbeInfoX)[j] = my_cdf.qc_units[i].qc_probes[j].x;
|
1216
|
+
INTEGER_POINTER(QCUNITSProbeInfoY)[j] = my_cdf.qc_units[i].qc_probes[j].y;
|
1217
|
+
SET_STRING_ELT(QCUNITSProbeInfoPROBE,j,mkChar( my_cdf.qc_units[i].qc_probes[j].probe));
|
1218
|
+
INTEGER_POINTER(QCUNITSProbeInfoPL)[j] = my_cdf.qc_units[i].qc_probes[j].plen;
|
1219
|
+
INTEGER_POINTER(QCUNITSProbeInfoATOM)[j] = my_cdf.qc_units[i].qc_probes[j].atom;
|
1220
|
+
INTEGER_POINTER(QCUNITSProbeInfoINDEX)[j] = my_cdf.qc_units[i].qc_probes[j].index;
|
1221
|
+
INTEGER_POINTER(QCUNITSProbeInfoPMFLAG)[j] = my_cdf.qc_units[i].qc_probes[j].match;
|
1222
|
+
INTEGER_POINTER(QCUNITSProbeInfoBGFLAG)[j] = my_cdf.qc_units[i].qc_probes[j].bg;
|
1223
|
+
}
|
1224
|
+
}
|
1225
|
+
|
1226
|
+
if (tmpsum == 6){
|
1227
|
+
SET_VECTOR_ELT(TEMPSXP2,0,QCUNITSProbeInfoX);
|
1228
|
+
SET_VECTOR_ELT(TEMPSXP2,1,QCUNITSProbeInfoY);
|
1229
|
+
SET_VECTOR_ELT(TEMPSXP2,2,QCUNITSProbeInfoPROBE);
|
1230
|
+
SET_VECTOR_ELT(TEMPSXP2,3,QCUNITSProbeInfoPL);
|
1231
|
+
SET_VECTOR_ELT(TEMPSXP2,4,QCUNITSProbeInfoATOM);
|
1232
|
+
SET_VECTOR_ELT(TEMPSXP2,5,QCUNITSProbeInfoINDEX);
|
1233
|
+
} else if (tmpsum ==8){
|
1234
|
+
SET_VECTOR_ELT(TEMPSXP2,0,QCUNITSProbeInfoX);
|
1235
|
+
SET_VECTOR_ELT(TEMPSXP2,1,QCUNITSProbeInfoY);
|
1236
|
+
SET_VECTOR_ELT(TEMPSXP2,2,QCUNITSProbeInfoPROBE);
|
1237
|
+
SET_VECTOR_ELT(TEMPSXP2,3,QCUNITSProbeInfoPL);
|
1238
|
+
SET_VECTOR_ELT(TEMPSXP2,4,QCUNITSProbeInfoATOM);
|
1239
|
+
SET_VECTOR_ELT(TEMPSXP2,5,QCUNITSProbeInfoINDEX);
|
1240
|
+
SET_VECTOR_ELT(TEMPSXP2,6,QCUNITSProbeInfoPMFLAG);
|
1241
|
+
SET_VECTOR_ELT(TEMPSXP2,7,QCUNITSProbeInfoBGFLAG);
|
1242
|
+
|
1243
|
+
}
|
1244
|
+
|
1245
|
+
|
1246
|
+
if (tmpsum == 6){
|
1247
|
+
UNPROTECT(6);
|
1248
|
+
} else if (tmpsum == 8){
|
1249
|
+
UNPROTECT(8);
|
1250
|
+
}
|
1251
|
+
|
1252
|
+
|
1253
|
+
if (tmpsum == 6){
|
1254
|
+
PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,6));
|
1255
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
|
1256
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
|
1257
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("Probe"));
|
1258
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("ProbeLength"));
|
1259
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("Atom"));
|
1260
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,5,mkChar("Index"));
|
1261
|
+
} else if (tmpsum == 8){
|
1262
|
+
PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,8));
|
1263
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x"));
|
1264
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y"));
|
1265
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("Probe"));
|
1266
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("ProbeLength"));
|
1267
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("Atom"));
|
1268
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,5,mkChar("Index"));
|
1269
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,6,mkChar("PMFlag"));
|
1270
|
+
SET_STRING_ELT(QCUNITSProbeInfoNames,7,mkChar("BGProbeFlag"));
|
1271
|
+
}
|
1272
|
+
setAttrib(TEMPSXP2,R_NamesSymbol,QCUNITSProbeInfoNames);
|
1273
|
+
UNPROTECT(1);
|
1274
|
+
PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes));
|
1275
|
+
|
1276
|
+
for (j=0; j < my_cdf.qc_units[i].n_probes; j++){
|
1277
|
+
sprintf(buf, "%d", j+1);
|
1278
|
+
SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf));
|
1279
|
+
}
|
1280
|
+
setAttrib(TEMPSXP2, R_RowNamesSymbol, QCUNITSProbeInforow_names);
|
1281
|
+
UNPROTECT(1);
|
1282
|
+
|
1283
|
+
setAttrib(TEMPSXP2,R_ClassSymbol,mkString("data.frame"));
|
1284
|
+
SET_VECTOR_ELT(TEMPSXP,2,TEMPSXP2);
|
1285
|
+
UNPROTECT(1);
|
1286
|
+
|
1287
|
+
PROTECT(TEMPSXP2=allocVector(STRSXP,3));
|
1288
|
+
SET_STRING_ELT(TEMPSXP2,0,mkChar("Type"));
|
1289
|
+
SET_STRING_ELT(TEMPSXP2,1,mkChar("NumberCells"));
|
1290
|
+
SET_STRING_ELT(TEMPSXP2,2,mkChar("QCCells"));
|
1291
|
+
setAttrib(TEMPSXP,R_NamesSymbol,TEMPSXP2);
|
1292
|
+
UNPROTECT(1);
|
1293
|
+
SET_VECTOR_ELT(QCUNITS,i,TEMPSXP);
|
1294
|
+
|
1295
|
+
UNPROTECT(1);
|
1296
|
+
}
|
1297
|
+
SET_VECTOR_ELT(CDFInfo,1,QCUNITS);
|
1298
|
+
UNPROTECT(1);
|
1299
|
+
|
1300
|
+
|
1301
|
+
PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.numberofunits));
|
1302
|
+
for (i=0; i < my_cdf.header.numberofunits; i++){
|
1303
|
+
PROTECT(TEMPSXP=allocVector(VECSXP,8));
|
1304
|
+
PROTECT(TEMPSXP2=allocVector(STRSXP,1));
|
1305
|
+
|
1306
|
+
SET_STRING_ELT(TEMPSXP2,0,mkChar(my_cdf.units[i].name));
|
1307
|
+
SET_VECTOR_ELT(TEMPSXP,0,TEMPSXP2);
|
1308
|
+
UNPROTECT(1);
|
1309
|
+
|
1310
|
+
|
1311
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1312
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].direction;
|
1313
|
+
SET_VECTOR_ELT(TEMPSXP,1,TEMPSXP2);
|
1314
|
+
UNPROTECT(1);
|
1315
|
+
|
1316
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1317
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].num_atoms;
|
1318
|
+
SET_VECTOR_ELT(TEMPSXP,2,TEMPSXP2);
|
1319
|
+
UNPROTECT(1);
|
1320
|
+
|
1321
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1322
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].num_cells;
|
1323
|
+
SET_VECTOR_ELT(TEMPSXP,3,TEMPSXP2);
|
1324
|
+
UNPROTECT(1);
|
1325
|
+
|
1326
|
+
|
1327
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1328
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].unit_number;
|
1329
|
+
SET_VECTOR_ELT(TEMPSXP,4,TEMPSXP2);
|
1330
|
+
UNPROTECT(1);
|
1331
|
+
|
1332
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1333
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].unit_type;
|
1334
|
+
SET_VECTOR_ELT(TEMPSXP,5,TEMPSXP2);
|
1335
|
+
UNPROTECT(1);
|
1336
|
+
|
1337
|
+
PROTECT(TEMPSXP2 = allocVector(REALSXP,1));
|
1338
|
+
NUMERIC_POINTER(TEMPSXP2)[0] = (double)my_cdf.units[i].numberblocks;
|
1339
|
+
SET_VECTOR_ELT(TEMPSXP,6,TEMPSXP2);
|
1340
|
+
UNPROTECT(1);
|
1341
|
+
|
1342
|
+
PROTECT(TEMPSXP2 = allocVector(VECSXP,my_cdf.units[i].numberblocks));
|
1343
|
+
|
1344
|
+
for (j=0; j <my_cdf.units[i].numberblocks; j++){
|
1345
|
+
PROTECT(TEMPSXP3 = allocVector(VECSXP,8));
|
1346
|
+
|
1347
|
+
|
1348
|
+
PROTECT(TEMPSXP4=allocVector(STRSXP,1));
|
1349
|
+
|
1350
|
+
SET_STRING_ELT(TEMPSXP4,0,mkChar(my_cdf.units[i].blocks[j].name));
|
1351
|
+
SET_VECTOR_ELT(TEMPSXP3,0,TEMPSXP4);
|
1352
|
+
UNPROTECT(1);
|
1353
|
+
|
1354
|
+
|
1355
|
+
PROTECT(TEMPSXP4=allocVector(REALSXP,1));
|
1356
|
+
NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].blocknumber;
|
1357
|
+
SET_VECTOR_ELT(TEMPSXP3,1,TEMPSXP4);
|
1358
|
+
UNPROTECT(1);
|
1359
|
+
|
1360
|
+
PROTECT(TEMPSXP4=allocVector(REALSXP,1));
|
1361
|
+
NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].num_atoms;
|
1362
|
+
SET_VECTOR_ELT(TEMPSXP3,2,TEMPSXP4);
|
1363
|
+
UNPROTECT(1);
|
1364
|
+
|
1365
|
+
PROTECT(TEMPSXP4=allocVector(REALSXP,1));
|
1366
|
+
NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].num_cells;
|
1367
|
+
SET_VECTOR_ELT(TEMPSXP3,3,TEMPSXP4);
|
1368
|
+
UNPROTECT(1);
|
1369
|
+
|
1370
|
+
|
1371
|
+
PROTECT(TEMPSXP4=allocVector(REALSXP,1));
|
1372
|
+
NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].start_position;
|
1373
|
+
SET_VECTOR_ELT(TEMPSXP3,4,TEMPSXP4);
|
1374
|
+
UNPROTECT(1);
|
1375
|
+
|
1376
|
+
PROTECT(TEMPSXP4=allocVector(REALSXP,1));
|
1377
|
+
NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].stop_position;
|
1378
|
+
SET_VECTOR_ELT(TEMPSXP3,5,TEMPSXP4);
|
1379
|
+
UNPROTECT(1);
|
1380
|
+
|
1381
|
+
|
1382
|
+
PROTECT(TEMPSXP4=allocVector(REALSXP,1));
|
1383
|
+
NUMERIC_POINTER(TEMPSXP4)[0] = (double)my_cdf.units[i].blocks[j].direction;
|
1384
|
+
SET_VECTOR_ELT(TEMPSXP3,6,TEMPSXP4);
|
1385
|
+
UNPROTECT(1);
|
1386
|
+
|
1387
|
+
PROTECT(TEMPSXP4=allocVector(VECSXP,15));
|
1388
|
+
|
1389
|
+
|
1390
|
+
PROTECT(UNITSProbeInfoX = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1391
|
+
PROTECT(UNITSProbeInfoY = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1392
|
+
PROTECT(UNITSProbeInfoPROBE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1393
|
+
PROTECT(UNITSProbeInfoFEAT = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1394
|
+
PROTECT(UNITSProbeInfoQUAL = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1395
|
+
PROTECT(UNITSProbeInfoEXPOS = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1396
|
+
PROTECT(UNITSProbeInfoPOS = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1397
|
+
PROTECT(UNITSProbeInfoCBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1398
|
+
PROTECT(UNITSProbeInfoPBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1399
|
+
PROTECT(UNITSProbeInfoTBASE = allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1400
|
+
PROTECT(UNITSProbeInfoATOM = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1401
|
+
PROTECT(UNITSProbeInfoINDEX = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1402
|
+
PROTECT(UNITSProbeInfoCODONIND = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1403
|
+
PROTECT(UNITSProbeInfoCODON = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1404
|
+
PROTECT(UNITSProbeInfoREGIONTYPE = allocVector(INTSXP,my_cdf.units[i].blocks[j].num_cells));
|
1405
|
+
for (k=0; k < my_cdf.units[i].blocks[j].num_cells; k++){
|
1406
|
+
INTEGER_POINTER(UNITSProbeInfoX)[k]=my_cdf.units[i].blocks[j].probes[k].x;
|
1407
|
+
INTEGER_POINTER(UNITSProbeInfoY)[k]=my_cdf.units[i].blocks[j].probes[k].y;
|
1408
|
+
INTEGER_POINTER(UNITSProbeInfoEXPOS)[k]=my_cdf.units[i].blocks[j].probes[k].expos;
|
1409
|
+
INTEGER_POINTER(UNITSProbeInfoPOS)[k]=my_cdf.units[i].blocks[j].probes[k].pos;
|
1410
|
+
INTEGER_POINTER(UNITSProbeInfoATOM)[k]=my_cdf.units[i].blocks[j].probes[k].atom;
|
1411
|
+
INTEGER_POINTER(UNITSProbeInfoINDEX)[k]=my_cdf.units[i].blocks[j].probes[k].index;
|
1412
|
+
INTEGER_POINTER(UNITSProbeInfoCODONIND)[k]=my_cdf.units[i].blocks[j].probes[k].codonid;
|
1413
|
+
INTEGER_POINTER(UNITSProbeInfoCODON)[k]=my_cdf.units[i].blocks[j].probes[k].codon;
|
1414
|
+
INTEGER_POINTER(UNITSProbeInfoREGIONTYPE)[k]=my_cdf.units[i].blocks[j].probes[k].regiontype;
|
1415
|
+
SET_VECTOR_ELT(UNITSProbeInfoPROBE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].probe));
|
1416
|
+
SET_STRING_ELT(UNITSProbeInfoFEAT,k,mkChar(my_cdf.units[i].blocks[j].probes[k].feat));
|
1417
|
+
SET_STRING_ELT(UNITSProbeInfoQUAL,k,mkChar(my_cdf.units[i].blocks[j].probes[k].qual));
|
1418
|
+
SET_STRING_ELT(UNITSProbeInfoCBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].cbase));
|
1419
|
+
SET_STRING_ELT(UNITSProbeInfoPBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].pbase));
|
1420
|
+
SET_STRING_ELT(UNITSProbeInfoTBASE,k,mkChar(my_cdf.units[i].blocks[j].probes[k].tbase));
|
1421
|
+
}
|
1422
|
+
|
1423
|
+
|
1424
|
+
SET_VECTOR_ELT(TEMPSXP4,0,UNITSProbeInfoX);
|
1425
|
+
SET_VECTOR_ELT(TEMPSXP4,1,UNITSProbeInfoY);
|
1426
|
+
SET_VECTOR_ELT(TEMPSXP4,2,UNITSProbeInfoPROBE);
|
1427
|
+
SET_VECTOR_ELT(TEMPSXP4,3,UNITSProbeInfoFEAT);
|
1428
|
+
SET_VECTOR_ELT(TEMPSXP4,4,UNITSProbeInfoQUAL);
|
1429
|
+
SET_VECTOR_ELT(TEMPSXP4,5,UNITSProbeInfoEXPOS);
|
1430
|
+
SET_VECTOR_ELT(TEMPSXP4,6,UNITSProbeInfoPOS);
|
1431
|
+
SET_VECTOR_ELT(TEMPSXP4,7,UNITSProbeInfoCBASE);
|
1432
|
+
SET_VECTOR_ELT(TEMPSXP4,8,UNITSProbeInfoPBASE);
|
1433
|
+
SET_VECTOR_ELT(TEMPSXP4,9,UNITSProbeInfoTBASE);
|
1434
|
+
SET_VECTOR_ELT(TEMPSXP4,10,UNITSProbeInfoATOM);
|
1435
|
+
SET_VECTOR_ELT(TEMPSXP4,11,UNITSProbeInfoINDEX);
|
1436
|
+
SET_VECTOR_ELT(TEMPSXP4,12,UNITSProbeInfoCODONIND);
|
1437
|
+
SET_VECTOR_ELT(TEMPSXP4,13,UNITSProbeInfoCODON);
|
1438
|
+
SET_VECTOR_ELT(TEMPSXP4,14,UNITSProbeInfoREGIONTYPE);
|
1439
|
+
|
1440
|
+
|
1441
|
+
|
1442
|
+
|
1443
|
+
|
1444
|
+
|
1445
|
+
|
1446
|
+
UNPROTECT(15);
|
1447
|
+
|
1448
|
+
PROTECT(UNITSProbeInfoNames =allocVector(STRSXP,15));
|
1449
|
+
SET_STRING_ELT(UNITSProbeInfoNames,0,mkChar("x"));
|
1450
|
+
SET_STRING_ELT(UNITSProbeInfoNames,1,mkChar("y"));
|
1451
|
+
SET_STRING_ELT(UNITSProbeInfoNames,2,mkChar("Probe"));
|
1452
|
+
SET_STRING_ELT(UNITSProbeInfoNames,3,mkChar("Feat"));
|
1453
|
+
SET_STRING_ELT(UNITSProbeInfoNames,4,mkChar("Qual"));
|
1454
|
+
SET_STRING_ELT(UNITSProbeInfoNames,5,mkChar("Expos"));
|
1455
|
+
SET_STRING_ELT(UNITSProbeInfoNames,6,mkChar("Pos"));
|
1456
|
+
SET_STRING_ELT(UNITSProbeInfoNames,7,mkChar("cbase"));
|
1457
|
+
SET_STRING_ELT(UNITSProbeInfoNames,8,mkChar("pbase"));
|
1458
|
+
SET_STRING_ELT(UNITSProbeInfoNames,9,mkChar("tbase"));
|
1459
|
+
SET_STRING_ELT(UNITSProbeInfoNames,10,mkChar("Atom"));
|
1460
|
+
SET_STRING_ELT(UNITSProbeInfoNames,11,mkChar("Index"));
|
1461
|
+
SET_STRING_ELT(UNITSProbeInfoNames,12,mkChar("CodonInd"));
|
1462
|
+
SET_STRING_ELT(UNITSProbeInfoNames,13,mkChar("Codon"));
|
1463
|
+
SET_STRING_ELT(UNITSProbeInfoNames,14,mkChar("Regiontype"));
|
1464
|
+
|
1465
|
+
|
1466
|
+
setAttrib(TEMPSXP4,R_NamesSymbol,UNITSProbeInfoNames);
|
1467
|
+
UNPROTECT(1);
|
1468
|
+
|
1469
|
+
PROTECT(UNITSProbeInforow_names= allocVector(STRSXP,my_cdf.units[i].blocks[j].num_cells));
|
1470
|
+
|
1471
|
+
for (l=0; l < my_cdf.units[i].blocks[j].num_cells; l++){
|
1472
|
+
sprintf(buf, "%d", l+1);
|
1473
|
+
SET_STRING_ELT(UNITSProbeInforow_names,l,mkChar(buf));
|
1474
|
+
}
|
1475
|
+
setAttrib(TEMPSXP4, R_RowNamesSymbol, UNITSProbeInforow_names);
|
1476
|
+
UNPROTECT(1);
|
1477
|
+
|
1478
|
+
|
1479
|
+
|
1480
|
+
setAttrib(TEMPSXP4,R_ClassSymbol,mkString("data.frame"));
|
1481
|
+
SET_VECTOR_ELT(TEMPSXP3,7,TEMPSXP4);
|
1482
|
+
UNPROTECT(1);
|
1483
|
+
|
1484
|
+
|
1485
|
+
|
1486
|
+
PROTECT(TEMPSXP4=allocVector(STRSXP,8));
|
1487
|
+
SET_STRING_ELT(TEMPSXP4,0,mkChar("Name"));
|
1488
|
+
SET_STRING_ELT(TEMPSXP4,1,mkChar("BlockNumber"));
|
1489
|
+
SET_STRING_ELT(TEMPSXP4,2,mkChar("NumAtoms"));
|
1490
|
+
SET_STRING_ELT(TEMPSXP4,3,mkChar("NumCells"));
|
1491
|
+
SET_STRING_ELT(TEMPSXP4,4,mkChar("StartPosition"));
|
1492
|
+
SET_STRING_ELT(TEMPSXP4,5,mkChar("StopPosition"));
|
1493
|
+
SET_STRING_ELT(TEMPSXP4,6,mkChar("Direction"));
|
1494
|
+
SET_STRING_ELT(TEMPSXP4,7,mkChar("Unit_Block_Cells"));
|
1495
|
+
setAttrib(TEMPSXP3,R_NamesSymbol,TEMPSXP4);
|
1496
|
+
UNPROTECT(1);
|
1497
|
+
|
1498
|
+
SET_VECTOR_ELT(TEMPSXP2,j,TEMPSXP3);
|
1499
|
+
UNPROTECT(1);
|
1500
|
+
}
|
1501
|
+
|
1502
|
+
|
1503
|
+
|
1504
|
+
|
1505
|
+
|
1506
|
+
|
1507
|
+
SET_VECTOR_ELT(TEMPSXP,7,TEMPSXP2);
|
1508
|
+
UNPROTECT(1);
|
1509
|
+
|
1510
|
+
|
1511
|
+
|
1512
|
+
|
1513
|
+
PROTECT(TEMPSXP2 = allocVector(STRSXP,8));
|
1514
|
+
SET_STRING_ELT(TEMPSXP2,0,mkChar("Name"));
|
1515
|
+
SET_STRING_ELT(TEMPSXP2,1,mkChar("Direction"));
|
1516
|
+
SET_STRING_ELT(TEMPSXP2,2,mkChar("NumAtoms"));
|
1517
|
+
SET_STRING_ELT(TEMPSXP2,3,mkChar("NumCells"));
|
1518
|
+
SET_STRING_ELT(TEMPSXP2,4,mkChar("UnitNumber"));
|
1519
|
+
SET_STRING_ELT(TEMPSXP2,5,mkChar("UnitType"));
|
1520
|
+
SET_STRING_ELT(TEMPSXP2,6,mkChar("NumberBlocks"));
|
1521
|
+
SET_STRING_ELT(TEMPSXP2,7,mkChar("Unit_Block"));
|
1522
|
+
setAttrib(TEMPSXP,R_NamesSymbol,TEMPSXP2);
|
1523
|
+
UNPROTECT(1);
|
1524
|
+
|
1525
|
+
|
1526
|
+
|
1527
|
+
|
1528
|
+
|
1529
|
+
SET_VECTOR_ELT(UNITS,i,TEMPSXP);
|
1530
|
+
UNPROTECT(1);
|
1531
|
+
|
1532
|
+
|
1533
|
+
|
1534
|
+
}
|
1535
|
+
SET_VECTOR_ELT(CDFInfo,2,UNITS);
|
1536
|
+
UNPROTECT(1);
|
1537
|
+
|
1538
|
+
|
1539
|
+
|
1540
|
+
dealloc_cdf_text(&my_cdf);
|
1541
|
+
UNPROTECT(2);
|
1542
|
+
return CDFInfo;
|
1543
|
+
}
|
1544
|
+
|
1545
|
+
|
1546
|
+
|
1547
|
+
|
1548
|
+
|
1549
|
+
/*************************************************************
|
1550
|
+
**
|
1551
|
+
** SEXP CheckCDFtext(SEXP filename)
|
1552
|
+
**
|
1553
|
+
** Takes a given file name and returns 1 if it is a text format CDF file
|
1554
|
+
** otherwise it returns 0
|
1555
|
+
**
|
1556
|
+
*************************************************************/
|
1557
|
+
|
1558
|
+
|
1559
|
+
|
1560
|
+
SEXP CheckCDFtext(SEXP filename){
|
1561
|
+
SEXP tmp;
|
1562
|
+
int good;
|
1563
|
+
const char *cur_file_name;
|
1564
|
+
|
1565
|
+
cur_file_name = CHAR(STRING_ELT(filename,0));
|
1566
|
+
|
1567
|
+
good = isTextCDFFile(cur_file_name);
|
1568
|
+
|
1569
|
+
PROTECT(tmp= allocVector(INTSXP,1));
|
1570
|
+
|
1571
|
+
INTEGER(tmp)[0] = good;
|
1572
|
+
|
1573
|
+
UNPROTECT(1);
|
1574
|
+
return tmp;
|
1575
|
+
}
|
1576
|
+
|