bio-affy 0.1.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
@@ -0,0 +1,144 @@
1
+
2
+ #ifdef BIOLIB
3
+ #include <biolib_R_map.h>
4
+ #endif
5
+
6
+ #include <zlib.h>
7
+
8
+ /******
9
+ ******
10
+ ****** Data Structures
11
+ ******
12
+ ******/
13
+
14
+
15
+
16
+ /* File header */
17
+
18
+ typedef struct{
19
+ uint8_t magic_number;
20
+ uint8_t version;
21
+ int32_t n_data_groups;
22
+ uint32_t first_group_file_pos;
23
+ } generic_file_header;
24
+
25
+
26
+
27
+
28
+ /* An affy generic STRING */
29
+
30
+ typedef struct{
31
+ int32_t len;
32
+ char *value;
33
+ } ASTRING;
34
+
35
+ /* An affy generic WSTRING */
36
+
37
+ typedef struct{
38
+ int32_t len;
39
+ wchar_t *value;
40
+ } AWSTRING;
41
+
42
+
43
+ /* Name Value Type Triplet */
44
+
45
+ typedef struct{
46
+ AWSTRING name;
47
+ ASTRING value;
48
+ AWSTRING type;
49
+ } nvt_triplet;
50
+
51
+
52
+
53
+
54
+ /* Data Header */
55
+
56
+ typedef struct generic_data_header *generic_data_header_pointer;
57
+
58
+ typedef struct{
59
+ ASTRING data_type_id; /*Stored in file as INT followed by CHAR array */
60
+ ASTRING unique_file_id; /*See above */
61
+ AWSTRING Date_time; /*Stored in file as INT followed by WCHAR array */
62
+ AWSTRING locale;
63
+ int32_t n_name_type_value;
64
+ nvt_triplet *name_type_value;
65
+ int32_t n_parent_headers;
66
+ void **parent_headers;
67
+ } generic_data_header;
68
+
69
+
70
+ /* Data Group */
71
+
72
+ typedef struct {
73
+
74
+ uint32_t file_position_nextgroup;
75
+ uint32_t file_position_first_data;
76
+ int32_t n_data_sets;
77
+ AWSTRING data_group_name;
78
+ } generic_data_group;
79
+
80
+
81
+ /* Dataset */
82
+
83
+ typedef struct {
84
+ AWSTRING name;
85
+ uint8_t type;
86
+ int32_t size;
87
+ } col_nvts_triplet;
88
+
89
+
90
+
91
+ typedef struct {
92
+ uint32_t file_pos_first;
93
+ uint32_t file_pos_last;
94
+ AWSTRING data_set_name;
95
+ int32_t n_name_type_value;
96
+ nvt_triplet *name_type_value;
97
+ uint32_t ncols;
98
+ col_nvts_triplet* col_name_type_value;
99
+ uint32_t nrows;
100
+ void **Data; /* in the docs this is rows */
101
+ } generic_data_set;
102
+
103
+
104
+
105
+
106
+ typedef enum{
107
+
108
+ ASCIITEXT = 1,
109
+ PLAINTEXT = 2,
110
+ UINT8 = 3,
111
+ INT8= 4,
112
+ UINT16 = 5,
113
+ INT16 = 6,
114
+ UINT32 = 7,
115
+ INT32 = 8,
116
+ FLOAT32 = 9
117
+
118
+ } AffyMIMEtypes;
119
+
120
+
121
+
122
+ AffyMIMEtypes determine_MIMETYPE(nvt_triplet triplet);
123
+ void *decode_MIME_value(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size);
124
+ char *decode_MIME_value_toASCII(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size);
125
+
126
+ nvt_triplet* find_nvt(generic_data_header *data_header,char *name);
127
+
128
+ int read_generic_file_header(generic_file_header* file_header, FILE *instream);
129
+ int read_generic_data_header(generic_data_header *data_header, FILE *instream);
130
+ int read_generic_data_group(generic_data_group *data_group, FILE *instream);
131
+ int read_generic_data_set(generic_data_set *data_set, FILE *instream);
132
+ int read_generic_data_set_rows(generic_data_set *data_set, FILE *instream);
133
+
134
+
135
+ void Free_generic_data_header(generic_data_header *header);
136
+ void Free_generic_data_group(generic_data_group *data_group);
137
+ void Free_generic_data_set(generic_data_set *data_set);
138
+
139
+
140
+ int gzread_generic_file_header(generic_file_header* file_header, gzFile *instream);
141
+ int gzread_generic_data_header(generic_data_header *data_header, gzFile *instream);
142
+ int gzread_generic_data_group(generic_data_group *data_group,gzFile *instream);
143
+ int gzread_generic_data_set(generic_data_set *data_set, gzFile *instream);
144
+ int gzread_generic_data_set_rows(generic_data_set *data_set, gzFile *instream);
Binary file
@@ -0,0 +1,1337 @@
1
+ /******************************************************************
2
+ **
3
+ ** file: read_pgf.c
4
+ **
5
+ ** Aim: implement parsing of PGF format files
6
+ **
7
+ ** Copyright (C) 2007 B. M. Bolstad
8
+ **
9
+ ** Created on Nov 4, 2007
10
+ **
11
+ ** History
12
+ ** Nov 4, 2007 - Initial version
13
+ ** Dec 17, 2007 - add function for counting number of each type of probeset
14
+ ** Dec 31, 2007 - add function which checks that all required fields are present
15
+ ** Mar 18, 2008 - fix error in read_pgf_header function
16
+ **
17
+ **
18
+ **
19
+ ******************************************************************/
20
+
21
+ #include <R.h>
22
+
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+
26
+
27
+ #define BUFFERSIZE 1024
28
+
29
+
30
+ /*******************************************************************
31
+ *******************************************************************
32
+ **
33
+ ** Structures for dealing with pgf file information
34
+ **
35
+ **
36
+ **
37
+ *******************************************************************
38
+ ******************************************************************/
39
+
40
+ /*******************************************************************
41
+ *******************************************************************
42
+ **
43
+ ** Starting off with the headers
44
+ **
45
+ *******************************************************************
46
+ ******************************************************************/
47
+
48
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
49
+
50
+ typedef struct{
51
+ int probeset_id;
52
+ int type;
53
+ int probeset_name;
54
+ } header_0;
55
+
56
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
57
+
58
+ typedef struct{
59
+ int atom_id;
60
+ int type;
61
+ int exon_position;
62
+ } header_1;
63
+
64
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
65
+
66
+ typedef struct{
67
+ int probe_id;
68
+ int type;
69
+ int gc_count;
70
+ int probe_length;
71
+ int interrogation_position;
72
+ int probe_sequence;
73
+ } header_2;
74
+
75
+
76
+
77
+ typedef struct{
78
+ char **chip_type;
79
+ int n_chip_type;
80
+ char *lib_set_name;
81
+ char *lib_set_version;
82
+ char *pgf_format_version;
83
+ char *header0_str;
84
+ header_0 *header0;
85
+ char *header1_str;
86
+ header_1 *header1;
87
+ char *header2_str;
88
+ header_2 *header2;
89
+ char *create_date;
90
+ char *guid;
91
+ char **other_headers_keys;
92
+ char **other_headers_values;
93
+ int n_other_headers;
94
+ } pgf_headers;
95
+
96
+
97
+ /********************************************************************
98
+ *******************************************************************
99
+ **
100
+ ** Structures for dealing with data stored at the probelevel
101
+ **
102
+ **
103
+ *******************************************************************
104
+ *******************************************************************/
105
+
106
/*
** probe_list_node
**
** One probe-level record, stored as a node of a singly linked list.
**
** type and probe_sequence are heap-allocated strings (or NULL); they are
** released by dealloc_probes().
**
** The struct tag is named so that "next" really points at this same node
** type. With an untagged struct, "struct probe_list_node" was a separate
** incomplete type and every traversal needed a cast. Existing casts to
** (probe_list_node *) remain valid.
*/
typedef struct probe_list_node{
  int probe_id;
  char *type;
  int gc_count;
  int probe_length;
  int interrogation_position;
  char *probe_sequence;
  struct probe_list_node *next;   /* following probe, NULL at end of list */
} probe_list_node;
116
+
117
+
118
+ typedef struct{
119
+ int n_probes;
120
+ probe_list_node *first;
121
+ } probe_list_header;
122
+
123
+
124
+
125
+ /********************************************************************
126
+ *******************************************************************
127
+ **
128
+ ** Structures for dealing with data stored at the atom level
129
+ **
130
+ **
131
+ *******************************************************************
132
+ *******************************************************************/
133
+
134
+ typedef struct{
135
+ int atom_id;
136
+ char *type;
137
+ char *exon_position;
138
+ probe_list_header *probes;
139
+ struct atom_list_node *next;
140
+ } atom_list_node;
141
+
142
+
143
+ typedef struct{
144
+ int n_atoms;
145
+ atom_list_node *first;
146
+ } atom_list_header;
147
+
148
+
149
+
150
+ /*******************************************************************
151
+ *******************************************************************
152
+ **
153
+ ** Structures for dealing with data as stored at the probeset level
154
+ **
155
+ **
156
+ **
157
+ *******************************************************************
158
+ *******************************************************************/
159
+
160
+ typedef struct probeset_list_node *node_pointer;
161
+
162
+
163
+ typedef struct{
164
+ int probeset_id;
165
+ char *type;
166
+ char *probeset_name;
167
+
168
+ atom_list_header *atoms;
169
+
170
+ struct probeset_list_node *next;
171
+ } probeset_list_node;
172
+
173
+
174
+
175
+ typedef struct{
176
+
177
+ int n_probesets;
178
+
179
+ probeset_list_node *first;
180
+ probeset_list_node *current;
181
+ probeset_list_node *last;
182
+ } probeset_list_header;
183
+
184
+
185
+
186
+ /*******************************************************************
187
+ *******************************************************************
188
+ **
189
+ ** Structure for storing pgf file (after it is read from file)
190
+ **
191
+ *******************************************************************
192
+ ******************************************************************/
193
+
194
+
195
+ typedef struct{
196
+ pgf_headers *headers;
197
+ probeset_list_header *probesets;
198
+ } pgf_file;
199
+
200
+
201
+ /*******************************************************************
202
+ *******************************************************************
203
+ **
204
+ **
205
+ ** Code for splitting a string into a series of tokens
206
+ **
207
+ **
208
+ *******************************************************************
209
+ *******************************************************************/
210
+
211
+
212
+ /***************************************************************
213
+ **
214
+ ** tokenset
215
+ **
216
+ ** char **tokens - an array of token strings
217
+ ** int n - number of tokens in this set.
218
+ **
219
+ ** a structure to hold a set of tokens. Typically a tokenset is
220
+ ** created by breaking a character string based upon a set of
221
+ ** delimiters.
222
+ **
223
+ **
224
+ **************************************************************/
225
+
226
+ typedef struct{
227
+ char **tokens;
228
+ int n;
229
+ } tokenset;
230
+
231
+
232
+
233
+ /******************************************************************
234
+ **
235
+ ** tokenset *tokenize(char *str, char *delimiters)
236
+ **
237
+ ** char *str - a string to break into tokens
238
+ ** char *delimiters - delimiters to use in breaking up the line
239
+ **
240
+ **
241
+ ** RETURNS a new tokenset
242
+ **
243
+ ** Given a string, split into tokens based on a set of delimitors
244
+ **
245
+ *****************************************************************/
246
+
247
+ static tokenset *tokenize(char *str, char *delimiters){
248
+
249
+ #if USE_PTHREADS
250
+ char *tmp_pointer;
251
+ #endif
252
+ int i=0;
253
+
254
+ char *current_token;
255
+ tokenset *my_tokenset = Calloc(1,tokenset);
256
+ my_tokenset->n=0;
257
+
258
+ my_tokenset->tokens = NULL;
259
+ #if USE_PTHREADS
260
+ current_token = strtok_r(str,delimiters,&tmp_pointer);
261
+ #else
262
+ current_token = strtok(str,delimiters);
263
+ #endif
264
+ while (current_token != NULL){
265
+ my_tokenset->n++;
266
+ my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
267
+ my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
268
+ strcpy(my_tokenset->tokens[i],current_token);
269
+ my_tokenset->tokens[i][(strlen(current_token))] = '\0';
270
+ i++;
271
+ #if USE_PTHREADS
272
+ current_token = strtok_r(NULL,delimiters,&tmp_pointer);
273
+ #else
274
+ current_token = strtok(NULL,delimiters);
275
+ #endif
276
+ }
277
+ return my_tokenset;
278
+ }
279
+
280
+
281
+ /******************************************************************
282
+ **
283
+ ** int tokenset_size(tokenset *x)
284
+ **
285
+ ** tokenset *x - a tokenset
286
+ **
287
+ ** RETURNS the number of tokens in the tokenset
288
+ **
289
+ ******************************************************************/
290
+
291
+ static int tokenset_size(tokenset *x){
292
+ return x->n;
293
+ }
294
+
295
+
296
+ /******************************************************************
297
+ **
298
+ ** char *get_token(tokenset *x, int i)
299
+ **
300
+ ** tokenset *x - a tokenset
301
+ ** int i - index of the token to return
302
+ **
303
+ ** RETURNS pointer to the i'th token
304
+ **
305
+ ******************************************************************/
306
+
307
+ static char *get_token(tokenset *x,int i){
308
+ return x->tokens[i];
309
+ }
310
+
311
+ /******************************************************************
312
+ **
313
+ ** void delete_tokens(tokenset *x)
314
+ **
315
+ ** tokenset *x - a tokenset
316
+ **
317
+ ** Deallocates all the space allocated for a tokenset
318
+ **
319
+ ******************************************************************/
320
+
321
+ static void delete_tokens(tokenset *x){
322
+
323
+ int i;
324
+
325
+ for (i=0; i < x->n; i++){
326
+ Free(x->tokens[i]);
327
+ }
328
+ Free(x->tokens);
329
+ Free(x);
330
+ }
331
+
332
/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends_in)
 **
 ** char *token   - string to examine
 ** char *ends_in - suffix we are looking for at the end of token
 **
 ** RETURNS 0 when token does not end with ends_in; otherwise the
 ** index in token at which the matching suffix starts.
 **
 ** token must be strictly longer than ends_in, so a match at index 0
 ** is never reported:
 **
 **    token = "TestStr", ends_in = "TestStr"   returns 0
 **    token = "TestStr", ends_in = "estStr"    returns 1
 **    token = "TestStr", ends_in = "stStr"     returns 2
 **
 ******************************************************************/

static int token_ends_with(char *token, char *ends_in){

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;

  /* token is not longer than the suffix: cannot possibly match */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset,ends_in) == 0) ? offset : 0;
}
385
+
386
+
387
+ /*******************************************************************
388
+ *******************************************************************
389
+ **
390
+ ** Code for Reading from file
391
+ **
392
+ *******************************************************************
393
+ *******************************************************************/
394
+
395
+
396
+
397
/****************************************************************
 **
 ** int ReadFileLine(char *buffer, int buffersize, FILE *currentFile)
 **
 ** char *buffer      - place to store the contents of the line
 ** int buffersize    - size of the buffer
 ** FILE *currentFile - stream to read from
 **
 ** Reads one line into buffer using fgets().
 **
 ** RETURNS 1 if something was read, 0 if fgets() returned NULL
 ** (end of file or read error).
 **
 ***************************************************************/

static int ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  return (fgets(buffer, buffersize, currentFile) != NULL) ? 1 : 0;
}
417
+
418
+
419
+
420
+
421
+ /****************************************************************
422
+ **
423
+ ** Code for identifying what type of information is stored in
424
+ ** the current line
425
+ **
426
+ ***************************************************************/
427
+
428
/****************************************************************
 **
 ** static int IsHeaderLine(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** Tests whether the line is a header line, ie its first two
 ** characters are "#%".
 **
 ** RETURNS 1 if it is a header line, 0 otherwise
 **
 ***************************************************************/

static int IsHeaderLine(char *buffer){
  /* second character is only inspected when the first one matched */
  return (buffer[0] == '#' && buffer[1] == '%') ? 1 : 0;
}
448
+
449
/****************************************************************
 **
 ** static int IsCommentLine(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** Tests whether the line is a comment line, ie its first
 ** character is "#". Header lines ("#%") also satisfy this test.
 **
 ** RETURNS 1 if it is a comment line, 0 otherwise
 **
 ***************************************************************/

static int IsCommentLine(char *buffer){
  return (buffer[0] == '#') ? 1 : 0;
}
466
+
467
+
468
/*****************************************************************
 **
 ** static int IsLevel2(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** Tests whether the line begins with two tab characters, ie \t\t
 **
 ** RETURNS 1 if true, 0 otherwise
 **
 ***************************************************************/

static int IsLevel2(char *buffer){
  /* second character is only inspected when the first one is a tab */
  return (buffer[0] == '\t' && buffer[1] == '\t') ? 1 : 0;
}
486
+
487
+
488
+
489
/*****************************************************************
 **
 ** static int IsLevel1(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** Tests whether the line begins with exactly one tab character,
 ** ie \t that is not followed by a second \t
 **
 ** RETURNS 1 if true, 0 otherwise
 **
 ***************************************************************/

static int IsLevel1(char *buffer){
  if (buffer[0] != '\t'){
    return 0;
  }
  return (buffer[1] != '\t') ? 1 : 0;
}
510
+
511
+
512
+
513
+ /****************************************************************
514
+ ****************************************************************
515
+ **
516
+ ** Code for deallocating or initializing header data structures
517
+ **
518
+ ****************************************************************
519
+ ****************************************************************/
520
+
521
+ void dealloc_pgf_headers(pgf_headers *header){
522
+ int i;
523
+
524
+ if (header->n_chip_type > 0){
525
+ for (i = 0; i < header->n_chip_type; i++){
526
+ Free(header->chip_type[i]);
527
+ }
528
+ Free(header->chip_type);
529
+ }
530
+
531
+ if (header->lib_set_name != NULL){
532
+ Free(header->lib_set_name);
533
+ }
534
+
535
+ if (header->lib_set_version != NULL){
536
+ Free(header->lib_set_version);
537
+ }
538
+
539
+ if (header->pgf_format_version != NULL){
540
+ Free(header->pgf_format_version);
541
+ }
542
+
543
+ if (header->header0_str != NULL){
544
+ Free(header->header0_str);
545
+ Free(header->header0);
546
+ }
547
+ if (header->header1_str != NULL){
548
+ Free(header->header1_str);
549
+ Free(header->header1);
550
+ }
551
+ if (header->header2_str != NULL){
552
+ Free(header->header2_str);
553
+ Free(header->header2);
554
+ }
555
+
556
+ if (header->create_date != NULL){
557
+ Free(header->create_date);
558
+ }
559
+
560
+ if (header->guid != NULL){
561
+ Free(header->guid);
562
+ }
563
+
564
+ if (header->n_other_headers > 0){
565
+ for (i = 0; i < header->n_other_headers; i++){
566
+ Free(header->other_headers_keys[i]);
567
+ Free(header->other_headers_values[i]);
568
+ }
569
+ Free(header->other_headers_keys);
570
+ Free(header->other_headers_values);
571
+ }
572
+ }
573
+
574
+
575
+ void dealloc_probes(probe_list_header *probes){
576
+
577
+ probe_list_node *temp_node;
578
+
579
+ if (probes->first != NULL){
580
+ temp_node = probes->first;
581
+ while (temp_node != NULL){
582
+ probes->first = (probe_list_node *)temp_node->next;
583
+ if (temp_node->type != NULL){
584
+ Free(temp_node->type);
585
+ }
586
+ if (temp_node->probe_sequence != NULL){
587
+ Free(temp_node->probe_sequence);
588
+ }
589
+ Free(temp_node);
590
+ temp_node = probes->first;
591
+ }
592
+
593
+
594
+ }
595
+ }
596
+
597
+
598
+
599
+ void dealloc_atoms(atom_list_header *atoms){
600
+
601
+ atom_list_node *temp_node;
602
+
603
+ if (atoms->first != NULL){
604
+ temp_node = atoms->first;
605
+ while (temp_node != NULL){
606
+ atoms->first = (atom_list_node *)temp_node->next;
607
+ if (temp_node->type != NULL){
608
+ Free(temp_node->type);
609
+ }
610
+ if (temp_node->exon_position != NULL){
611
+ Free(temp_node->exon_position);
612
+ }
613
+ if (temp_node->probes != NULL){
614
+ dealloc_probes(temp_node->probes);
615
+ Free(temp_node->probes);
616
+ }
617
+
618
+ Free(temp_node);
619
+ temp_node = atoms->first;
620
+ }
621
+
622
+
623
+ }
624
+
625
+
626
+
627
+ }
628
+
629
+
630
+ void dealloc_pgf_probesets(probeset_list_header *probesets){
631
+
632
+ probeset_list_node *temp_node;
633
+
634
+ if (probesets->first != NULL){
635
+ temp_node = probesets->first;
636
+ while (temp_node != NULL){
637
+ probesets->first = (probeset_list_node *)temp_node->next;
638
+
639
+ if (temp_node->type != NULL){
640
+ Free(temp_node->type);
641
+ }
642
+ if (temp_node->probeset_name != NULL){
643
+ Free(temp_node->probeset_name);
644
+ }
645
+
646
+ if (temp_node->atoms != NULL){
647
+ dealloc_atoms(temp_node->atoms);
648
+ Free(temp_node->atoms);
649
+ }
650
+
651
+ Free(temp_node);
652
+ temp_node = probesets->first;
653
+ }
654
+ }
655
+
656
+ }
657
+
658
+
659
+
660
+ void dealloc_pgf_file(pgf_file* my_pgf){
661
+
662
+
663
+ if (my_pgf->headers != NULL){
664
+ dealloc_pgf_headers(my_pgf->headers);
665
+ Free(my_pgf->headers);
666
+ }
667
+
668
+
669
+ if (my_pgf->probesets !=NULL){
670
+ dealloc_pgf_probesets(my_pgf->probesets);
671
+ Free(my_pgf->probesets);
672
+ }
673
+
674
+
675
+ }
676
+
677
+
678
+ void initialize_pgf_header(pgf_headers *header){
679
+
680
+ header->chip_type = NULL;
681
+ header->n_chip_type = 0;
682
+
683
+ header->lib_set_name= NULL;
684
+ header->lib_set_version= NULL;
685
+ header->pgf_format_version= NULL;
686
+ header->header0_str= NULL;
687
+ header->header0= NULL;
688
+ header->header1_str= NULL;
689
+ header->header1= NULL;
690
+ header->header2_str= NULL;
691
+ header->header2= NULL;
692
+ header->create_date= NULL;
693
+ header->guid= NULL;
694
+ header->other_headers_keys= NULL;
695
+ header->other_headers_values= NULL;
696
+ header->n_other_headers=0;
697
+ }
698
+
699
+ /****************************************************************
700
+ ****************************************************************
701
+ **
702
+ ** Code for figuring out column ordering
703
+ **
704
+ ****************************************************************
705
+ ***************************************************************/
706
+
707
+
708
+ static void determine_order_header0(char *header_str, header_0 *header0){
709
+
710
+ tokenset *cur_tokenset;
711
+ int i;
712
+ char *temp_str = Calloc(strlen(header_str) +1, char);
713
+
714
+
715
+ strcpy(temp_str,header_str);
716
+
717
+ header0->probeset_id = -1;
718
+ header0->type = -1;
719
+ header0->probeset_name = -1;
720
+
721
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
722
+
723
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
724
+ if (strcmp(get_token(cur_tokenset,i),"probeset_id")==0){
725
+ header0->probeset_id = i;
726
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
727
+ header0->type = i;
728
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
729
+ header0->probeset_name = i;
730
+ }
731
+ }
732
+ delete_tokens(cur_tokenset);
733
+
734
+ Free(temp_str);
735
+
736
+ }
737
+
738
+ static void determine_order_header1(char *header_str, header_1 *header1){
739
+
740
+ tokenset *cur_tokenset;
741
+ int i;
742
+ char *temp_str = Calloc(strlen(header_str) +1, char);
743
+
744
+
745
+ strcpy(temp_str,header_str);
746
+
747
+ header1->atom_id = -1;
748
+ header1->type = -1;
749
+ header1->exon_position = -1;
750
+
751
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
752
+
753
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
754
+ if (strcmp(get_token(cur_tokenset,i),"atom_id")==0){
755
+ header1->atom_id = i;
756
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
757
+ header1->type = i;
758
+ } else if (strcmp(get_token(cur_tokenset,i),"exon_position")==0){
759
+ header1->exon_position = i;
760
+ }
761
+ }
762
+ delete_tokens(cur_tokenset);
763
+
764
+ Free(temp_str);
765
+
766
+ }
767
+
768
+ static void determine_order_header2(char *header_str, header_2 *header2){
769
+
770
+ tokenset *cur_tokenset;
771
+ int i;
772
+ char *temp_str = Calloc(strlen(header_str) +1, char);
773
+
774
+
775
+ strcpy(temp_str,header_str);
776
+
777
+ header2->probe_id = -1;
778
+ header2->type = -1;
779
+ header2->gc_count = -1;
780
+ header2->probe_length = -1;
781
+ header2->interrogation_position = -1;
782
+ header2->probe_sequence = -1;
783
+
784
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
785
+
786
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
787
+ if (strcmp(get_token(cur_tokenset,i),"probe_id")==0){
788
+ header2->probe_id = i;
789
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
790
+ header2->type = i;
791
+ } else if (strcmp(get_token(cur_tokenset,i),"gc_count")==0){
792
+ header2->gc_count = i;
793
+ } else if (strcmp(get_token(cur_tokenset,i),"probe_length")==0){
794
+ header2->probe_length = i;
795
+ } else if (strcmp(get_token(cur_tokenset,i),"interrogation_position")==0){
796
+ header2->interrogation_position = i;
797
+ } else if (strcmp(get_token(cur_tokenset,i),"probe_sequence")==0){
798
+ header2->probe_sequence = i;
799
+ }
800
+
801
+ }
802
+ delete_tokens(cur_tokenset);
803
+
804
+ Free(temp_str);
805
+
806
+ }
807
+
808
+
809
+ /****************************************************************
810
+ **
811
+ ** Validate that required headers are present in file.
812
+ **
813
+ ** Return 0 if an expected header is not present.
814
+ ** Returns 1 otherwise (ie everything looks fine)
815
+ **
816
+ ***************************************************************/
817
+
818
+ static int validate_pgf_header(pgf_headers *header){
819
+
820
+
821
+ /* check that required headers are all there (have been read) */
822
+ if (header->chip_type == NULL)
823
+ return 0;
824
+
825
+ if (header->lib_set_name == NULL)
826
+ return 0;
827
+
828
+ if (header->lib_set_version == NULL)
829
+ return 0;
830
+
831
+ if (header->pgf_format_version == NULL)
832
+ return 0;
833
+
834
+ if (header->header0_str == NULL)
835
+ return 0;
836
+
837
+ if (header->header1_str == NULL)
838
+ return 0;
839
+
840
+ if (header->header2_str == NULL)
841
+ return 0;
842
+
843
+
844
+ /* Check that format version is 1.0 (only supported version) */
845
+
846
+ if (strcmp( header->pgf_format_version,"1.0") != 0){
847
+ return 0;
848
+ }
849
+
850
+ /* check that header0, header1, header2 (ie the three levels of headers) have required fields */
851
+
852
+ if (header->header0->probeset_id == -1)
853
+ return 0;
854
+
855
+ if (header->header1->atom_id == -1)
856
+ return 0;
857
+
858
+ if (header->header2->probe_id == -1)
859
+ return 0;
860
+
861
+ if (header->header2->type == -1)
862
+ return 0;
863
+
864
+ return 1;
865
+ }
866
+
867
+
868
+
869
+
870
+ /****************************************************************
871
+ ****************************************************************
872
+ **
873
+ ** Code for actually reading from the file
874
+ **
875
+ ****************************************************************
876
+ ***************************************************************/
877
+
878
/*
** static FILE *open_pgf_file(const char *filename)
**
** Opens filename for reading in text mode ("r").
**
** RETURNS the opened stream. If fopen() fails, error() is called
** with a message naming the file.
*/
static FILE *open_pgf_file(const char *filename){

  FILE *handle = fopen(filename,"r");

  if (handle == NULL){
    error("Could not open file %s", filename);
  }

  return handle;
}
890
+
891
+ /****************************************************************
892
+ **
893
+ ** Reading the header
894
+ **
895
+ ***************************************************************/
896
+
897
+ void read_pgf_header(FILE *cur_file, char *buffer, pgf_headers *header){
898
+
899
+
900
+ tokenset *cur_tokenset;
901
+ int i;
902
+ char *temp_str;
903
+
904
+
905
+ initialize_pgf_header(header);
906
+ do {
907
+ ReadFileLine(buffer, 1024, cur_file);
908
+ /* Rprintf("%s\n",buffer); */
909
+ if (IsHeaderLine(buffer)){
910
+ cur_tokenset = tokenize(&buffer[2],"=\r\n");
911
+ /* hopefully token 0 is Key
912
+ and token 1 is Value */
913
+ /* Rprintf("Key is: %s\n",get_token(cur_tokenset,0));
914
+ Rprintf("Value is: %s\n",get_token(cur_tokenset,1)); */
915
+ /* Decode the Key/Value pair */
916
+ if (strcmp(get_token(cur_tokenset,0),"chip_type") == 0){
917
+ if (header->n_chip_type == 0){
918
+ header->chip_type = Calloc(1, char *);
919
+ } else {
920
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
921
+ }
922
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
923
+ strcpy(temp_str,get_token(cur_tokenset,1));
924
+ header->chip_type[header->n_chip_type] = temp_str;
925
+ header->n_chip_type++;
926
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_name") == 0){
927
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
928
+ strcpy(temp_str,get_token(cur_tokenset,1));
929
+ header->lib_set_name = temp_str;
930
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_version") == 0){
931
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
932
+ strcpy(temp_str,get_token(cur_tokenset,1));
933
+ header->lib_set_version = temp_str;
934
+ } else if (strcmp(get_token(cur_tokenset,0), "pgf_format_version") == 0) {
935
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
936
+ strcpy(temp_str,get_token(cur_tokenset,1));
937
+ header->pgf_format_version = temp_str;
938
+ } else if (strcmp(get_token(cur_tokenset,0), "header0") == 0) {
939
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
940
+ strcpy(temp_str,get_token(cur_tokenset,1));
941
+ header->header0_str = temp_str;
942
+ header->header0 = Calloc(1,header_0);
943
+ determine_order_header0(header->header0_str,header->header0);
944
+ } else if (strcmp(get_token(cur_tokenset,0), "header1") == 0) {
945
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
946
+ strcpy(temp_str,get_token(cur_tokenset,1));
947
+ header->header1_str = temp_str;
948
+ header->header1 = Calloc(1,header_1);
949
+ determine_order_header1(header->header1_str,header->header1);
950
+ } else if (strcmp(get_token(cur_tokenset,0), "header2") == 0) {
951
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
952
+ strcpy(temp_str,get_token(cur_tokenset,1));
953
+ header->header2_str = temp_str;
954
+ header->header2 = Calloc(1,header_2);
955
+ determine_order_header2(header->header2_str,header->header2);
956
+ } else if (strcmp(get_token(cur_tokenset,0), "create_date") == 0) {
957
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
958
+ strcpy(temp_str,get_token(cur_tokenset,1));
959
+ header->create_date = temp_str;
960
+ } else if (strcmp(get_token(cur_tokenset,0), "guid") == 0) {
961
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
962
+ strcpy(temp_str,get_token(cur_tokenset,1));
963
+ header->guid = temp_str;
964
+ } else {
965
+ /* not one of the recognised header types */
966
+ if ( header->n_other_headers == 0){
967
+ header->other_headers_keys = Calloc(1, char *);
968
+ header->other_headers_values = Calloc(1, char *);
969
+ } else {
970
+ header->other_headers_keys = Realloc(header->other_headers_keys,header->n_other_headers+1, char *);
971
+ header->other_headers_values = Realloc(header->other_headers_values,header->n_other_headers+1, char *);
972
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
973
+ }
974
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
975
+ strcpy(temp_str,get_token(cur_tokenset,1));
976
+ header->other_headers_values[header->n_other_headers] = temp_str;
977
+ temp_str = Calloc(strlen(get_token(cur_tokenset,0)) + 1,char);
978
+ strcpy(temp_str,get_token(cur_tokenset,0));
979
+ header->other_headers_keys[header->n_other_headers] = temp_str;
980
+ header->n_other_headers++;
981
+
982
+ }
983
+
984
+ delete_tokens(cur_tokenset);
985
+ }
986
+ } while (IsHeaderLine(buffer));
987
+
988
+ }
989
+
990
+
991
+ /****************************************************************
992
+ **
993
+ ** Reading the probesets/body of the file
994
+ **
995
+ ***************************************************************/
996
+
997
+ void initialize_probeset_list(probeset_list_header *probeset_list){
998
+
999
+ probeset_list->n_probesets = 0;
1000
+ probeset_list->first = NULL;
1001
+ probeset_list->current = NULL;
1002
+ probeset_list->last = NULL;
1003
+ }
1004
+
1005
+
1006
+
1007
+ void insert_probe(char *buffer, probe_list_header *probe_list, header_2 *header2){
1008
+
1009
+ char *temp_str;
1010
+ tokenset *cur_tokenset;
1011
+ probe_list_node *temp_ptr;
1012
+
1013
+ probe_list_node *temp_node = Calloc(1,probe_list_node);
1014
+
1015
+ cur_tokenset = tokenize(buffer,"\t\r\n");
1016
+ temp_node->probe_id = atoi(get_token(cur_tokenset,header2->probe_id));
1017
+
1018
+ if (header2->type != -1){
1019
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header2->type)) + 1,char);
1020
+ strcpy(temp_str,get_token(cur_tokenset,header2->type));
1021
+ temp_node->type = temp_str;
1022
+ }
1023
+ if (header2->gc_count != -1){
1024
+ temp_node->gc_count = atoi(get_token(cur_tokenset,header2->gc_count));
1025
+ }
1026
+ if (header2->probe_length != -1){
1027
+ temp_node->probe_length = atoi(get_token(cur_tokenset,header2->probe_length));
1028
+ }
1029
+ if (header2->interrogation_position != -1){
1030
+ temp_node->interrogation_position = atoi(get_token(cur_tokenset,header2->interrogation_position));
1031
+ }
1032
+ if (header2->probe_sequence != -1){
1033
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header2->probe_sequence)) + 1,char);
1034
+ strcpy(temp_str,get_token(cur_tokenset,header2->probe_sequence));
1035
+ temp_node->probe_sequence = temp_str;
1036
+ }
1037
+
1038
+
1039
+
1040
+ temp_node->next = NULL;
1041
+ if (probe_list->n_probes == 0){
1042
+ probe_list->first = temp_node;
1043
+ probe_list->n_probes = 1;
1044
+ } else {
1045
+ /* figure out where the end of the list is. Insert there */
1046
+ temp_ptr = probe_list->first;
1047
+
1048
+ while (temp_ptr->next != NULL){
1049
+ temp_ptr = (probe_list_node*)temp_ptr->next;
1050
+ }
1051
+ temp_ptr->next = (struct probe_list_node*)temp_node;
1052
+ probe_list->n_probes++;
1053
+ }
1054
+ delete_tokens(cur_tokenset);
1055
+ }
1056
+
1057
+
1058
+ void insert_level2(char *buffer, probeset_list_header *probeset_list, header_2 *header2){
1059
+
1060
+ atom_list_header *current_level1;
1061
+ atom_list_node *current_atom;
1062
+
1063
+ if (probeset_list->current == NULL){
1064
+ /* Oh Boy, this is a problem no header0 level object to insert into. */
1065
+ error("Can not read a level 2 line before seeing a level 0 line. File corrupted?");
1066
+ }
1067
+
1068
+ if (probeset_list->current->atoms == NULL){
1069
+ /* Oh Boy, this is a problem no header1 level object to insert into. */
1070
+ error("Can not read a level 2 line before seeing a level 1 line. File corrupted?");
1071
+ }
1072
+
1073
+ current_level1 = probeset_list->current->atoms;
1074
+
1075
+ current_atom = current_level1->first;
1076
+
1077
+ while (current_atom->next != NULL){
1078
+ current_atom = (atom_list_node *)current_atom->next;
1079
+ }
1080
+
1081
+ if (current_atom->probes == NULL){
1082
+ current_atom->probes = Calloc(1,probe_list_header);
1083
+ }
1084
+
1085
+ insert_probe(buffer, current_atom->probes, header2);
1086
+ }
1087
+
1088
+
1089
+
1090
+
1091
+
1092
+ void insert_atom(char *buffer, atom_list_header *atoms_list, header_1 *header1){
1093
+
1094
+ char *temp_str;
1095
+ tokenset *cur_tokenset;
1096
+ atom_list_node *temp_ptr;
1097
+
1098
+ atom_list_node *temp_node = Calloc(1,atom_list_node);
1099
+
1100
+ cur_tokenset = tokenize(buffer,"\t\r\n");
1101
+
1102
+ temp_node->atom_id = atoi(get_token(cur_tokenset,header1->atom_id));
1103
+
1104
+ if (header1->type != -1){
1105
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header1->type)) + 1,char);
1106
+ strcpy(temp_str,get_token(cur_tokenset,header1->type));
1107
+ temp_node->type = temp_str;
1108
+ }
1109
+ if (header1->exon_position != -1){
1110
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header1->exon_position)) + 1,char);
1111
+ strcpy(temp_str,get_token(cur_tokenset,header1->exon_position));
1112
+ temp_node->exon_position = temp_str;
1113
+ }
1114
+ temp_node->probes = NULL;
1115
+ temp_node->next = NULL;
1116
+
1117
+ if (atoms_list->n_atoms == 0){
1118
+ atoms_list->first = temp_node;
1119
+ atoms_list->n_atoms = 1;
1120
+ } else {
1121
+ /* figure out where the end of the list is. Insert there */
1122
+ temp_ptr = (atom_list_node*)atoms_list->first;
1123
+
1124
+ while (temp_ptr->next != NULL){
1125
+ temp_ptr= (atom_list_node*)temp_ptr->next;
1126
+ }
1127
+ temp_ptr->next = (struct atom_list_node*)temp_node;
1128
+ atoms_list->n_atoms++;
1129
+ }
1130
+ delete_tokens(cur_tokenset);
1131
+ }
1132
+
1133
+
1134
+ void insert_level1(char *buffer, probeset_list_header *probeset_list, header_1 *header1){
1135
+
1136
+ probeset_list_node *current_level0;
1137
+
1138
+
1139
+ if (probeset_list->current == NULL){
1140
+ /* Oh Boy, this is a problem no header0 level object to insert into. */
1141
+ error("Can not read a level 1 line before seeing a level 0 line. File corrupted?");
1142
+ }
1143
+
1144
+ current_level0 = probeset_list->current;
1145
+
1146
+ if (current_level0->atoms == NULL){
1147
+ current_level0->atoms = Calloc(1,atom_list_header);
1148
+ }
1149
+
1150
+ /* Now lets insert the data */
1151
+
1152
+ insert_atom(buffer, current_level0->atoms, header1);
1153
+
1154
+
1155
+
1156
+
1157
+
1158
+ }
1159
+
1160
+
1161
+
1162
+
1163
+ void insert_level0(char *buffer, probeset_list_header *probeset_list, header_0 *header0){
1164
+
1165
+ char *temp_str;
1166
+ tokenset *cur_tokenset;
1167
+ probeset_list_node *temp_ptr;
1168
+
1169
+ probeset_list_node *temp_node = Calloc(1,probeset_list_node);
1170
+
1171
+ cur_tokenset = tokenize(buffer,"\t\r\n");
1172
+
1173
+ temp_node->probeset_id = atoi(get_token(cur_tokenset,header0->probeset_id));
1174
+
1175
+ if (header0->type != -1){
1176
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header0->type)) + 1,char);
1177
+ strcpy(temp_str,get_token(cur_tokenset,header0->type));
1178
+ temp_node->type = temp_str;
1179
+ }
1180
+ if (header0->probeset_name != -1){
1181
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header0->probeset_name)) + 1,char);
1182
+ strcpy(temp_str,get_token(cur_tokenset,header0->probeset_name));
1183
+ temp_node->probeset_name = temp_str;
1184
+ }
1185
+ temp_node->atoms = NULL;
1186
+ temp_node->next = NULL;
1187
+
1188
+ if (probeset_list->first == NULL){
1189
+ probeset_list->first = temp_node;
1190
+ probeset_list->current = temp_node;
1191
+ probeset_list->last = temp_node;
1192
+ probeset_list->n_probesets = 1;
1193
+ } else {
1194
+ probeset_list->last->next = (struct probeset_list_node *)temp_node;
1195
+ probeset_list->last = temp_node;
1196
+ probeset_list->current = temp_node;
1197
+ probeset_list->n_probesets++;
1198
+ }
1199
+ delete_tokens(cur_tokenset);
1200
+ }
1201
+
1202
+
1203
+ void read_pgf_probesets(FILE *cur_file, char *buffer, probeset_list_header *probeset_list, pgf_headers *header){
1204
+
1205
+ initialize_probeset_list(probeset_list);
1206
+
1207
+ insert_level0(buffer, probeset_list, header->header0);
1208
+
1209
+ while(ReadFileLine(buffer, 1024, cur_file)){
1210
+ if (IsLevel2(buffer)){
1211
+ insert_level2(buffer, probeset_list, header->header2);
1212
+ } else if (IsLevel1(buffer)){
1213
+ insert_level1(buffer, probeset_list, header->header1);
1214
+ } else if (IsCommentLine(buffer)){
1215
+ /*Ignore */
1216
+ } else {
1217
+ insert_level0(buffer, probeset_list, header->header0);
1218
+ }
1219
+ }
1220
+ }
1221
+
1222
+ /****************************************************************
1223
+ ****************************************************************
1224
+ **
1225
+ ** Functionality for counting probeset types
1226
+ **
1227
+ ****************************************************************
1228
+ ****************************************************************/
1229
+
1230
/*
 * One entry of the probeset type tally: a type label together with the
 * number of probesets carrying that label. The tally itself is an array of
 * these entries.
 */
typedef struct{
  char *type;   /* type label; the tally code stores a heap-allocated copy */
  int count;    /* number of probesets seen with this label */
} probeset_type_list;
1234
+
1235
+
1236
+
1237
+ probeset_type_list *pgf_count_probeset_types(pgf_file *my_pgf, int *number){
1238
+
1239
+
1240
+ probeset_type_list *my_type_list = Calloc(1,probeset_type_list);
1241
+
1242
+ char *cur_type;
1243
+ int n;
1244
+
1245
+ /* traverse the probesets. each time examining the probeset type */
1246
+
1247
+
1248
+ if (my_pgf->probesets != NULL){
1249
+
1250
+ if (my_pgf->probesets->first != NULL){
1251
+
1252
+ my_pgf->probesets->current = my_pgf->probesets->first;
1253
+
1254
+ if (my_pgf->probesets->current->type == NULL){
1255
+ my_type_list[0].type = Calloc(5,char);
1256
+ strcpy(my_type_list[0].type,"none");
1257
+
1258
+ } else {
1259
+ my_type_list[0].type = Calloc(strlen(my_pgf->probesets->current->type) + 1,char);
1260
+ strcpy(my_type_list[0].type,my_pgf->probesets->current->type);
1261
+ }
1262
+ my_type_list[0].count = 1;
1263
+ *number = 1; /* number of different types seen */
1264
+ while (my_pgf->probesets->current->next != NULL){
1265
+ my_pgf->probesets->current= my_pgf->probesets->current->next;
1266
+ if (my_pgf->probesets->current->type == NULL){
1267
+ cur_type = "none";
1268
+ } else {
1269
+ cur_type = my_pgf->probesets->current->type;
1270
+ }
1271
+ n = 0;
1272
+ while (n < *number){
1273
+ if (strcmp(cur_type,my_type_list[n].type) == 0){
1274
+ break;
1275
+ }
1276
+ n++;
1277
+ }
1278
+ if (n == *number){
1279
+ my_type_list = Realloc(my_type_list,(n+1),probeset_type_list);
1280
+ my_type_list[n].type = Calloc(strlen(cur_type) + 1,char);
1281
+ strcpy(my_type_list[n].type,cur_type);
1282
+ my_type_list[n].count = 1;
1283
+ *number = *number + 1;
1284
+ } else {
1285
+ my_type_list[n].count++;
1286
+ }
1287
+ }
1288
+ }
1289
+ }
1290
+ return my_type_list;
1291
+ }
1292
+
1293
+
1294
+ void dealloc_probeset_type_list(probeset_type_list *my_type_list, int length){
1295
+
1296
+ int i;
1297
+
1298
+ for (i = 0; i < length; i++){
1299
+ Free(my_type_list[i].type);
1300
+ }
1301
+
1302
+ Free(my_type_list);
1303
+
1304
+ }
1305
+
1306
+ /****************************************************************
1307
+ ****************************************************************
1308
+ **
1309
+ ** Functionality for testing the parsers (from R .C interface)
1310
+ **
1311
+ ****************************************************************
1312
+ ****************************************************************/
1313
+
1314
+ void read_pgf_file(char **filename){
1315
+
1316
+ FILE *cur_file;
1317
+ pgf_file my_pgf;
1318
+ char *buffer = Calloc(1024, char);
1319
+ probeset_type_list *my_probeset_types;
1320
+ int ntypes;
1321
+
1322
+ cur_file = open_pgf_file(filename[0]);
1323
+
1324
+ my_pgf.headers = Calloc(1, pgf_headers);
1325
+ my_pgf.probesets = Calloc(1, probeset_list_header);
1326
+
1327
+ read_pgf_header(cur_file,buffer,my_pgf.headers);
1328
+ if (validate_pgf_header(my_pgf.headers)){
1329
+ read_pgf_probesets(cur_file, buffer, my_pgf.probesets, my_pgf.headers);
1330
+ my_probeset_types = pgf_count_probeset_types(&my_pgf, &ntypes);
1331
+ dealloc_probeset_type_list(my_probeset_types, ntypes);
1332
+ }
1333
+ Free(buffer);
1334
+ dealloc_pgf_file(&my_pgf);
1335
+ fclose(cur_file);
1336
+
1337
+ }