bio-affy 0.1.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
@@ -0,0 +1,144 @@
1
+
2
+ #ifdef BIOLIB
3
+ #include <biolib_R_map.h>
4
+ #endif
5
+
6
+ #include <zlib.h>
7
+
8
+ /******
9
+ ******
10
+ ****** Data Structures
11
+ ******
12
+ ******/
13
+
14
+
15
+
16
+ /* File header */
17
+
18
/*
 * Fixed-size header found at the start of a generic-format file.
 * Field meanings below are taken from the field names only.
 */
typedef struct {
  uint8_t  magic_number;          /* file-type marker byte */
  uint8_t  version;               /* format version byte */
  int32_t  n_data_groups;         /* count of data groups in the file */
  uint32_t first_group_file_pos;  /* file offset of the first data group */
} generic_file_header;
24
+
25
+
26
+
27
+
28
+ /* An affy generic STRING */
29
+
30
/*
 * Affy generic STRING: a length paired with a char buffer.
 * NOTE(review): presumably len counts the chars stored in value - confirm
 * against the reader implementation.
 */
typedef struct {
  int32_t  len;
  char    *value;
} ASTRING;
34
+
35
+ /* An affy generic WSTRING */
36
+
37
/*
 * Affy generic WSTRING: a length paired with a wide-character buffer.
 * NOTE(review): presumably len counts the wide chars stored in value -
 * confirm against the reader implementation.
 */
typedef struct {
  int32_t   len;
  wchar_t  *value;
} AWSTRING;
41
+
42
+
43
+ /* Name Value Type Triplet */
44
+
45
+ typedef struct{
46
+ AWSTRING name;
47
+ ASTRING value;
48
+ AWSTRING type;
49
+ } nvt_triplet;
50
+
51
+
52
+
53
+
54
+ /* Data Header */
55
+
56
+ typedef struct generic_data_header *generic_data_header_pointer;
57
+
58
+ typedef struct{
59
+ ASTRING data_type_id; /*Stored in file as INT followed by CHAR array */
60
+ ASTRING unique_file_id; /*See above */
61
+ AWSTRING Date_time; /*Stored in file as INT followed by WCHAR array */
62
+ AWSTRING locale;
63
+ int32_t n_name_type_value;
64
+ nvt_triplet *name_type_value;
65
+ int32_t n_parent_headers;
66
+ void **parent_headers;
67
+ } generic_data_header;
68
+
69
+
70
+ /* Data Group */
71
+
72
+ typedef struct {
73
+
74
+ uint32_t file_position_nextgroup;
75
+ uint32_t file_position_first_data;
76
+ int32_t n_data_sets;
77
+ AWSTRING data_group_name;
78
+ } generic_data_group;
79
+
80
+
81
+ /* Dataset */
82
+
83
+ typedef struct {
84
+ AWSTRING name;
85
+ uint8_t type;
86
+ int32_t size;
87
+ } col_nvts_triplet;
88
+
89
+
90
+
91
+ typedef struct {
92
+ uint32_t file_pos_first;
93
+ uint32_t file_pos_last;
94
+ AWSTRING data_set_name;
95
+ int32_t n_name_type_value;
96
+ nvt_triplet *name_type_value;
97
+ uint32_t ncols;
98
+ col_nvts_triplet* col_name_type_value;
99
+ uint32_t nrows;
100
+ void **Data; /* in the docs this is rows */
101
+ } generic_data_set;
102
+
103
+
104
+
105
+
106
/*
 * Value-type codes. Numbering starts at 1 and increases by one per
 * enumerator, so only the first value is spelled out.
 */
typedef enum {
  ASCIITEXT = 1,  /* 1 */
  PLAINTEXT,      /* 2 */
  UINT8,          /* 3 */
  INT8,           /* 4 */
  UINT16,         /* 5 */
  INT16,          /* 6 */
  UINT32,         /* 7 */
  INT32,          /* 8 */
  FLOAT32         /* 9 */
} AffyMIMEtypes;
119
+
120
+
121
+
122
+ AffyMIMEtypes determine_MIMETYPE(nvt_triplet triplet);
123
+ void *decode_MIME_value(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size);
124
+ char *decode_MIME_value_toASCII(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size);
125
+
126
+ nvt_triplet* find_nvt(generic_data_header *data_header,char *name);
127
+
128
+ int read_generic_file_header(generic_file_header* file_header, FILE *instream);
129
+ int read_generic_data_header(generic_data_header *data_header, FILE *instream);
130
+ int read_generic_data_group(generic_data_group *data_group, FILE *instream);
131
+ int read_generic_data_set(generic_data_set *data_set, FILE *instream);
132
+ int read_generic_data_set_rows(generic_data_set *data_set, FILE *instream);
133
+
134
+
135
+ void Free_generic_data_header(generic_data_header *header);
136
+ void Free_generic_data_group(generic_data_group *data_group);
137
+ void Free_generic_data_set(generic_data_set *data_set);
138
+
139
+
140
+ int gzread_generic_file_header(generic_file_header* file_header, gzFile *instream);
141
+ int gzread_generic_data_header(generic_data_header *data_header, gzFile *instream);
142
+ int gzread_generic_data_group(generic_data_group *data_group,gzFile *instream);
143
+ int gzread_generic_data_set(generic_data_set *data_set, gzFile *instream);
144
+ int gzread_generic_data_set_rows(generic_data_set *data_set, gzFile *instream);
Binary file
@@ -0,0 +1,1337 @@
1
+ /******************************************************************
2
+ **
3
+ ** file: read_pgf.c
4
+ **
5
+ ** Aim: implement parsing of PGF format files
6
+ **
7
+ ** Copyright (C) 2007 B. M. Bolstad
8
+ **
9
+ ** Created on Nov 4, 2007
10
+ **
11
+ ** History
12
+ ** Nov 4, 2007 - Initial version
13
+ ** Dec 17, 2007 - add function for counting number of each type of probeset
14
+ ** Dec 31, 2007 - add function which checks that all required fields are present
15
+ ** Mar 18, 2008 - fix error in read_pgf_header function
16
+ **
17
+ **
18
+ **
19
+ ******************************************************************/
20
+
21
+ #include <R.h>
22
+
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+
26
+
27
+ #define BUFFERSIZE 1024
28
+
29
+
30
+ /*******************************************************************
31
+ *******************************************************************
32
+ **
33
+ ** Structures for dealing with pgf file information
34
+ **
35
+ **
36
+ **
37
+ *******************************************************************
38
+ ******************************************************************/
39
+
40
+ /*******************************************************************
41
+ *******************************************************************
42
+ **
43
+ ** Starting off with the headers
44
+ **
45
+ *******************************************************************
46
+ ******************************************************************/
47
+
48
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
49
+
50
/*
 * Column layout of probeset-level (header0) lines.
 * Each member holds the 0-based column position of that field,
 * or -1 when the field is not present.
 */
typedef struct {
  int probeset_id;
  int type;
  int probeset_name;
} header_0;
55
+
56
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
57
+
58
/*
 * Column layout of atom-level (header1) lines.
 * Each member holds the 0-based column position of that field,
 * or -1 when the field is not present.
 */
typedef struct {
  int atom_id;
  int type;
  int exon_position;
} header_1;
63
+
64
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
65
+
66
/*
 * Column layout of probe-level (header2) lines.
 * Each member holds the 0-based column position of that field,
 * or -1 when the field is not present.
 */
typedef struct {
  int probe_id;
  int type;
  int gc_count;
  int probe_length;
  int interrogation_position;
  int probe_sequence;
} header_2;
74
+
75
+
76
+
77
+ typedef struct{
78
+ char **chip_type;
79
+ int n_chip_type;
80
+ char *lib_set_name;
81
+ char *lib_set_version;
82
+ char *pgf_format_version;
83
+ char *header0_str;
84
+ header_0 *header0;
85
+ char *header1_str;
86
+ header_1 *header1;
87
+ char *header2_str;
88
+ header_2 *header2;
89
+ char *create_date;
90
+ char *guid;
91
+ char **other_headers_keys;
92
+ char **other_headers_values;
93
+ int n_other_headers;
94
+ } pgf_headers;
95
+
96
+
97
+ /********************************************************************
98
+ *******************************************************************
99
+ **
100
+ ** Structures for dealing with data stored at the probelevel
101
+ **
102
+ **
103
+ *******************************************************************
104
+ *******************************************************************/
105
+
106
/*
 * One probe, stored as a node of a singly linked list.
 *
 * The struct carries the tag `probe_list_node` so that `next` really points
 * to this node type; without the tag it pointed to an unrelated incomplete
 * type and could only be followed through a cast. Existing casts to
 * (probe_list_node *) remain valid.
 */
typedef struct probe_list_node {
  int probe_id;
  char *type;                     /* owned string, may be NULL */
  int gc_count;
  int probe_length;
  int interrogation_position;
  char *probe_sequence;           /* owned string, may be NULL */
  struct probe_list_node *next;   /* next probe, NULL at end of list */
} probe_list_node;
116
+
117
+
118
+ typedef struct{
119
+ int n_probes;
120
+ probe_list_node *first;
121
+ } probe_list_header;
122
+
123
+
124
+
125
+ /********************************************************************
126
+ *******************************************************************
127
+ **
128
+ ** Structures for dealing with data stored at the atom level
129
+ **
130
+ **
131
+ *******************************************************************
132
+ *******************************************************************/
133
+
134
+ typedef struct{
135
+ int atom_id;
136
+ char *type;
137
+ char *exon_position;
138
+ probe_list_header *probes;
139
+ struct atom_list_node *next;
140
+ } atom_list_node;
141
+
142
+
143
+ typedef struct{
144
+ int n_atoms;
145
+ atom_list_node *first;
146
+ } atom_list_header;
147
+
148
+
149
+
150
+ /*******************************************************************
151
+ *******************************************************************
152
+ **
153
+ ** Structures for dealing with data as stored at the probeset level
154
+ **
155
+ **
156
+ **
157
+ *******************************************************************
158
+ *******************************************************************/
159
+
160
+ typedef struct probeset_list_node *node_pointer;
161
+
162
+
163
+ typedef struct{
164
+ int probeset_id;
165
+ char *type;
166
+ char *probeset_name;
167
+
168
+ atom_list_header *atoms;
169
+
170
+ struct probeset_list_node *next;
171
+ } probeset_list_node;
172
+
173
+
174
+
175
+ typedef struct{
176
+
177
+ int n_probesets;
178
+
179
+ probeset_list_node *first;
180
+ probeset_list_node *current;
181
+ probeset_list_node *last;
182
+ } probeset_list_header;
183
+
184
+
185
+
186
+ /*******************************************************************
187
+ *******************************************************************
188
+ **
189
+ ** Structure for storing pgf file (after it is read from file)
190
+ **
191
+ *******************************************************************
192
+ ******************************************************************/
193
+
194
+
195
+ typedef struct{
196
+ pgf_headers *headers;
197
+ probeset_list_header *probesets;
198
+ } pgf_file;
199
+
200
+
201
+ /*******************************************************************
202
+ *******************************************************************
203
+ **
204
+ **
205
+ ** Code for splitting a string into a series of tokens
206
+ **
207
+ **
208
+ *******************************************************************
209
+ *******************************************************************/
210
+
211
+
212
+ /***************************************************************
213
+ **
214
+ ** tokenset
215
+ **
216
+ ** char **tokens - a array of token strings
217
+ ** int n - number of tokens in this set.
218
+ **
219
+ ** a structure to hold a set of tokens. Typically a tokenset is
220
+ ** created by breaking a character string based upon a set of
221
+ ** delimiters.
222
+ **
223
+ **
224
+ **************************************************************/
225
+
226
/*
 * A set of tokens: n strings held in the tokens array.
 */
typedef struct {
  char **tokens;  /* array of token strings */
  int    n;       /* number of tokens in the array */
} tokenset;
230
+
231
+
232
+
233
+ /******************************************************************
234
+ **
235
+ ** tokenset *tokenize(char *str, char *delimiters)
236
+ **
237
+ ** char *str - a string to break into tokens
238
+ ** char *delimiters - delimiters to use in breaking up the line
239
+ **
240
+ **
241
+ ** RETURNS a new tokenset
242
+ **
243
+ ** Given a string, split into tokens based on a set of delimitors
244
+ **
245
+ *****************************************************************/
246
+
247
+ static tokenset *tokenize(char *str, char *delimiters){
248
+
249
+ #if USE_PTHREADS
250
+ char *tmp_pointer;
251
+ #endif
252
+ int i=0;
253
+
254
+ char *current_token;
255
+ tokenset *my_tokenset = Calloc(1,tokenset);
256
+ my_tokenset->n=0;
257
+
258
+ my_tokenset->tokens = NULL;
259
+ #if USE_PTHREADS
260
+ current_token = strtok_r(str,delimiters,&tmp_pointer);
261
+ #else
262
+ current_token = strtok(str,delimiters);
263
+ #endif
264
+ while (current_token != NULL){
265
+ my_tokenset->n++;
266
+ my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
267
+ my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
268
+ strcpy(my_tokenset->tokens[i],current_token);
269
+ my_tokenset->tokens[i][(strlen(current_token))] = '\0';
270
+ i++;
271
+ #if USE_PTHREADS
272
+ current_token = strtok_r(NULL,delimiters,&tmp_pointer);
273
+ #else
274
+ current_token = strtok(NULL,delimiters);
275
+ #endif
276
+ }
277
+ return my_tokenset;
278
+ }
279
+
280
+
281
+ /******************************************************************
282
+ **
283
+ ** int tokenset_size(tokenset *x)
284
+ **
285
+ ** tokenset *x - a tokenset
286
+ **
287
+ ** RETURNS the number of tokens in the tokenset
288
+ **
289
+ ******************************************************************/
290
+
291
+ static int tokenset_size(tokenset *x){
292
+ return x->n;
293
+ }
294
+
295
+
296
+ /******************************************************************
297
+ **
298
+ ** char *get_token(tokenset *x, int i)
299
+ **
300
+ ** tokenset *x - a tokenset
301
+ ** int i - index of the token to return
302
+ **
303
+ ** RETURNS pointer to the i'th token
304
+ **
305
+ ******************************************************************/
306
+
307
+ static char *get_token(tokenset *x,int i){
308
+ return x->tokens[i];
309
+ }
310
+
311
+ /******************************************************************
312
+ **
313
+ ** void delete_tokens(tokenset *x)
314
+ **
315
+ ** tokenset *x - a tokenset
316
+ **
317
+ ** Deallocates all the space allocated for a tokenset
318
+ **
319
+ ******************************************************************/
320
+
321
+ static void delete_tokens(tokenset *x){
322
+
323
+ int i;
324
+
325
+ for (i=0; i < x->n; i++){
326
+ Free(x->tokens[i]);
327
+ }
328
+ Free(x->tokens);
329
+ Free(x);
330
+ }
331
+
332
+ /*******************************************************************
333
+ **
334
+ ** int token_ends_with(char *token, char *ends)
335
+ **
336
+ ** char *token - a string to check
337
+ ** char *ends_in - we are looking for this string at the end of token
338
+ **
339
+ **
340
+ ** returns 0 if no match, otherwise it returns the index of the first character
341
+ ** which matches the start of *ends.
342
+ **
343
+ ** Note that there must be one additional character in "token" beyond
344
+ ** the characters in "ends". So
345
+ **
346
+ ** *token = "TestStr"
347
+ ** *ends = "TestStr"
348
+ **
349
+ ** would return 0 but if
350
+ **
351
+ ** ends = "estStr"
352
+ **
353
+ ** we would return 1.
354
+ **
355
+ ** and if
356
+ **
357
+ ** ends= "stStr"
358
+ ** we would return 2 .....etc
359
+ **
360
+ **
361
+ ******************************************************************/
362
+
363
/*
 * Check whether token ends with ends_in.
 *
 * Returns the index in token at which the matching suffix starts, or 0 if
 * there is no match. token must be strictly longer than ends_in, so a
 * match can never start at index 0 and 0 is unambiguous as "no match".
 */
static int token_ends_with(char *token, char *ends_in){

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;

  /* token is too short to hold the suffix plus one more character */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset,ends_in) == 0) ? offset : 0;
}
385
+
386
+
387
+ /*******************************************************************
388
+ *******************************************************************
389
+ **
390
+ ** Code for Reading from file
391
+ **
392
+ *******************************************************************
393
+ *******************************************************************/
394
+
395
+
396
+
397
+ /****************************************************************
398
+ **
399
+ ** int ReadFileLine(char *buffer, int buffersize, FILE *currentFile)
400
+ **
401
+ ** char *buffer - place to store contents of the line
402
+ ** int buffersize - size of the buffer
403
+ ** FILE *currentFile - FILE pointer to an opened PGF file.
404
+ **
405
+ ** Read a line from a file, into a buffer of specified size.
406
+ ** Returns 1 if a line was read, 0 if nothing could be read (end of file or error).
407
+ **
408
+ ***************************************************************/
409
+
410
/*
 * Read one line (at most buffersize-1 characters) from currentFile into
 * buffer using fgets.
 *
 * Returns 1 when something was read, 0 when fgets returned NULL
 * (end of file or read error).
 */
static int ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  char *got = fgets(buffer, buffersize, currentFile);

  return (got != NULL) ? 1 : 0;
}
417
+
418
+
419
+
420
+
421
+ /****************************************************************
422
+ **
423
+ ** Code for identifying what type of information is stored in
424
+ ** the current line
425
+ **
426
+ ***************************************************************/
427
+
428
+ /****************************************************************
429
+ **
430
+ ** static int IsHeaderLine(char *buffer)
431
+ **
432
+ ** char *buffer - contains line to evaluate
433
+ **
434
+ ** Checks whether supplied line is a header line (ie starts with #%)
435
+ **
436
+ ** return 1 (ie true) if header line. 0 otherwise
437
+ **
438
+ ***************************************************************/
439
+
440
+
441
/* Returns 1 if buffer starts with the two characters "#%", 0 otherwise. */
static int IsHeaderLine(char *buffer){
  /* the second character is only inspected once the first one matched */
  return (buffer[0] == '#' && buffer[1] == '%') ? 1 : 0;
}
448
+
449
+ /****************************************************************
450
+ **
451
+ ** static int IsCommentLine(char *buffer)
452
+ **
453
+ ** char *buffer - contains line to evaluate
454
+ **
455
+ ** Checks whether supplied line is a comment line (ie starts with #)
456
+ **
457
+ **
458
+ ***************************************************************/
459
+
460
/*
 * Returns 1 if buffer starts with '#', 0 otherwise.
 * Header lines ("#%...") also start with '#' and therefore also return 1.
 */
static int IsCommentLine(char *buffer){
  return (buffer[0] == '#') ? 1 : 0;
}
466
+
467
+
468
+ /*****************************************************************
469
+ **
470
+ ** static int IsLevel2(char *buffer)
471
+ **
472
+ ** char *buffer - contains line to evaluate
473
+ **
474
+ ** checks whether supplied line begins with two tab characters, i.e. \t\t
475
+ **
476
+ ** Return 1 if true, 0 otherwise
477
+ **
478
+ ***************************************************************/
479
+
480
/* Returns 1 if buffer starts with two tab characters, 0 otherwise. */
static int IsLevel2(char *buffer){
  /* the second character is only inspected once the first one matched */
  return (buffer[0] == '\t' && buffer[1] == '\t') ? 1 : 0;
}
486
+
487
+
488
+
489
+ /*****************************************************************
490
+ **
491
+ ** static int IsLevel1(char *buffer)
492
+ **
493
+ ** char *buffer - contains line to evaluate
494
+ **
495
+ ** checks whether supplied line begins with exactly one tab character, i.e. \t but not \t\t
496
+ **
497
+ ** Return 1 if true, 0 otherwise
498
+ **
499
+ ***************************************************************/
500
+
501
/*
 * Returns 1 if buffer starts with exactly one tab character (a tab that is
 * not followed by a second tab), 0 otherwise.
 */
static int IsLevel1(char *buffer){
  if (buffer[0] != '\t'){
    return 0;
  }
  /* a second tab makes this a level 2 line instead */
  return (buffer[1] != '\t') ? 1 : 0;
}
510
+
511
+
512
+
513
+ /****************************************************************
514
+ ****************************************************************
515
+ **
516
+ ** Code for deallocating or initializing header data structures
517
+ **
518
+ ****************************************************************
519
+ ****************************************************************/
520
+
521
+ void dealloc_pgf_headers(pgf_headers *header){
522
+ int i;
523
+
524
+ if (header->n_chip_type > 0){
525
+ for (i = 0; i < header->n_chip_type; i++){
526
+ Free(header->chip_type[i]);
527
+ }
528
+ Free(header->chip_type);
529
+ }
530
+
531
+ if (header->lib_set_name != NULL){
532
+ Free(header->lib_set_name);
533
+ }
534
+
535
+ if (header->lib_set_version != NULL){
536
+ Free(header->lib_set_version);
537
+ }
538
+
539
+ if (header->pgf_format_version != NULL){
540
+ Free(header->pgf_format_version);
541
+ }
542
+
543
+ if (header->header0_str != NULL){
544
+ Free(header->header0_str);
545
+ Free(header->header0);
546
+ }
547
+ if (header->header1_str != NULL){
548
+ Free(header->header1_str);
549
+ Free(header->header1);
550
+ }
551
+ if (header->header2_str != NULL){
552
+ Free(header->header2_str);
553
+ Free(header->header2);
554
+ }
555
+
556
+ if (header->create_date != NULL){
557
+ Free(header->create_date);
558
+ }
559
+
560
+ if (header->guid != NULL){
561
+ Free(header->guid);
562
+ }
563
+
564
+ if (header->n_other_headers > 0){
565
+ for (i = 0; i < header->n_other_headers; i++){
566
+ Free(header->other_headers_keys[i]);
567
+ Free(header->other_headers_values[i]);
568
+ }
569
+ Free(header->other_headers_keys);
570
+ Free(header->other_headers_values);
571
+ }
572
+ }
573
+
574
+
575
+ void dealloc_probes(probe_list_header *probes){
576
+
577
+ probe_list_node *temp_node;
578
+
579
+ if (probes->first != NULL){
580
+ temp_node = probes->first;
581
+ while (temp_node != NULL){
582
+ probes->first = (probe_list_node *)temp_node->next;
583
+ if (temp_node->type != NULL){
584
+ Free(temp_node->type);
585
+ }
586
+ if (temp_node->probe_sequence != NULL){
587
+ Free(temp_node->probe_sequence);
588
+ }
589
+ Free(temp_node);
590
+ temp_node = probes->first;
591
+ }
592
+
593
+
594
+ }
595
+ }
596
+
597
+
598
+
599
+ void dealloc_atoms(atom_list_header *atoms){
600
+
601
+ atom_list_node *temp_node;
602
+
603
+ if (atoms->first != NULL){
604
+ temp_node = atoms->first;
605
+ while (temp_node != NULL){
606
+ atoms->first = (atom_list_node *)temp_node->next;
607
+ if (temp_node->type != NULL){
608
+ Free(temp_node->type);
609
+ }
610
+ if (temp_node->exon_position != NULL){
611
+ Free(temp_node->exon_position);
612
+ }
613
+ if (temp_node->probes != NULL){
614
+ dealloc_probes(temp_node->probes);
615
+ Free(temp_node->probes);
616
+ }
617
+
618
+ Free(temp_node);
619
+ temp_node = atoms->first;
620
+ }
621
+
622
+
623
+ }
624
+
625
+
626
+
627
+ }
628
+
629
+
630
+ void dealloc_pgf_probesets(probeset_list_header *probesets){
631
+
632
+ probeset_list_node *temp_node;
633
+
634
+ if (probesets->first != NULL){
635
+ temp_node = probesets->first;
636
+ while (temp_node != NULL){
637
+ probesets->first = (probeset_list_node *)temp_node->next;
638
+
639
+ if (temp_node->type != NULL){
640
+ Free(temp_node->type);
641
+ }
642
+ if (temp_node->probeset_name != NULL){
643
+ Free(temp_node->probeset_name);
644
+ }
645
+
646
+ if (temp_node->atoms != NULL){
647
+ dealloc_atoms(temp_node->atoms);
648
+ Free(temp_node->atoms);
649
+ }
650
+
651
+ Free(temp_node);
652
+ temp_node = probesets->first;
653
+ }
654
+ }
655
+
656
+ }
657
+
658
+
659
+
660
+ void dealloc_pgf_file(pgf_file* my_pgf){
661
+
662
+
663
+ if (my_pgf->headers != NULL){
664
+ dealloc_pgf_headers(my_pgf->headers);
665
+ Free(my_pgf->headers);
666
+ }
667
+
668
+
669
+ if (my_pgf->probesets !=NULL){
670
+ dealloc_pgf_probesets(my_pgf->probesets);
671
+ Free(my_pgf->probesets);
672
+ }
673
+
674
+
675
+ }
676
+
677
+
678
+ void initialize_pgf_header(pgf_headers *header){
679
+
680
+ header->chip_type = NULL;
681
+ header->n_chip_type = 0;
682
+
683
+ header->lib_set_name= NULL;
684
+ header->lib_set_version= NULL;
685
+ header->pgf_format_version= NULL;
686
+ header->header0_str= NULL;
687
+ header->header0= NULL;
688
+ header->header1_str= NULL;
689
+ header->header1= NULL;
690
+ header->header2_str= NULL;
691
+ header->header2= NULL;
692
+ header->create_date= NULL;
693
+ header->guid= NULL;
694
+ header->other_headers_keys= NULL;
695
+ header->other_headers_values= NULL;
696
+ header->n_other_headers=0;
697
+ }
698
+
699
+ /****************************************************************
700
+ ****************************************************************
701
+ **
702
+ ** Code for figuring out column ordering
703
+ **
704
+ ****************************************************************
705
+ ***************************************************************/
706
+
707
+
708
+ static void determine_order_header0(char *header_str, header_0 *header0){
709
+
710
+ tokenset *cur_tokenset;
711
+ int i;
712
+ char *temp_str = Calloc(strlen(header_str) +1, char);
713
+
714
+
715
+ strcpy(temp_str,header_str);
716
+
717
+ header0->probeset_id = -1;
718
+ header0->type = -1;
719
+ header0->probeset_name = -1;
720
+
721
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
722
+
723
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
724
+ if (strcmp(get_token(cur_tokenset,i),"probeset_id")==0){
725
+ header0->probeset_id = i;
726
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
727
+ header0->type = i;
728
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
729
+ header0->probeset_name = i;
730
+ }
731
+ }
732
+ delete_tokens(cur_tokenset);
733
+
734
+ Free(temp_str);
735
+
736
+ }
737
+
738
+ static void determine_order_header1(char *header_str, header_1 *header1){
739
+
740
+ tokenset *cur_tokenset;
741
+ int i;
742
+ char *temp_str = Calloc(strlen(header_str) +1, char);
743
+
744
+
745
+ strcpy(temp_str,header_str);
746
+
747
+ header1->atom_id = -1;
748
+ header1->type = -1;
749
+ header1->exon_position = -1;
750
+
751
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
752
+
753
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
754
+ if (strcmp(get_token(cur_tokenset,i),"atom_id")==0){
755
+ header1->atom_id = i;
756
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
757
+ header1->type = i;
758
+ } else if (strcmp(get_token(cur_tokenset,i),"exon_position")==0){
759
+ header1->exon_position = i;
760
+ }
761
+ }
762
+ delete_tokens(cur_tokenset);
763
+
764
+ Free(temp_str);
765
+
766
+ }
767
+
768
+ static void determine_order_header2(char *header_str, header_2 *header2){
769
+
770
+ tokenset *cur_tokenset;
771
+ int i;
772
+ char *temp_str = Calloc(strlen(header_str) +1, char);
773
+
774
+
775
+ strcpy(temp_str,header_str);
776
+
777
+ header2->probe_id = -1;
778
+ header2->type = -1;
779
+ header2->gc_count = -1;
780
+ header2->probe_length = -1;
781
+ header2->interrogation_position = -1;
782
+ header2->probe_sequence = -1;
783
+
784
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
785
+
786
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
787
+ if (strcmp(get_token(cur_tokenset,i),"probe_id")==0){
788
+ header2->probe_id = i;
789
+ } else if (strcmp(get_token(cur_tokenset,i),"type")==0){
790
+ header2->type = i;
791
+ } else if (strcmp(get_token(cur_tokenset,i),"gc_count")==0){
792
+ header2->gc_count = i;
793
+ } else if (strcmp(get_token(cur_tokenset,i),"probe_length")==0){
794
+ header2->probe_length = i;
795
+ } else if (strcmp(get_token(cur_tokenset,i),"interrogation_position")==0){
796
+ header2->interrogation_position = i;
797
+ } else if (strcmp(get_token(cur_tokenset,i),"probe_sequence")==0){
798
+ header2->probe_sequence = i;
799
+ }
800
+
801
+ }
802
+ delete_tokens(cur_tokenset);
803
+
804
+ Free(temp_str);
805
+
806
+ }
807
+
808
+
809
+ /****************************************************************
810
+ **
811
+ ** Validate that required headers are present in file.
812
+ **
813
+ ** Return 0 if an expected header is not present.
814
+ ** Returns 1 otherwise (ie everything looks fine)
815
+ **
816
+ ***************************************************************/
817
+
818
+ static int validate_pgf_header(pgf_headers *header){
819
+
820
+
821
+ /* check that required headers are all there (have been read) */
822
+ if (header->chip_type == NULL)
823
+ return 0;
824
+
825
+ if (header->lib_set_name == NULL)
826
+ return 0;
827
+
828
+ if (header->lib_set_version == NULL)
829
+ return 0;
830
+
831
+ if (header->pgf_format_version == NULL)
832
+ return 0;
833
+
834
+ if (header->header0_str == NULL)
835
+ return 0;
836
+
837
+ if (header->header1_str == NULL)
838
+ return 0;
839
+
840
+ if (header->header2_str == NULL)
841
+ return 0;
842
+
843
+
844
+ /* Check that format version is 1.0 (only supported version) */
845
+
846
+ if (strcmp( header->pgf_format_version,"1.0") != 0){
847
+ return 0;
848
+ }
849
+
850
+ /* check that header0, header1, header2 (ie the three levels of headers) have required fields */
851
+
852
+ if (header->header0->probeset_id == -1)
853
+ return 0;
854
+
855
+ if (header->header1->atom_id == -1)
856
+ return 0;
857
+
858
+ if (header->header2->probe_id == -1)
859
+ return 0;
860
+
861
+ if (header->header2->type == -1)
862
+ return 0;
863
+
864
+ return 1;
865
+ }
866
+
867
+
868
+
869
+
870
+ /****************************************************************
871
+ ****************************************************************
872
+ **
873
+ ** Code for actually reading from the file
874
+ **
875
+ ****************************************************************
876
+ ***************************************************************/
877
+
878
/*
 * Open filename for reading in text mode and return the stream.
 * When the file cannot be opened, error() is called with a message
 * naming the file.
 */
static FILE *open_pgf_file(const char *filename){

  FILE *stream = fopen(filename,"r");

  if (stream == NULL){
    error("Could not open file %s", filename);
  }
  return stream;

}
890
+
891
+ /****************************************************************
892
+ **
893
+ ** Reading the header
894
+ **
895
+ ***************************************************************/
896
+
897
+ void read_pgf_header(FILE *cur_file, char *buffer, pgf_headers *header){
898
+
899
+
900
+ tokenset *cur_tokenset;
901
+ int i;
902
+ char *temp_str;
903
+
904
+
905
+ initialize_pgf_header(header);
906
+ do {
907
+ ReadFileLine(buffer, 1024, cur_file);
908
+ /* Rprintf("%s\n",buffer); */
909
+ if (IsHeaderLine(buffer)){
910
+ cur_tokenset = tokenize(&buffer[2],"=\r\n");
911
+ /* hopefully token 0 is Key
912
+ and token 1 is Value */
913
+ /* Rprintf("Key is: %s\n",get_token(cur_tokenset,0));
914
+ Rprintf("Value is: %s\n",get_token(cur_tokenset,1)); */
915
+ /* Decode the Key/Value pair */
916
+ if (strcmp(get_token(cur_tokenset,0),"chip_type") == 0){
917
+ if (header->n_chip_type == 0){
918
+ header->chip_type = Calloc(1, char *);
919
+ } else {
920
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
921
+ }
922
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
923
+ strcpy(temp_str,get_token(cur_tokenset,1));
924
+ header->chip_type[header->n_chip_type] = temp_str;
925
+ header->n_chip_type++;
926
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_name") == 0){
927
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
928
+ strcpy(temp_str,get_token(cur_tokenset,1));
929
+ header->lib_set_name = temp_str;
930
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_version") == 0){
931
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
932
+ strcpy(temp_str,get_token(cur_tokenset,1));
933
+ header->lib_set_version = temp_str;
934
+ } else if (strcmp(get_token(cur_tokenset,0), "pgf_format_version") == 0) {
935
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
936
+ strcpy(temp_str,get_token(cur_tokenset,1));
937
+ header->pgf_format_version = temp_str;
938
+ } else if (strcmp(get_token(cur_tokenset,0), "header0") == 0) {
939
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
940
+ strcpy(temp_str,get_token(cur_tokenset,1));
941
+ header->header0_str = temp_str;
942
+ header->header0 = Calloc(1,header_0);
943
+ determine_order_header0(header->header0_str,header->header0);
944
+ } else if (strcmp(get_token(cur_tokenset,0), "header1") == 0) {
945
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
946
+ strcpy(temp_str,get_token(cur_tokenset,1));
947
+ header->header1_str = temp_str;
948
+ header->header1 = Calloc(1,header_1);
949
+ determine_order_header1(header->header1_str,header->header1);
950
+ } else if (strcmp(get_token(cur_tokenset,0), "header2") == 0) {
951
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
952
+ strcpy(temp_str,get_token(cur_tokenset,1));
953
+ header->header2_str = temp_str;
954
+ header->header2 = Calloc(1,header_2);
955
+ determine_order_header2(header->header2_str,header->header2);
956
+ } else if (strcmp(get_token(cur_tokenset,0), "create_date") == 0) {
957
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
958
+ strcpy(temp_str,get_token(cur_tokenset,1));
959
+ header->create_date = temp_str;
960
+ } else if (strcmp(get_token(cur_tokenset,0), "guid") == 0) {
961
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
962
+ strcpy(temp_str,get_token(cur_tokenset,1));
963
+ header->guid = temp_str;
964
+ } else {
965
+ /* not one of the recognised header types */
966
+ if ( header->n_other_headers == 0){
967
+ header->other_headers_keys = Calloc(1, char *);
968
+ header->other_headers_values = Calloc(1, char *);
969
+ } else {
970
+ header->other_headers_keys = Realloc(header->other_headers_keys,header->n_other_headers+1, char *);
971
+ header->other_headers_values = Realloc(header->other_headers_values,header->n_other_headers+1, char *);
972
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
973
+ }
974
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
975
+ strcpy(temp_str,get_token(cur_tokenset,1));
976
+ header->other_headers_values[header->n_other_headers] = temp_str;
977
+ temp_str = Calloc(strlen(get_token(cur_tokenset,0)) + 1,char);
978
+ strcpy(temp_str,get_token(cur_tokenset,0));
979
+ header->other_headers_keys[header->n_other_headers] = temp_str;
980
+ header->n_other_headers++;
981
+
982
+ }
983
+
984
+ delete_tokens(cur_tokenset);
985
+ }
986
+ } while (IsHeaderLine(buffer));
987
+
988
+ }
989
+
990
+
991
+ /****************************************************************
992
+ **
993
+ ** Reading the probesets/body of the file
994
+ **
995
+ ***************************************************************/
996
+
997
+ void initialize_probeset_list(probeset_list_header *probeset_list){
998
+
999
+ probeset_list->n_probesets = 0;
1000
+ probeset_list->first = NULL;
1001
+ probeset_list->current = NULL;
1002
+ probeset_list->last = NULL;
1003
+ }
1004
+
1005
+
1006
+
1007
+ void insert_probe(char *buffer, probe_list_header *probe_list, header_2 *header2){
1008
+
1009
+ char *temp_str;
1010
+ tokenset *cur_tokenset;
1011
+ probe_list_node *temp_ptr;
1012
+
1013
+ probe_list_node *temp_node = Calloc(1,probe_list_node);
1014
+
1015
+ cur_tokenset = tokenize(buffer,"\t\r\n");
1016
+ temp_node->probe_id = atoi(get_token(cur_tokenset,header2->probe_id));
1017
+
1018
+ if (header2->type != -1){
1019
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header2->type)) + 1,char);
1020
+ strcpy(temp_str,get_token(cur_tokenset,header2->type));
1021
+ temp_node->type = temp_str;
1022
+ }
1023
+ if (header2->gc_count != -1){
1024
+ temp_node->gc_count = atoi(get_token(cur_tokenset,header2->gc_count));
1025
+ }
1026
+ if (header2->probe_length != -1){
1027
+ temp_node->probe_length = atoi(get_token(cur_tokenset,header2->probe_length));
1028
+ }
1029
+ if (header2->interrogation_position != -1){
1030
+ temp_node->interrogation_position = atoi(get_token(cur_tokenset,header2->interrogation_position));
1031
+ }
1032
+ if (header2->probe_sequence != -1){
1033
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header2->probe_sequence)) + 1,char);
1034
+ strcpy(temp_str,get_token(cur_tokenset,header2->probe_sequence));
1035
+ temp_node->probe_sequence = temp_str;
1036
+ }
1037
+
1038
+
1039
+
1040
+ temp_node->next = NULL;
1041
+ if (probe_list->n_probes == 0){
1042
+ probe_list->first = temp_node;
1043
+ probe_list->n_probes = 1;
1044
+ } else {
1045
+ /* figure out where the end of the list is. Insert there */
1046
+ temp_ptr = probe_list->first;
1047
+
1048
+ while (temp_ptr->next != NULL){
1049
+ temp_ptr = (probe_list_node*)temp_ptr->next;
1050
+ }
1051
+ temp_ptr->next = (struct probe_list_node*)temp_node;
1052
+ probe_list->n_probes++;
1053
+ }
1054
+ delete_tokens(cur_tokenset);
1055
+ }
1056
+
1057
+
1058
+ void insert_level2(char *buffer, probeset_list_header *probeset_list, header_2 *header2){
1059
+
1060
+ atom_list_header *current_level1;
1061
+ atom_list_node *current_atom;
1062
+
1063
+ if (probeset_list->current == NULL){
1064
+ /* Oh Boy, this is a problem no header0 level object to insert into. */
1065
+ error("Can not read a level 2 line before seeing a level 0 line. File corrupted?");
1066
+ }
1067
+
1068
+ if (probeset_list->current->atoms == NULL){
1069
+ /* Oh Boy, this is a problem no header1 level object to insert into. */
1070
+ error("Can not read a level 2 line before seeing a level 1 line. File corrupted?");
1071
+ }
1072
+
1073
+ current_level1 = probeset_list->current->atoms;
1074
+
1075
+ current_atom = current_level1->first;
1076
+
1077
+ while (current_atom->next != NULL){
1078
+ current_atom = (atom_list_node *)current_atom->next;
1079
+ }
1080
+
1081
+ if (current_atom->probes == NULL){
1082
+ current_atom->probes = Calloc(1,probe_list_header);
1083
+ }
1084
+
1085
+ insert_probe(buffer, current_atom->probes, header2);
1086
+ }
1087
+
1088
+
1089
+
1090
+
1091
+
1092
+ void insert_atom(char *buffer, atom_list_header *atoms_list, header_1 *header1){
1093
+
1094
+ char *temp_str;
1095
+ tokenset *cur_tokenset;
1096
+ atom_list_node *temp_ptr;
1097
+
1098
+ atom_list_node *temp_node = Calloc(1,atom_list_node);
1099
+
1100
+ cur_tokenset = tokenize(buffer,"\t\r\n");
1101
+
1102
+ temp_node->atom_id = atoi(get_token(cur_tokenset,header1->atom_id));
1103
+
1104
+ if (header1->type != -1){
1105
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header1->type)) + 1,char);
1106
+ strcpy(temp_str,get_token(cur_tokenset,header1->type));
1107
+ temp_node->type = temp_str;
1108
+ }
1109
+ if (header1->exon_position != -1){
1110
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header1->exon_position)) + 1,char);
1111
+ strcpy(temp_str,get_token(cur_tokenset,header1->exon_position));
1112
+ temp_node->exon_position = temp_str;
1113
+ }
1114
+ temp_node->probes = NULL;
1115
+ temp_node->next = NULL;
1116
+
1117
+ if (atoms_list->n_atoms == 0){
1118
+ atoms_list->first = temp_node;
1119
+ atoms_list->n_atoms = 1;
1120
+ } else {
1121
+ /* figure out where the end of the list is. Insert there */
1122
+ temp_ptr = (atom_list_node*)atoms_list->first;
1123
+
1124
+ while (temp_ptr->next != NULL){
1125
+ temp_ptr= (atom_list_node*)temp_ptr->next;
1126
+ }
1127
+ temp_ptr->next = (struct atom_list_node*)temp_node;
1128
+ atoms_list->n_atoms++;
1129
+ }
1130
+ delete_tokens(cur_tokenset);
1131
+ }
1132
+
1133
+
1134
+ void insert_level1(char *buffer, probeset_list_header *probeset_list, header_1 *header1){
1135
+
1136
+ probeset_list_node *current_level0;
1137
+
1138
+
1139
+ if (probeset_list->current == NULL){
1140
+ /* Oh Boy, this is a problem no header0 level object to insert into. */
1141
+ error("Can not read a level 1 line before seeing a level 0 line. File corrupted?");
1142
+ }
1143
+
1144
+ current_level0 = probeset_list->current;
1145
+
1146
+ if (current_level0->atoms == NULL){
1147
+ current_level0->atoms = Calloc(1,atom_list_header);
1148
+ }
1149
+
1150
+ /* Now lets insert the data */
1151
+
1152
+ insert_atom(buffer, current_level0->atoms, header1);
1153
+
1154
+
1155
+
1156
+
1157
+
1158
+ }
1159
+
1160
+
1161
+
1162
+
1163
+ void insert_level0(char *buffer, probeset_list_header *probeset_list, header_0 *header0){
1164
+
1165
+ char *temp_str;
1166
+ tokenset *cur_tokenset;
1167
+ probeset_list_node *temp_ptr;
1168
+
1169
+ probeset_list_node *temp_node = Calloc(1,probeset_list_node);
1170
+
1171
+ cur_tokenset = tokenize(buffer,"\t\r\n");
1172
+
1173
+ temp_node->probeset_id = atoi(get_token(cur_tokenset,header0->probeset_id));
1174
+
1175
+ if (header0->type != -1){
1176
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header0->type)) + 1,char);
1177
+ strcpy(temp_str,get_token(cur_tokenset,header0->type));
1178
+ temp_node->type = temp_str;
1179
+ }
1180
+ if (header0->probeset_name != -1){
1181
+ temp_str = Calloc(strlen(get_token(cur_tokenset,header0->probeset_name)) + 1,char);
1182
+ strcpy(temp_str,get_token(cur_tokenset,header0->probeset_name));
1183
+ temp_node->probeset_name = temp_str;
1184
+ }
1185
+ temp_node->atoms = NULL;
1186
+ temp_node->next = NULL;
1187
+
1188
+ if (probeset_list->first == NULL){
1189
+ probeset_list->first = temp_node;
1190
+ probeset_list->current = temp_node;
1191
+ probeset_list->last = temp_node;
1192
+ probeset_list->n_probesets = 1;
1193
+ } else {
1194
+ probeset_list->last->next = (struct probeset_list_node *)temp_node;
1195
+ probeset_list->last = temp_node;
1196
+ probeset_list->current = temp_node;
1197
+ probeset_list->n_probesets++;
1198
+ }
1199
+ delete_tokens(cur_tokenset);
1200
+ }
1201
+
1202
+
1203
+ void read_pgf_probesets(FILE *cur_file, char *buffer, probeset_list_header *probeset_list, pgf_headers *header){
1204
+
1205
+ initialize_probeset_list(probeset_list);
1206
+
1207
+ insert_level0(buffer, probeset_list, header->header0);
1208
+
1209
+ while(ReadFileLine(buffer, 1024, cur_file)){
1210
+ if (IsLevel2(buffer)){
1211
+ insert_level2(buffer, probeset_list, header->header2);
1212
+ } else if (IsLevel1(buffer)){
1213
+ insert_level1(buffer, probeset_list, header->header1);
1214
+ } else if (IsCommentLine(buffer)){
1215
+ /*Ignore */
1216
+ } else {
1217
+ insert_level0(buffer, probeset_list, header->header0);
1218
+ }
1219
+ }
1220
+ }
1221
+
1222
+ /****************************************************************
1223
+ ****************************************************************
1224
+ **
1225
+ ** Functionality for counting probeset types
1226
+ **
1227
+ ****************************************************************
1228
+ ****************************************************************/
1229
+
1230
/*
 * One entry of the probeset type tally: a type name together with the
 * number of probesets that carry it.  The tally itself is an array of
 * these entries.
 */
typedef struct{
  char *type;   /* type name; "none" stands for probesets without a type */
  int count;    /* how many probesets have this type */
} probeset_type_list;
1234
+
1235
+
1236
+
1237
+ probeset_type_list *pgf_count_probeset_types(pgf_file *my_pgf, int *number){
1238
+
1239
+
1240
+ probeset_type_list *my_type_list = Calloc(1,probeset_type_list);
1241
+
1242
+ char *cur_type;
1243
+ int n;
1244
+
1245
+ /* traverse the probesets. each time examining the probeset type */
1246
+
1247
+
1248
+ if (my_pgf->probesets != NULL){
1249
+
1250
+ if (my_pgf->probesets->first != NULL){
1251
+
1252
+ my_pgf->probesets->current = my_pgf->probesets->first;
1253
+
1254
+ if (my_pgf->probesets->current->type == NULL){
1255
+ my_type_list[0].type = Calloc(5,char);
1256
+ strcpy(my_type_list[0].type,"none");
1257
+
1258
+ } else {
1259
+ my_type_list[0].type = Calloc(strlen(my_pgf->probesets->current->type) + 1,char);
1260
+ strcpy(my_type_list[0].type,my_pgf->probesets->current->type);
1261
+ }
1262
+ my_type_list[0].count = 1;
1263
+ *number = 1; /* number of different types seen */
1264
+ while (my_pgf->probesets->current->next != NULL){
1265
+ my_pgf->probesets->current= my_pgf->probesets->current->next;
1266
+ if (my_pgf->probesets->current->type == NULL){
1267
+ cur_type = "none";
1268
+ } else {
1269
+ cur_type = my_pgf->probesets->current->type;
1270
+ }
1271
+ n = 0;
1272
+ while (n < *number){
1273
+ if (strcmp(cur_type,my_type_list[n].type) == 0){
1274
+ break;
1275
+ }
1276
+ n++;
1277
+ }
1278
+ if (n == *number){
1279
+ my_type_list = Realloc(my_type_list,(n+1),probeset_type_list);
1280
+ my_type_list[n].type = Calloc(strlen(cur_type) + 1,char);
1281
+ strcpy(my_type_list[n].type,cur_type);
1282
+ my_type_list[n].count = 1;
1283
+ *number = *number + 1;
1284
+ } else {
1285
+ my_type_list[n].count++;
1286
+ }
1287
+ }
1288
+ }
1289
+ }
1290
+ return my_type_list;
1291
+ }
1292
+
1293
+
1294
+ void dealloc_probeset_type_list(probeset_type_list *my_type_list, int length){
1295
+
1296
+ int i;
1297
+
1298
+ for (i = 0; i < length; i++){
1299
+ Free(my_type_list[i].type);
1300
+ }
1301
+
1302
+ Free(my_type_list);
1303
+
1304
+ }
1305
+
1306
+ /****************************************************************
1307
+ ****************************************************************
1308
+ **
1309
+ ** Functionality for testing the parsers (from R .C interface)
1310
+ **
1311
+ ****************************************************************
1312
+ ****************************************************************/
1313
+
1314
+ void read_pgf_file(char **filename){
1315
+
1316
+ FILE *cur_file;
1317
+ pgf_file my_pgf;
1318
+ char *buffer = Calloc(1024, char);
1319
+ probeset_type_list *my_probeset_types;
1320
+ int ntypes;
1321
+
1322
+ cur_file = open_pgf_file(filename[0]);
1323
+
1324
+ my_pgf.headers = Calloc(1, pgf_headers);
1325
+ my_pgf.probesets = Calloc(1, probeset_list_header);
1326
+
1327
+ read_pgf_header(cur_file,buffer,my_pgf.headers);
1328
+ if (validate_pgf_header(my_pgf.headers)){
1329
+ read_pgf_probesets(cur_file, buffer, my_pgf.probesets, my_pgf.headers);
1330
+ my_probeset_types = pgf_count_probeset_types(&my_pgf, &ntypes);
1331
+ dealloc_probeset_type_list(my_probeset_types, ntypes);
1332
+ }
1333
+ Free(buffer);
1334
+ dealloc_pgf_file(&my_pgf);
1335
+ fclose(cur_file);
1336
+
1337
+ }