bio-affy 0.1.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
@@ -0,0 +1,33 @@
1
+ #ifndef READ_CELFILE_GENERIC_H
2
+ #define READ_CELFILE_GENERIC_H
3
+
4
+ #ifdef BIOLIB
5
+ #include <biolib_R_map.h>
6
+ #endif
7
+
8
+ #include "read_abatch.h"
9
+
10
+ int isGenericCelFile(const char *filename);
11
+ char *generic_get_header_info(const char *filename, int *dim1, int *dim2);
12
+ void generic_get_detailed_header_info(const char *filename, detailed_header_info *header_info);
13
+ int read_genericcel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
14
+ int check_generic_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2);
15
+ int read_genericcel_file_stddev(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
16
+ int read_genericcel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
17
+ void generic_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y);
18
+ void generic_apply_masks(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers);
19
+
20
+
21
+
22
+ int isgzGenericCelFile(const char *filename);
23
+ char *gzgeneric_get_header_info(const char *filename, int *dim1, int *dim2);
24
+ void gzgeneric_get_detailed_header_info(const char *filename, detailed_header_info *header_info);
25
+ int gzread_genericcel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
26
+ int check_gzgeneric_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2);
27
+ int gzread_genericcel_file_stddev(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
28
+ int gzread_genericcel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
29
+ void gzgeneric_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y);
30
+ void gzgeneric_apply_masks(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers);
31
+
32
+
33
+ #endif
@@ -0,0 +1,870 @@
1
+ /******************************************************************
2
+ **
3
+ ** file: read_clf.c
4
+ **
5
+ ** Aim: implement parsing of CLF format files
6
+ **
7
+ ** Copyright (C) 2007-2008 B. M. Bolstad
8
+ **
9
+ ** Created on Nov 4, 2007
10
+ **
11
+ ** History
12
+ ** Dec 14, 2007 - Initial version
13
+ ** Dec 31, 2007 - Add function for checking that required headers were found
14
+ ** Jan 2, 2008 - port x,y to probe_id and probe_id to x,y functions from RMAExpress parsers
15
+ ** Mar 18, 2008 - fix error in read_clf_header function
16
+ **
17
+ **
18
+ **
19
+ ******************************************************************/
20
+
21
#include <R.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
25
+
26
+
27
+ #define BUFFERSIZE 1024
28
+
29
+ /*******************************************************************
30
+ *******************************************************************
31
+ **
32
+ ** Structures for dealing with clf file information
33
+ **
34
+ **
35
+ **
36
+ *******************************************************************
37
+ ******************************************************************/
38
+
39
+ /*******************************************************************
40
+ *******************************************************************
41
+ **
42
+ ** Starting off with the headers
43
+ **
44
+ *******************************************************************
45
+ ******************************************************************/
46
+
47
/* Column layout of a data line: each member holds the zero-based position
   of the named column, or -1 when that column is not present. */

typedef struct{
  int probe_id;   /* position of the "probe_id" column */
  int x;          /* position of the "x" column */
  int y;          /* position of the "y" column */
} header_0;
54
+
55
+ /*******************************************************************
56
+ **
57
+ ** These are all the headers that appear in CLF files
58
+ **
59
+ ** Note that some are required (chip_type, lib_set_name, lib_set_version, clf_format_version
60
+ ** rows, cols, header0)
61
+ ** While others are optional (sequential, order, create_date, guid and others)
62
+ **
63
+ **
64
+ *******************************************************************/
65
+
66
+ typedef struct{
67
+ char **chip_type;
68
+ int n_chip_type;
69
+ char *lib_set_name;
70
+ char *lib_set_version;
71
+ char *clf_format_version;
72
+ int rows;
73
+ int cols;
74
+ char *header0_str;
75
+ header_0 *header0;
76
+ int sequential;
77
+ char *order;
78
+ char *create_date;
79
+ char *guid;
80
+ char **other_headers_keys;
81
+ char **other_headers_values;
82
+ int n_other_headers;
83
+ } clf_headers;
84
+
85
/*******************************************************************
 *******************************************************************
 **
 ** The data section of a CLF file.
 **
 ** Only the probe ids are kept (to save space), in one flat array of
 ** rows*cols entries. A cell (x, y) lives at
 **
 **     index = y*cols + x
 **
 ** and therefore an index maps back to
 **
 **     x = index % cols      (remainder)
 **     y = index / cols      (integer division)
 **
 *******************************************************************
 ******************************************************************/

typedef struct{
  int *probe_id;   /* flat probe id table, see indexing rule above */
} clf_data;
112
+
113
+
114
+ /*******************************************************************
115
+ *******************************************************************
116
+ **
117
+ ** Structure for storing clf file (after it is read from file)
118
+ **
119
+ *******************************************************************
120
+ ******************************************************************/
121
+
122
+
123
+ typedef struct{
124
+ clf_headers *headers;
125
+ clf_data *data;
126
+ } clf_file;
127
+
128
+
129
+
130
+ /*******************************************************************
131
+ *******************************************************************
132
+ **
133
+ **
134
+ ** Code for splitting a string into a series of tokens
135
+ **
136
+ **
137
+ *******************************************************************
138
+ *******************************************************************/
139
+
140
+
141
/***************************************************************
 **
 ** tokenset
 **
 ** Holds the pieces produced by splitting a character string on a
 ** set of delimiters.
 **
 ** char **tokens - array of token strings
 ** int n         - number of strings in tokens
 **
 **************************************************************/

typedef struct{
  char **tokens;   /* the token strings */
  int n;           /* how many tokens are stored */
} tokenset;
159
+
160
+
161
+
162
+ /******************************************************************
163
+ **
164
+ ** tokenset *tokenize(char *str, char *delimiters)
165
+ **
166
+ ** char *str - a string to break into tokens
167
+ ** char *delimiters - delimiters to use in breaking up the line
168
+ **
169
+ **
170
+ ** RETURNS a new tokenset
171
+ **
172
+ ** Given a string, split into tokens based on a set of delimitors
173
+ **
174
+ *****************************************************************/
175
+
176
+ static tokenset *tokenize(char *str, char *delimiters){
177
+
178
+ #if USE_PTHREADS
179
+ char *tmp_pointer;
180
+ #endif
181
+ int i=0;
182
+
183
+ char *current_token;
184
+ tokenset *my_tokenset = Calloc(1,tokenset);
185
+ my_tokenset->n=0;
186
+
187
+ my_tokenset->tokens = NULL;
188
+ #if USE_PTHREADS
189
+ current_token = strtok_r(str,delimiters,&tmp_pointer);
190
+ #else
191
+ current_token = strtok(str,delimiters);
192
+ #endif
193
+ while (current_token != NULL){
194
+ my_tokenset->n++;
195
+ my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
196
+ my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
197
+ strcpy(my_tokenset->tokens[i],current_token);
198
+ my_tokenset->tokens[i][(strlen(current_token))] = '\0';
199
+ i++;
200
+ #if USE_PTHREADS
201
+ current_token = strtok_r(NULL,delimiters,&tmp_pointer);
202
+ #else
203
+ current_token = strtok(NULL,delimiters);
204
+ #endif
205
+ }
206
+ return my_tokenset;
207
+ }
208
+
209
+
210
+ /******************************************************************
211
+ **
212
+ ** int tokenset_size(tokenset *x)
213
+ **
214
+ ** tokenset *x - a tokenset
215
+ **
216
+ ** RETURNS the number of tokens in the tokenset
217
+ **
218
+ ******************************************************************/
219
+
220
+ static int tokenset_size(tokenset *x){
221
+ return x->n;
222
+ }
223
+
224
+
225
+ /******************************************************************
226
+ **
227
+ ** char *get_token(tokenset *x, int i)
228
+ **
229
+ ** tokenset *x - a tokenset
230
+ ** int i - index of the token to return
231
+ **
232
+ ** RETURNS pointer to the i'th token
233
+ **
234
+ ******************************************************************/
235
+
236
+ static char *get_token(tokenset *x,int i){
237
+ return x->tokens[i];
238
+ }
239
+
240
+ /******************************************************************
241
+ **
242
+ ** void delete_tokens(tokenset *x)
243
+ **
244
+ ** tokenset *x - a tokenset
245
+ **
246
+ ** Deallocates all the space allocated for a tokenset
247
+ **
248
+ ******************************************************************/
249
+
250
+ static void delete_tokens(tokenset *x){
251
+
252
+ int i;
253
+
254
+ for (i=0; i < x->n; i++){
255
+ Free(x->tokens[i]);
256
+ }
257
+ Free(x->tokens);
258
+ Free(x);
259
+ }
260
+
261
/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends_in)
 **
 ** char *token   - string to examine
 ** char *ends_in - suffix to look for at the end of token
 **
 ** RETURNS the index in token at which the suffix starts, or 0 when
 ** token does not end with ends_in.
 **
 ** token has to be strictly longer than ends_in, so a match can never
 ** start at index 0:
 **
 **   token = "TestStr", ends_in = "TestStr"  ->  0 (no match)
 **   token = "TestStr", ends_in = "estStr"   ->  1
 **   token = "TestStr", ends_in = "stStr"    ->  2
 **
 ******************************************************************/

static int token_ends_with(char *token, char *ends_in){

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;

  /* offset <= 0 means token is not strictly longer than the suffix */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset,ends_in) == 0) ? offset : 0;
}
314
+
315
+
316
+ /*******************************************************************
317
+ *******************************************************************
318
+ **
319
+ ** Code for Reading from file
320
+ **
321
+ *******************************************************************
322
+ *******************************************************************/
323
+
324
+
325
+
326
/****************************************************************
 **
 ** int ReadFileLine(char *buffer, int buffersize, FILE *currentFile)
 **
 ** char *buffer      - where the line is stored
 ** int buffersize    - capacity of buffer
 ** FILE *currentFile - open file to read from
 **
 ** Reads the next line of the file into buffer using fgets.
 **
 ** RETURNS 1 when a line was read, 0 when fgets reported NULL
 ** (end of file or read error). buffer is not touched by this
 ** function in the failure case.
 **
 ***************************************************************/

static int ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  char *got = fgets(buffer, buffersize, currentFile);

  return (got != NULL) ? 1 : 0;
}
346
+
347
+
348
+ /****************************************************************
349
+ ****************************************************************
350
+ **
351
+ ** Code for identifying what type of information is stored in
352
+ ** the current line
353
+ **
354
+ ****************************************************************
355
+ ***************************************************************/
356
+
357
/****************************************************************
 **
 ** static int IsHeaderLine(char *buffer)
 **
 ** char *buffer - line to classify
 **
 ** RETURNS 1 when the line begins with the two characters "#%"
 ** (a header line), 0 otherwise.
 **
 ***************************************************************/

static int IsHeaderLine(char *buffer){

  /* the && short-circuits, so buffer[1] is only read when buffer[0] is '#' */
  return (buffer[0] == '#' && buffer[1] == '%') ? 1 : 0;
}
377
+
378
/****************************************************************
 **
 ** static int IsCommentLine(char *buffer)
 **
 ** char *buffer - line to classify
 **
 ** RETURNS 1 when the line begins with '#', 0 otherwise. Header
 ** lines ("#%...") also begin with '#' and so also yield 1.
 **
 ***************************************************************/

static int IsCommentLine(char *buffer){
  return (buffer[0] == '#') ? 1 : 0;
}
395
+
396
+ /****************************************************************
397
+ **
398
+ ** void initialize_clf_header(clf_headers *header)
399
+ **
400
+ ** Initialize all the header values
401
+ **
402
+ **
403
+ **
404
+ ***************************************************************/
405
+
406
+ void initialize_clf_header(clf_headers *header){
407
+
408
+ header->chip_type = NULL;
409
+ header->n_chip_type = 0;
410
+
411
+ header->lib_set_name= NULL;
412
+ header->lib_set_version= NULL;
413
+ header->clf_format_version= NULL;
414
+ header->header0_str= NULL;
415
+ header->header0= NULL;
416
+ header->order = NULL;
417
+ header->create_date= NULL;
418
+ header->guid= NULL;
419
+ header->other_headers_keys= NULL;
420
+ header->other_headers_values= NULL;
421
+ header->n_other_headers=0;
422
+
423
+ header->rows = -1;
424
+ header->cols = -1;
425
+ header->n_other_headers = -1;
426
+
427
+ }
428
+
429
+
430
+ /****************************************************************
431
+ ****************************************************************
432
+ **
433
+ ** Code for reading in clf header
434
+ **
435
+ ****************************************************************
436
+ ***************************************************************/
437
+
438
+ static void determine_order_header0(char *header_str, header_0 *header0){
439
+
440
+ tokenset *cur_tokenset;
441
+ int i;
442
+ char *temp_str = Calloc(strlen(header_str) +1, char);
443
+
444
+
445
+ strcpy(temp_str,header_str);
446
+
447
+ header0->probe_id = -1;
448
+ header0->x = -1;
449
+ header0->y = -1;
450
+
451
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
452
+
453
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
454
+ if (strcmp(get_token(cur_tokenset,i),"probe_id")==0){
455
+ header0->probe_id = i;
456
+ } else if (strcmp(get_token(cur_tokenset,i),"x")==0){
457
+ header0->x = i;
458
+ } else if (strcmp(get_token(cur_tokenset,i),"y")==0){
459
+ header0->y = i;
460
+ }
461
+ }
462
+ delete_tokens(cur_tokenset);
463
+
464
+ Free(temp_str);
465
+
466
+ }
467
+
468
+ /****************************************************************
469
+ **
470
+ ** Validate that required headers are present in file.
471
+ **
472
+ ** Return 0 if an expected header is not present.
473
+ ** Returns 1 otherwise (ie everything looks fine)
474
+ **
475
+ ***************************************************************/
476
+
477
+ static int validate_clf_header(clf_headers *header){
478
+
479
+
480
+ /* check that required headers are all there (have been read) */
481
+ if (header->chip_type == NULL)
482
+ return 0;
483
+
484
+ if (header->lib_set_name == NULL)
485
+ return 0;
486
+
487
+ if (header->lib_set_version == NULL)
488
+ return 0;
489
+
490
+ if (header->clf_format_version == NULL)
491
+ return 0;
492
+
493
+ if (header->header0_str == NULL)
494
+ return 0;
495
+
496
+ if (header->rows == -1)
497
+ return 0;
498
+
499
+ if (header->cols == -1)
500
+ return 0;
501
+
502
+ /* Check that format version is 1.0 (only supported version) */
503
+
504
+ if (strcmp( header->clf_format_version,"1.0") != 0){
505
+ return 0;
506
+ }
507
+
508
+ /* check that header0, header1, header2 (ie the three levels of headers) have required fields */
509
+
510
+ if (header->header0->probe_id == -1)
511
+ return 0;
512
+
513
+ if (header->header0->x == -1)
514
+ return 0;
515
+
516
+ if (header->header0->y == -1)
517
+ return 0;
518
+
519
+
520
+ return 1;
521
+ }
522
+
523
/****************************************************************
 **
 ** static FILE *open_clf_file(const char *filename)
 **
 ** const char *filename - path of the CLF file
 **
 ** Opens the file for reading in text mode ("r").
 **
 ** RETURNS the open FILE pointer. When fopen fails, error() is
 ** called with a message naming the file.
 **
 ***************************************************************/

static FILE *open_clf_file(const char *filename){

  FILE *handle = fopen(filename,"r");

  if (!handle){
    error("Could not open file %s", filename);
  }

  return handle;

}
543
+
544
+ /****************************************************************
545
+ **
546
+ ** void read_clf_header(FILE *cur_file, char *buffer, clf_headers *header)
547
+ **
548
+ ** read the CLF header section
549
+ **
550
+ **
551
+ ***************************************************************/
552
+
553
+ void read_clf_header(FILE *cur_file, char *buffer, clf_headers *header){
554
+
555
+
556
+ tokenset *cur_tokenset;
557
+ int i;
558
+ char *temp_str;
559
+
560
+
561
+ initialize_clf_header(header);
562
+ do {
563
+ ReadFileLine(buffer, 1024, cur_file);
564
+ /* Rprintf("%s\n",buffer); */
565
+ if (IsHeaderLine(buffer)){
566
+ cur_tokenset = tokenize(&buffer[2],"=\r\n");
567
+ /* hopefully token 0 is Key
568
+ and token 1 is Value */
569
+ /* Rprintf("Key is: %s\n",get_token(cur_tokenset,0));
570
+ Rprintf("Value is: %s\n",get_token(cur_tokenset,1)); */
571
+ /* Decode the Key/Value pair */
572
+ if (strcmp(get_token(cur_tokenset,0),"chip_type") == 0){
573
+ if (header->n_chip_type == 0){
574
+ header->chip_type = Calloc(1, char *);
575
+ } else {
576
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
577
+ }
578
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
579
+ strcpy(temp_str,get_token(cur_tokenset,1));
580
+ header->chip_type[header->n_chip_type] = temp_str;
581
+ header->n_chip_type++;
582
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_name") == 0){
583
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
584
+ strcpy(temp_str,get_token(cur_tokenset,1));
585
+ header->lib_set_name = temp_str;
586
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_version") == 0){
587
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
588
+ strcpy(temp_str,get_token(cur_tokenset,1));
589
+ header->lib_set_version = temp_str;
590
+ } else if (strcmp(get_token(cur_tokenset,0), "clf_format_version") == 0) {
591
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
592
+ strcpy(temp_str,get_token(cur_tokenset,1));
593
+ header->clf_format_version = temp_str;
594
+ } else if (strcmp(get_token(cur_tokenset,0), "rows") == 0) {
595
+ header->rows = atoi(get_token(cur_tokenset,1));
596
+ } else if (strcmp(get_token(cur_tokenset,0), "cols") == 0) {
597
+ header->cols = atoi(get_token(cur_tokenset,1));
598
+ } else if (strcmp(get_token(cur_tokenset,0), "header0") == 0) {
599
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
600
+ strcpy(temp_str,get_token(cur_tokenset,1));
601
+ header->header0_str = temp_str;
602
+ header->header0 = Calloc(1,header_0);
603
+ determine_order_header0(header->header0_str,header->header0);
604
+ } else if (strcmp(get_token(cur_tokenset,0), "create_date") == 0) {
605
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
606
+ strcpy(temp_str,get_token(cur_tokenset,1));
607
+ header->create_date = temp_str;
608
+ } else if (strcmp(get_token(cur_tokenset,0), "order") == 0) {
609
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
610
+ strcpy(temp_str,get_token(cur_tokenset,1));
611
+ header->order = temp_str;
612
+ } else if (strcmp(get_token(cur_tokenset,0), "sequential") == 0) {
613
+ header->sequential = atoi(get_token(cur_tokenset,1));
614
+ } else if (strcmp(get_token(cur_tokenset,0), "guid") == 0) {
615
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
616
+ strcpy(temp_str,get_token(cur_tokenset,1));
617
+ header->guid = temp_str;
618
+ } else {
619
+ /* not one of the recognised header types */
620
+ if ( header->n_other_headers == 0){
621
+ header->other_headers_keys = Calloc(1, char *);
622
+ header->other_headers_values = Calloc(1, char *);
623
+ } else {
624
+ header->other_headers_keys = Realloc(header->other_headers_keys,header->n_other_headers+1, char *);
625
+ header->other_headers_values = Realloc(header->other_headers_values,header->n_other_headers+1, char *);
626
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
627
+ }
628
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
629
+ strcpy(temp_str,get_token(cur_tokenset,1));
630
+ header->other_headers_values[header->n_other_headers] = temp_str;
631
+ temp_str = Calloc(strlen(get_token(cur_tokenset,0)) + 1,char);
632
+ strcpy(temp_str,get_token(cur_tokenset,0));
633
+ header->other_headers_keys[header->n_other_headers] = temp_str;
634
+ header->n_other_headers++;
635
+
636
+ }
637
+
638
+ delete_tokens(cur_tokenset);
639
+ }
640
+ } while (IsHeaderLine(buffer));
641
+
642
+ }
643
+
644
+ /****************************************************************
645
+ **
646
+ ** void read_clf_data(FILE *cur_file, char *buffer, clf_data *data, clf_headers *header)
647
+ **
648
+ ** Read in the data part of the file. Specifically, the x,y, probe_id section.
649
+ ** Note to save space only the probe_id are stored.
650
+ **
651
+ ****************************************************************/
652
+
653
+ void read_clf_data(FILE *cur_file, char *buffer, clf_data *data, clf_headers *header){
654
+ tokenset *cur_tokenset;
655
+ int x, y, cur_id;
656
+
657
+ /* Check to see if the header information includes enough to know that probe_ids are deterministic */
658
+ /* if the are deterministic then don't need to read the rest of the file */
659
+
660
+
661
+ if (header->sequential > -1){
662
+ data->probe_id = NULL;
663
+ return;
664
+ } else {
665
+ data->probe_id = Calloc((header->rows)*(header->cols), int);
666
+ cur_tokenset = tokenize(buffer,"\t\r\n");
667
+ cur_id = atoi(get_token(cur_tokenset,header->header0->probe_id));
668
+ x = atoi(get_token(cur_tokenset,header->header0->x));
669
+ y = atoi(get_token(cur_tokenset,header->header0->y));
670
+ data->probe_id[y*header->cols + x] = cur_id;
671
+
672
+ delete_tokens(cur_tokenset);
673
+ while(ReadFileLine(buffer, 1024, cur_file)){
674
+ cur_tokenset = tokenize(buffer,"\t\r\n");
675
+ cur_id = atoi(get_token(cur_tokenset,header->header0->probe_id));
676
+ x = atoi(get_token(cur_tokenset,header->header0->x));
677
+ y = atoi(get_token(cur_tokenset,header->header0->y));
678
+ data->probe_id[y*header->cols + x] = cur_id;
679
+
680
+ delete_tokens(cur_tokenset);
681
+ }
682
+ }
683
+ }
684
+
685
+
686
+
687
+
688
+
689
+
690
+ /****************************************************************
691
+ ****************************************************************
692
+ **
693
+ ** Code for deallocating or initializing header data structures
694
+ **
695
+ ****************************************************************
696
+ ****************************************************************/
697
+
698
+ void dealloc_clf_headers(clf_headers *header){
699
+ int i;
700
+
701
+ if (header->n_chip_type > 0){
702
+ for (i = 0; i < header->n_chip_type; i++){
703
+ Free(header->chip_type[i]);
704
+ }
705
+ Free(header->chip_type);
706
+ }
707
+
708
+ if (header->lib_set_name != NULL){
709
+ Free(header->lib_set_name);
710
+ }
711
+
712
+ if (header->lib_set_version != NULL){
713
+ Free(header->lib_set_version);
714
+ }
715
+
716
+ if (header->clf_format_version != NULL){
717
+ Free(header->clf_format_version);
718
+ }
719
+
720
+ if (header->header0_str != NULL){
721
+ Free(header->header0_str);
722
+ Free(header->header0);
723
+ }
724
+
725
+ if (header->order != NULL){
726
+ Free(header->order);
727
+ }
728
+
729
+ if (header->create_date != NULL){
730
+ Free(header->create_date);
731
+ }
732
+
733
+ if (header->guid != NULL){
734
+ Free(header->guid);
735
+ }
736
+
737
+ if (header->n_other_headers > 0){
738
+ for (i = 0; i < header->n_other_headers; i++){
739
+ Free(header->other_headers_keys[i]);
740
+ Free(header->other_headers_values[i]);
741
+ }
742
+ Free(header->other_headers_keys);
743
+ Free(header->other_headers_values);
744
+ }
745
+ }
746
+
747
+
748
+ void dealloc_clf_data(clf_data *data){
749
+ if (data->probe_id != NULL){
750
+ Free(data->probe_id);
751
+ }
752
+ }
753
+
754
+
755
+ void dealloc_clf_file(clf_file* my_clf){
756
+
757
+
758
+ if (my_clf->headers != NULL){
759
+ dealloc_clf_headers(my_clf->headers);
760
+ Free(my_clf->headers);
761
+ }
762
+
763
+
764
+ if (my_clf->data !=NULL){
765
+ dealloc_clf_data(my_clf->data);
766
+ Free(my_clf->data);
767
+ }
768
+
769
+
770
+ }
771
+
772
+ /**********************************************************************
773
+ ***
774
+ *** A function for getting the probe_id for a given x,y
775
+ ***
776
+ ***
777
+ *********************************************************************/
778
+
779
+ void clf_get_probe_id(clf_file *clf, int *probe_id, int x, int y){
780
+
781
+ if (clf->headers->sequential > -1){
782
+ /* Check if order is "col_major" or "row_major" */
783
+
784
+ if (strcmp(clf->headers->order,"col_major") == 0){
785
+ *probe_id = y*clf->headers->cols + x + clf->headers->sequential;
786
+ } else if (strcmp(clf->headers->order,"row_major") == 0){
787
+ *probe_id = x*clf->headers->rows + y + clf->headers->sequential;
788
+ } else {
789
+ *probe_id = -1; /* ie missing */
790
+ }
791
+
792
+ } else {
793
+
794
+ *probe_id = clf->data->probe_id[y*clf->headers->rows + x];
795
+ }
796
+ }
797
+
798
+ /**********************************************************************
799
+ ***
800
+ *** A function for getting the x , y for a given probe_id
801
+ ***
802
+ ***
803
+ *********************************************************************/
804
+
805
+ void clf_get_x_y(clf_file *clf, int probe_id, int *x, int *y){
806
+ int ind;
807
+
808
+ if (clf->headers->sequential > -1){
809
+ /* Check if order is "col_major" or "row_major" */
810
+
811
+ if (strcmp(clf->headers->order,"col_major") == 0){
812
+ ind = (probe_id - clf->headers->sequential);
813
+ *x = ind%clf->headers->cols;
814
+ *y = ind/clf->headers->cols;
815
+ } else if (strcmp(clf->headers->order,"row_major") == 0){
816
+ ind = (probe_id - clf->headers->sequential);
817
+ *x = ind/clf->headers->rows;
818
+ *y = ind%clf->headers->rows;
819
+ } else {
820
+ *x = -1; /* ie missing */
821
+ *y = -1;
822
+ }
823
+ } else {
824
+ /* Linear Search (this should be improved for routine use) */
825
+ ind = 0;
826
+
827
+ while (ind < (clf->headers->cols*clf->headers->rows)){
828
+ if (clf->data->probe_id[ind] == probe_id){
829
+ break;
830
+ }
831
+ ind++;
832
+ }
833
+
834
+ if (ind == (clf->headers->cols*clf->headers->rows)){
835
+ *x = -1; *y = -1;
836
+ } else {
837
+ *x = ind/clf->headers->rows;
838
+ *y = ind%clf->headers->rows;
839
+ }
840
+ }
841
+ }
842
+
843
+ /*
844
+ * Note this function is only for testing purposes. It provides no methodology for accessing anything
845
+ * stored in the CLF file in R.
846
+ *
847
+ */
848
+
849
+ void read_clf_file(char **filename){
850
+
851
+ FILE *cur_file;
852
+ clf_file my_clf;
853
+ char *buffer = Calloc(1024, char);
854
+
855
+
856
+
857
+ cur_file = open_clf_file(filename[0]);
858
+
859
+ my_clf.headers = Calloc(1, clf_headers);
860
+ my_clf.data = Calloc(1, clf_data);
861
+
862
+ read_clf_header(cur_file,buffer,my_clf.headers);
863
+ if (validate_clf_header(my_clf.headers))
864
+ read_clf_data(cur_file, buffer, my_clf.data, my_clf.headers);
865
+
866
+ Free(buffer);
867
+ dealloc_clf_file(&my_clf);
868
+ fclose(cur_file);
869
+
870
+ }