bio-affy 0.1.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +32 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +33 -0
  7. data/Rakefile +77 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-affy +80 -0
  10. data/bio-affy.gemspec +128 -0
  11. data/ext/DESCRIPTION +11 -0
  12. data/ext/HISTORY +3 -0
  13. data/ext/LICENSE +456 -0
  14. data/ext/NAMESPACE +2 -0
  15. data/ext/R/check.cdf.type.R +18 -0
  16. data/ext/R/read.cdffile.list.R +23 -0
  17. data/ext/R/read.celfile.R +11 -0
  18. data/ext/R/read.celfile.header.R +37 -0
  19. data/ext/R/read.probematrices.R +29 -0
  20. data/ext/README_BIOLIB +36 -0
  21. data/ext/aclocal.m4 +32 -0
  22. data/ext/configure +4898 -0
  23. data/ext/configure.in +51 -0
  24. data/ext/man/check.cdf.type.Rd +22 -0
  25. data/ext/man/read.cdffile.list.Rd +20 -0
  26. data/ext/man/read.celfile.Rd +23 -0
  27. data/ext/man/read.celfile.header.Rd +22 -0
  28. data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
  29. data/ext/src/CMakeLists.txt +39 -0
  30. data/ext/src/Makevars.in +3 -0
  31. data/ext/src/Makevars.win +2 -0
  32. data/ext/src/Rakefile +43 -0
  33. data/ext/src/biolib_affyio.c +416 -0
  34. data/ext/src/biolib_affyio.h +132 -0
  35. data/ext/src/biolib_affyio.o +0 -0
  36. data/ext/src/fread_functions.c +871 -0
  37. data/ext/src/fread_functions.h +60 -0
  38. data/ext/src/fread_functions.o +0 -0
  39. data/ext/src/libaffyext.so +0 -0
  40. data/ext/src/mkrf.log +11 -0
  41. data/ext/src/mkrf_conf.rb +6 -0
  42. data/ext/src/read_abatch.c +5484 -0
  43. data/ext/src/read_abatch.h +63 -0
  44. data/ext/src/read_abatch.o +0 -0
  45. data/ext/src/read_bpmap.c +888 -0
  46. data/ext/src/read_bpmap.o +0 -0
  47. data/ext/src/read_cdf.h +347 -0
  48. data/ext/src/read_cdf_xda.c +1342 -0
  49. data/ext/src/read_cdf_xda.o +0 -0
  50. data/ext/src/read_cdffile2.c +1576 -0
  51. data/ext/src/read_cdffile2.o +0 -0
  52. data/ext/src/read_celfile_generic.c +2061 -0
  53. data/ext/src/read_celfile_generic.h +33 -0
  54. data/ext/src/read_celfile_generic.o +0 -0
  55. data/ext/src/read_clf.c +870 -0
  56. data/ext/src/read_clf.o +0 -0
  57. data/ext/src/read_generic.c +1446 -0
  58. data/ext/src/read_generic.h +144 -0
  59. data/ext/src/read_generic.o +0 -0
  60. data/ext/src/read_pgf.c +1337 -0
  61. data/ext/src/read_pgf.o +0 -0
  62. data/lib/bio-affy.rb +5 -0
  63. data/lib/bio/affy.rb +7 -0
  64. data/lib/bio/affyext.rb +23 -0
  65. data/lib/bio/libaffyext.so +0 -0
  66. data/spec/bio-affy_spec.rb +22 -0
  67. data/spec/spec_helper.rb +13 -0
  68. data/test/data/affy/GSM103328.CEL.gz +0 -0
  69. data/test/data/affy/GSM103329.CEL.gz +0 -0
  70. data/test/data/affy/GSM103330.CEL.gz +0 -0
  71. data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
  72. metadata +190 -0
@@ -0,0 +1,33 @@
1
+ #ifndef READ_CELFILE_GENERIC_H /* include guard for this header */
2
+ #define READ_CELFILE_GENERIC_H
3
+
4
+ #ifdef BIOLIB /* biolib_R_map.h is pulled in only when BIOLIB is defined */
5
+ #include <biolib_R_map.h>
6
+ #endif /* BIOLIB */
7
+
8
+ #include "read_abatch.h" /* presumably declares detailed_header_info used below - TODO confirm */
9
+ /* First group: entry points for generic CEL files (names without a gz prefix). */
10
+ int isGenericCelFile(const char *filename);
11
+ char *generic_get_header_info(const char *filename, int *dim1, int *dim2);
12
+ void generic_get_detailed_header_info(const char *filename, detailed_header_info *header_info);
13
+ int read_genericcel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
14
+ int check_generic_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2);
15
+ int read_genericcel_file_stddev(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
16
+ int read_genericcel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
17
+ void generic_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y);
18
+ void generic_apply_masks(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers);
19
+
20
+
21
+ /* Second group: same signatures with a gz prefix - presumably the gzip-compressed counterparts; confirm against the implementation. */
22
+ int isgzGenericCelFile(const char *filename);
23
+ char *gzgeneric_get_header_info(const char *filename, int *dim1, int *dim2);
24
+ void gzgeneric_get_detailed_header_info(const char *filename, detailed_header_info *header_info);
25
+ int gzread_genericcel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
26
+ int check_gzgeneric_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2);
27
+ int gzread_genericcel_file_stddev(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
28
+ int gzread_genericcel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows);
29
+ void gzgeneric_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y);
30
+ void gzgeneric_apply_masks(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers);
31
+
32
+
33
+ #endif /* READ_CELFILE_GENERIC_H */
@@ -0,0 +1,870 @@
1
+ /******************************************************************
2
+ **
3
+ ** file: read_clf.c
4
+ **
5
+ ** Aim: implement parsing of CLF format files
6
+ **
7
+ ** Copyright (C) 2007-2008 B. M. Bolstad
8
+ **
9
+ ** Created on Nov 4, 2007
10
+ **
11
+ ** History
12
+ ** Dec 14, 2007 - Initial version
13
+ ** Dec 31, 2007 - Add function for checking that required headers were found
14
+ ** Jan 2, 2008 - port x,y to probe_id and probe_id to x,y functions from RMAExpress parsers
15
+ ** Mar 18, 2008 - fix error in read_clf_header function
16
+ **
17
+ **
18
+ **
19
+ ******************************************************************/
20
+
21
+ #include <R.h>
22
+
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+
26
+
27
+ #define BUFFERSIZE 1024
28
+
29
+ /*******************************************************************
30
+ *******************************************************************
31
+ **
32
+ ** Structures for dealing with clf file information
33
+ **
34
+ **
35
+ **
36
+ *******************************************************************
37
+ ******************************************************************/
38
+
39
+ /*******************************************************************
40
+ *******************************************************************
41
+ **
42
+ ** Starting off with the headers
43
+ **
44
+ *******************************************************************
45
+ ******************************************************************/
46
+
47
+ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */
48
+
49
+ typedef struct{ /* column layout of the data rows, filled in by determine_order_header0 */
50
+ int probe_id; /* token index of the "probe_id" column in the header0 line, -1 if absent */
51
+ int x; /* token index of the "x" column, -1 if absent */
52
+ int y; /* token index of the "y" column, -1 if absent */
53
+ } header_0;
54
+
55
+ /*******************************************************************
56
+ **
57
+ ** These are all the headers that appear in CLF files
58
+ **
59
+ ** Note that some are required (chip_type, lib_set_name, lib_set_version, clf_format_version
60
+ ** rows, cols, header0)
61
+ ** While others are optional (sequential, order, create_date, guid and others)
62
+ **
63
+ **
64
+ *******************************************************************/
65
+
66
+ typedef struct{ /* everything gathered from the "#%key=value" lines of a CLF file */
67
+ char **chip_type; /* one string per "chip_type" header line, in file order */
68
+ int n_chip_type; /* number of entries in chip_type */
69
+ char *lib_set_name; /* value of "lib_set_name", NULL until read */
70
+ char *lib_set_version; /* value of "lib_set_version", NULL until read */
71
+ char *clf_format_version; /* value of "clf_format_version"; validation accepts only "1.0" */
72
+ int rows; /* atoi of the "rows" value, -1 until read */
73
+ int cols; /* atoi of the "cols" value, -1 until read */
74
+ char *header0_str; /* raw value of "header0" (the tab separated column names) */
75
+ header_0 *header0; /* column positions decoded from header0_str */
76
+ int sequential; /* atoi of the "sequential" value; the readers treat a value > -1 as "probe ids are computed, not stored" */
77
+ char *order; /* value of "order"; compared against "col_major" and "row_major" */
78
+ char *create_date; /* value of "create_date", NULL until read */
79
+ char *guid; /* value of "guid", NULL until read */
80
+ char **other_headers_keys; /* keys of header lines not recognised above */
81
+ char **other_headers_values; /* values matching other_headers_keys, same indexing */
82
+ int n_other_headers; /* number of entries in the two other_headers arrays */
83
+ } clf_headers;
84
+
85
+ /*******************************************************************
86
+ *******************************************************************
87
+ **
88
+ ** Now the actual data
89
+ **
90
+ ** (only store the probeset ids to save space)
91
+ **
92
+ ** length of probe_id is rows*cols.
93
+ **
94
+ ** Given an x, y it maps to probe_id[index]
95
+ **
96
+ ** index = y*cols + x
97
+ **
98
+ ** Which means that given an index, it maps to
99
+ **
100
+ ** x = index % cols where % means modulo (ie remainder)
101
+ ** y = index / cols
102
+ **
103
+ **
104
+ **
105
+ *******************************************************************
106
+ ******************************************************************/
107
+
108
+
109
+ typedef struct{ /* data section of a CLF file */
110
+ int *probe_id; /* rows*cols ids written at index y*cols + x by read_clf_data; NULL when the sequential header makes the ids computable */
111
+ } clf_data;
112
+
113
+
114
+ /*******************************************************************
115
+ *******************************************************************
116
+ **
117
+ ** Structure for storing clf file (after it is read from file)
118
+ **
119
+ *******************************************************************
120
+ ******************************************************************/
121
+
122
+
123
+ typedef struct{ /* a whole CLF file in memory; released by dealloc_clf_file */
124
+ clf_headers *headers; /* parsed header section */
125
+ clf_data *data; /* parsed data section */
126
+ } clf_file;
127
+
128
+
129
+
130
+ /*******************************************************************
131
+ *******************************************************************
132
+ **
133
+ **
134
+ ** Code for splitting a string into a series of tokens
135
+ **
136
+ **
137
+ *******************************************************************
138
+ *******************************************************************/
139
+
140
+
141
+ /***************************************************************
142
+ **
143
+ ** tokenset
144
+ **
145
+ ** char **tokens - a array of token strings
146
+ ** int n - number of tokens in this set.
147
+ **
148
+ ** a structure to hold a set of tokens. Typically a tokenset is
149
+ ** created by breaking a character string based upon a set of
150
+ ** delimiters.
151
+ **
152
+ **
153
+ **************************************************************/
154
+
155
+ typedef struct{ /* result of tokenize(); released by delete_tokens() */
156
+ char **tokens; /* array of n separately allocated token strings (NULL when n is 0) */
157
+ int n; /* number of tokens held */
158
+ } tokenset;
159
+
160
+
161
+
162
+ /******************************************************************
163
+ **
164
+ ** tokenset *tokenize(char *str, char *delimiters)
165
+ **
166
+ ** char *str - a string to break into tokens
167
+ ** char *delimiters - delimiters to use in breaking up the line
168
+ **
169
+ **
170
+ ** RETURNS a new tokenset
171
+ **
172
+ ** Given a string, split into tokens based on a set of delimitors
173
+ **
174
+ *****************************************************************/
175
+
176
+ static tokenset *tokenize(char *str, char *delimiters){
177
+
178
+ #if USE_PTHREADS
179
+ char *tmp_pointer;
180
+ #endif
181
+ int i=0;
182
+
183
+ char *current_token;
184
+ tokenset *my_tokenset = Calloc(1,tokenset);
185
+ my_tokenset->n=0;
186
+
187
+ my_tokenset->tokens = NULL;
188
+ #if USE_PTHREADS
189
+ current_token = strtok_r(str,delimiters,&tmp_pointer);
190
+ #else
191
+ current_token = strtok(str,delimiters);
192
+ #endif
193
+ while (current_token != NULL){
194
+ my_tokenset->n++;
195
+ my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
196
+ my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
197
+ strcpy(my_tokenset->tokens[i],current_token);
198
+ my_tokenset->tokens[i][(strlen(current_token))] = '\0';
199
+ i++;
200
+ #if USE_PTHREADS
201
+ current_token = strtok_r(NULL,delimiters,&tmp_pointer);
202
+ #else
203
+ current_token = strtok(NULL,delimiters);
204
+ #endif
205
+ }
206
+ return my_tokenset;
207
+ }
208
+
209
+
210
+ /******************************************************************
211
+ **
212
+ ** int tokenset_size(tokenset *x)
213
+ **
214
+ ** tokenset *x - a tokenset
215
+ **
216
+ ** RETURNS the number of tokens in the tokenset
217
+ **
218
+ ******************************************************************/
219
+
220
+ static int tokenset_size(tokenset *x){
221
+ return x->n;
222
+ }
223
+
224
+
225
+ /******************************************************************
226
+ **
227
+ ** char *get_token(tokenset *x, int i)
228
+ **
229
+ ** tokenset *x - a tokenset
230
+ ** int i - index of the token to return
231
+ **
232
+ ** RETURNS pointer to the i'th token
233
+ **
234
+ ******************************************************************/
235
+
236
+ static char *get_token(tokenset *x,int i){
237
+ return x->tokens[i];
238
+ }
239
+
240
+ /******************************************************************
241
+ **
242
+ ** void delete_tokens(tokenset *x)
243
+ **
244
+ ** tokenset *x - a tokenset
245
+ **
246
+ ** Deallocates all the space allocated for a tokenset
247
+ **
248
+ ******************************************************************/
249
+
250
+ static void delete_tokens(tokenset *x){
251
+
252
+ int i;
253
+
254
+ for (i=0; i < x->n; i++){
255
+ Free(x->tokens[i]);
256
+ }
257
+ Free(x->tokens);
258
+ Free(x);
259
+ }
260
+
261
/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends_in)
 **
 ** char *token   - the string to inspect
 ** char *ends_in - the suffix we are looking for
 **
 ** RETURNS the index in token at which the suffix begins, or 0
 ** when token does not end with ends_in.
 **
 ** token has to be strictly longer than ends_in, so a full match
 ** also gives 0:
 **
 **    token = "TestStr", ends_in = "TestStr"  -> 0
 **    token = "TestStr", ends_in = "estStr"   -> 1
 **    token = "TestStr", ends_in = "stStr"    -> 2
 **
 ******************************************************************/

static int token_ends_with(char *token, char *ends_in){

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;

  /* the token must be strictly longer than the suffix */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset,ends_in) == 0) ? offset : 0;
}
314
+
315
+
316
+ /*******************************************************************
317
+ *******************************************************************
318
+ **
319
+ ** Code for Reading from file
320
+ **
321
+ *******************************************************************
322
+ *******************************************************************/
323
+
324
+
325
+
326
/****************************************************************
 **
 ** int ReadFileLine(char *buffer, int buffersize, FILE *currentFile)
 **
 ** char *buffer      - where the line is stored
 ** int buffersize    - capacity of buffer in bytes
 ** FILE *currentFile - stream opened for reading
 **
 ** Reads the next line with fgets.
 **
 ** RETURNS 1 when a line was read, 0 when fgets returned NULL
 ** (end of file or read error). buffer is not written to by this
 ** function in the 0 case.
 **
 ***************************************************************/

static int ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  char *got = fgets(buffer, buffersize, currentFile);

  return (got != NULL) ? 1 : 0;
}
346
+
347
+
348
+ /****************************************************************
349
+ ****************************************************************
350
+ **
351
+ ** Code for identifying what type of information is stored in
352
+ ** the current line
353
+ **
354
+ ****************************************************************
355
+ ***************************************************************/
356
+
357
/****************************************************************
 **
 ** static int IsHeaderLine(char *buffer)
 **
 ** char *buffer - NUL terminated line to classify
 **
 ** RETURNS 1 when the line begins with the two characters "#%",
 ** 0 otherwise.
 **
 ***************************************************************/


static int IsHeaderLine(char *buffer){

  /* the second character is only inspected once the first matched,
     so an empty string is never read past its terminator */
  return (buffer[0] == '#' && buffer[1] == '%') ? 1 : 0;
}
377
+
378
/****************************************************************
 **
 ** static int IsCommentLine(char *buffer)
 **
 ** char *buffer - NUL terminated line to classify
 **
 ** RETURNS 1 when the line begins with '#', 0 otherwise.
 ** Header lines ("#%...") therefore also count as comment lines.
 **
 ***************************************************************/

static int IsCommentLine(char *buffer){
  return (buffer[0] == '#') ? 1 : 0;
}
395
+
396
+ /****************************************************************
397
+ **
398
+ ** void initialize_clf_header(clf_headers *header)
399
+ **
400
+ ** Initialize all the header values
401
+ **
402
+ **
403
+ **
404
+ ***************************************************************/
405
+
406
+ void initialize_clf_header(clf_headers *header){
407
+
408
+ header->chip_type = NULL;
409
+ header->n_chip_type = 0;
410
+
411
+ header->lib_set_name= NULL;
412
+ header->lib_set_version= NULL;
413
+ header->clf_format_version= NULL;
414
+ header->header0_str= NULL;
415
+ header->header0= NULL;
416
+ header->order = NULL;
417
+ header->create_date= NULL;
418
+ header->guid= NULL;
419
+ header->other_headers_keys= NULL;
420
+ header->other_headers_values= NULL;
421
+ header->n_other_headers=0;
422
+
423
+ header->rows = -1;
424
+ header->cols = -1;
425
+ header->n_other_headers = -1;
426
+
427
+ }
428
+
429
+
430
+ /****************************************************************
431
+ ****************************************************************
432
+ **
433
+ ** Code for reading in clf header
434
+ **
435
+ ****************************************************************
436
+ ***************************************************************/
437
+
438
+ static void determine_order_header0(char *header_str, header_0 *header0){
439
+
440
+ tokenset *cur_tokenset;
441
+ int i;
442
+ char *temp_str = Calloc(strlen(header_str) +1, char);
443
+
444
+
445
+ strcpy(temp_str,header_str);
446
+
447
+ header0->probe_id = -1;
448
+ header0->x = -1;
449
+ header0->y = -1;
450
+
451
+ cur_tokenset = tokenize(temp_str,"\t\r\n");
452
+
453
+ for (i=0; i < tokenset_size(cur_tokenset); i++){
454
+ if (strcmp(get_token(cur_tokenset,i),"probe_id")==0){
455
+ header0->probe_id = i;
456
+ } else if (strcmp(get_token(cur_tokenset,i),"x")==0){
457
+ header0->x = i;
458
+ } else if (strcmp(get_token(cur_tokenset,i),"y")==0){
459
+ header0->y = i;
460
+ }
461
+ }
462
+ delete_tokens(cur_tokenset);
463
+
464
+ Free(temp_str);
465
+
466
+ }
467
+
468
+ /****************************************************************
469
+ **
470
+ ** Validate that required headers are present in file.
471
+ **
472
+ ** Return 0 if an expected header is not present.
473
+ ** Returns 1 otherwise (ie everything looks fine)
474
+ **
475
+ ***************************************************************/
476
+
477
+ static int validate_clf_header(clf_headers *header){
478
+
479
+
480
+ /* check that required headers are all there (have been read) */
481
+ if (header->chip_type == NULL)
482
+ return 0;
483
+
484
+ if (header->lib_set_name == NULL)
485
+ return 0;
486
+
487
+ if (header->lib_set_version == NULL)
488
+ return 0;
489
+
490
+ if (header->clf_format_version == NULL)
491
+ return 0;
492
+
493
+ if (header->header0_str == NULL)
494
+ return 0;
495
+
496
+ if (header->rows == -1)
497
+ return 0;
498
+
499
+ if (header->cols == -1)
500
+ return 0;
501
+
502
+ /* Check that format version is 1.0 (only supported version) */
503
+
504
+ if (strcmp( header->clf_format_version,"1.0") != 0){
505
+ return 0;
506
+ }
507
+
508
+ /* check that header0, header1, header2 (ie the three levels of headers) have required fields */
509
+
510
+ if (header->header0->probe_id == -1)
511
+ return 0;
512
+
513
+ if (header->header0->x == -1)
514
+ return 0;
515
+
516
+ if (header->header0->y == -1)
517
+ return 0;
518
+
519
+
520
+ return 1;
521
+ }
522
+
523
/****************************************************************
 **
 ** static FILE *open_clf_file(const char *filename)
 **
 ** const char *filename - path of the CLF file
 **
 ** Opens the file for reading in text mode.
 **
 ** RETURNS the open stream. When fopen fails, error() is called
 ** with the file name in the message.
 **
 ***************************************************************/

static FILE *open_clf_file(const char *filename){

  FILE *handle = fopen(filename,"r");

  if (handle == NULL){
    error("Could not open file %s", filename);
  }

  return handle;
}
543
+
544
+ /****************************************************************
545
+ **
546
+ ** void read_clf_header(FILE *cur_file, char *buffer, clf_headers *header)
547
+ **
548
+ ** read the CLF header section
549
+ **
550
+ **
551
+ ***************************************************************/
552
+
553
+ void read_clf_header(FILE *cur_file, char *buffer, clf_headers *header){
554
+
555
+
556
+ tokenset *cur_tokenset;
557
+ int i;
558
+ char *temp_str;
559
+
560
+
561
+ initialize_clf_header(header);
562
+ do {
563
+ ReadFileLine(buffer, 1024, cur_file);
564
+ /* Rprintf("%s\n",buffer); */
565
+ if (IsHeaderLine(buffer)){
566
+ cur_tokenset = tokenize(&buffer[2],"=\r\n");
567
+ /* hopefully token 0 is Key
568
+ and token 1 is Value */
569
+ /* Rprintf("Key is: %s\n",get_token(cur_tokenset,0));
570
+ Rprintf("Value is: %s\n",get_token(cur_tokenset,1)); */
571
+ /* Decode the Key/Value pair */
572
+ if (strcmp(get_token(cur_tokenset,0),"chip_type") == 0){
573
+ if (header->n_chip_type == 0){
574
+ header->chip_type = Calloc(1, char *);
575
+ } else {
576
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
577
+ }
578
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
579
+ strcpy(temp_str,get_token(cur_tokenset,1));
580
+ header->chip_type[header->n_chip_type] = temp_str;
581
+ header->n_chip_type++;
582
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_name") == 0){
583
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
584
+ strcpy(temp_str,get_token(cur_tokenset,1));
585
+ header->lib_set_name = temp_str;
586
+ } else if (strcmp(get_token(cur_tokenset,0), "lib_set_version") == 0){
587
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
588
+ strcpy(temp_str,get_token(cur_tokenset,1));
589
+ header->lib_set_version = temp_str;
590
+ } else if (strcmp(get_token(cur_tokenset,0), "clf_format_version") == 0) {
591
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
592
+ strcpy(temp_str,get_token(cur_tokenset,1));
593
+ header->clf_format_version = temp_str;
594
+ } else if (strcmp(get_token(cur_tokenset,0), "rows") == 0) {
595
+ header->rows = atoi(get_token(cur_tokenset,1));
596
+ } else if (strcmp(get_token(cur_tokenset,0), "cols") == 0) {
597
+ header->cols = atoi(get_token(cur_tokenset,1));
598
+ } else if (strcmp(get_token(cur_tokenset,0), "header0") == 0) {
599
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
600
+ strcpy(temp_str,get_token(cur_tokenset,1));
601
+ header->header0_str = temp_str;
602
+ header->header0 = Calloc(1,header_0);
603
+ determine_order_header0(header->header0_str,header->header0);
604
+ } else if (strcmp(get_token(cur_tokenset,0), "create_date") == 0) {
605
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
606
+ strcpy(temp_str,get_token(cur_tokenset,1));
607
+ header->create_date = temp_str;
608
+ } else if (strcmp(get_token(cur_tokenset,0), "order") == 0) {
609
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
610
+ strcpy(temp_str,get_token(cur_tokenset,1));
611
+ header->order = temp_str;
612
+ } else if (strcmp(get_token(cur_tokenset,0), "sequential") == 0) {
613
+ header->sequential = atoi(get_token(cur_tokenset,1));
614
+ } else if (strcmp(get_token(cur_tokenset,0), "guid") == 0) {
615
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
616
+ strcpy(temp_str,get_token(cur_tokenset,1));
617
+ header->guid = temp_str;
618
+ } else {
619
+ /* not one of the recognised header types */
620
+ if ( header->n_other_headers == 0){
621
+ header->other_headers_keys = Calloc(1, char *);
622
+ header->other_headers_values = Calloc(1, char *);
623
+ } else {
624
+ header->other_headers_keys = Realloc(header->other_headers_keys,header->n_other_headers+1, char *);
625
+ header->other_headers_values = Realloc(header->other_headers_values,header->n_other_headers+1, char *);
626
+ header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *);
627
+ }
628
+ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char);
629
+ strcpy(temp_str,get_token(cur_tokenset,1));
630
+ header->other_headers_values[header->n_other_headers] = temp_str;
631
+ temp_str = Calloc(strlen(get_token(cur_tokenset,0)) + 1,char);
632
+ strcpy(temp_str,get_token(cur_tokenset,0));
633
+ header->other_headers_keys[header->n_other_headers] = temp_str;
634
+ header->n_other_headers++;
635
+
636
+ }
637
+
638
+ delete_tokens(cur_tokenset);
639
+ }
640
+ } while (IsHeaderLine(buffer));
641
+
642
+ }
643
+
644
+ /****************************************************************
645
+ **
646
+ ** void read_clf_data(FILE *cur_file, char *buffer, clf_data *data, clf_headers *header)
647
+ **
648
+ ** Read in the data part of the file. Specifically, the x,y, probe_id section.
649
+ ** Note to save space only the probe_id are stored.
650
+ **
651
+ ****************************************************************/
652
+
653
+ void read_clf_data(FILE *cur_file, char *buffer, clf_data *data, clf_headers *header){
654
+ tokenset *cur_tokenset;
655
+ int x, y, cur_id;
656
+
657
+ /* Check to see if the header information includes enough to know that probe_ids are deterministic */
658
+ /* if the are deterministic then don't need to read the rest of the file */
659
+
660
+
661
+ if (header->sequential > -1){
662
+ data->probe_id = NULL;
663
+ return;
664
+ } else {
665
+ data->probe_id = Calloc((header->rows)*(header->cols), int);
666
+ cur_tokenset = tokenize(buffer,"\t\r\n");
667
+ cur_id = atoi(get_token(cur_tokenset,header->header0->probe_id));
668
+ x = atoi(get_token(cur_tokenset,header->header0->x));
669
+ y = atoi(get_token(cur_tokenset,header->header0->y));
670
+ data->probe_id[y*header->cols + x] = cur_id;
671
+
672
+ delete_tokens(cur_tokenset);
673
+ while(ReadFileLine(buffer, 1024, cur_file)){
674
+ cur_tokenset = tokenize(buffer,"\t\r\n");
675
+ cur_id = atoi(get_token(cur_tokenset,header->header0->probe_id));
676
+ x = atoi(get_token(cur_tokenset,header->header0->x));
677
+ y = atoi(get_token(cur_tokenset,header->header0->y));
678
+ data->probe_id[y*header->cols + x] = cur_id;
679
+
680
+ delete_tokens(cur_tokenset);
681
+ }
682
+ }
683
+ }
684
+
685
+
686
+
687
+
688
+
689
+
690
+ /****************************************************************
691
+ ****************************************************************
692
+ **
693
+ ** Code for deallocating or initializing header data structures
694
+ **
695
+ ****************************************************************
696
+ ****************************************************************/
697
+
698
+ void dealloc_clf_headers(clf_headers *header){
699
+ int i;
700
+
701
+ if (header->n_chip_type > 0){
702
+ for (i = 0; i < header->n_chip_type; i++){
703
+ Free(header->chip_type[i]);
704
+ }
705
+ Free(header->chip_type);
706
+ }
707
+
708
+ if (header->lib_set_name != NULL){
709
+ Free(header->lib_set_name);
710
+ }
711
+
712
+ if (header->lib_set_version != NULL){
713
+ Free(header->lib_set_version);
714
+ }
715
+
716
+ if (header->clf_format_version != NULL){
717
+ Free(header->clf_format_version);
718
+ }
719
+
720
+ if (header->header0_str != NULL){
721
+ Free(header->header0_str);
722
+ Free(header->header0);
723
+ }
724
+
725
+ if (header->order != NULL){
726
+ Free(header->order);
727
+ }
728
+
729
+ if (header->create_date != NULL){
730
+ Free(header->create_date);
731
+ }
732
+
733
+ if (header->guid != NULL){
734
+ Free(header->guid);
735
+ }
736
+
737
+ if (header->n_other_headers > 0){
738
+ for (i = 0; i < header->n_other_headers; i++){
739
+ Free(header->other_headers_keys[i]);
740
+ Free(header->other_headers_values[i]);
741
+ }
742
+ Free(header->other_headers_keys);
743
+ Free(header->other_headers_values);
744
+ }
745
+ }
746
+
747
+
748
+ void dealloc_clf_data(clf_data *data){
749
+ if (data->probe_id != NULL){
750
+ Free(data->probe_id);
751
+ }
752
+ }
753
+
754
+
755
+ void dealloc_clf_file(clf_file* my_clf){
756
+
757
+
758
+ if (my_clf->headers != NULL){
759
+ dealloc_clf_headers(my_clf->headers);
760
+ Free(my_clf->headers);
761
+ }
762
+
763
+
764
+ if (my_clf->data !=NULL){
765
+ dealloc_clf_data(my_clf->data);
766
+ Free(my_clf->data);
767
+ }
768
+
769
+
770
+ }
771
+
772
+ /**********************************************************************
773
+ ***
774
+ *** A function for getting the probe_id for a given x,y
775
+ ***
776
+ ***
777
+ *********************************************************************/
778
+
779
+ void clf_get_probe_id(clf_file *clf, int *probe_id, int x, int y){
780
+
781
+ if (clf->headers->sequential > -1){
782
+ /* Check if order is "col_major" or "row_major" */
783
+
784
+ if (strcmp(clf->headers->order,"col_major") == 0){
785
+ *probe_id = y*clf->headers->cols + x + clf->headers->sequential;
786
+ } else if (strcmp(clf->headers->order,"row_major") == 0){
787
+ *probe_id = x*clf->headers->rows + y + clf->headers->sequential;
788
+ } else {
789
+ *probe_id = -1; /* ie missing */
790
+ }
791
+
792
+ } else {
793
+
794
+ *probe_id = clf->data->probe_id[y*clf->headers->rows + x];
795
+ }
796
+ }
797
+
798
+ /**********************************************************************
799
+ ***
800
+ *** A function for getting the x , y for a given probe_id
801
+ ***
802
+ ***
803
+ *********************************************************************/
804
+
805
+ void clf_get_x_y(clf_file *clf, int probe_id, int *x, int *y){
806
+ int ind;
807
+
808
+ if (clf->headers->sequential > -1){
809
+ /* Check if order is "col_major" or "row_major" */
810
+
811
+ if (strcmp(clf->headers->order,"col_major") == 0){
812
+ ind = (probe_id - clf->headers->sequential);
813
+ *x = ind%clf->headers->cols;
814
+ *y = ind/clf->headers->cols;
815
+ } else if (strcmp(clf->headers->order,"row_major") == 0){
816
+ ind = (probe_id - clf->headers->sequential);
817
+ *x = ind/clf->headers->rows;
818
+ *y = ind%clf->headers->rows;
819
+ } else {
820
+ *x = -1; /* ie missing */
821
+ *y = -1;
822
+ }
823
+ } else {
824
+ /* Linear Search (this should be improved for routine use) */
825
+ ind = 0;
826
+
827
+ while (ind < (clf->headers->cols*clf->headers->rows)){
828
+ if (clf->data->probe_id[ind] == probe_id){
829
+ break;
830
+ }
831
+ ind++;
832
+ }
833
+
834
+ if (ind == (clf->headers->cols*clf->headers->rows)){
835
+ *x = -1; *y = -1;
836
+ } else {
837
+ *x = ind/clf->headers->rows;
838
+ *y = ind%clf->headers->rows;
839
+ }
840
+ }
841
+ }
842
+
843
+ /*
844
+ * Note this function is only for testing purposes. It provides no methodology for accessing anything
845
+ * stored in the CLF file in R.
846
+ *
847
+ */
848
+
849
+ void read_clf_file(char **filename){
850
+
851
+ FILE *cur_file;
852
+ clf_file my_clf;
853
+ char *buffer = Calloc(1024, char);
854
+
855
+
856
+
857
+ cur_file = open_clf_file(filename[0]);
858
+
859
+ my_clf.headers = Calloc(1, clf_headers);
860
+ my_clf.data = Calloc(1, clf_data);
861
+
862
+ read_clf_header(cur_file,buffer,my_clf.headers);
863
+ if (validate_clf_header(my_clf.headers))
864
+ read_clf_data(cur_file, buffer, my_clf.data, my_clf.headers);
865
+
866
+ Free(buffer);
867
+ dealloc_clf_file(&my_clf);
868
+ fclose(cur_file);
869
+
870
+ }