bio-affy 0.1.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/bin/bio-affy +80 -0
- data/bio-affy.gemspec +128 -0
- data/ext/DESCRIPTION +11 -0
- data/ext/HISTORY +3 -0
- data/ext/LICENSE +456 -0
- data/ext/NAMESPACE +2 -0
- data/ext/R/check.cdf.type.R +18 -0
- data/ext/R/read.cdffile.list.R +23 -0
- data/ext/R/read.celfile.R +11 -0
- data/ext/R/read.celfile.header.R +37 -0
- data/ext/R/read.probematrices.R +29 -0
- data/ext/README_BIOLIB +36 -0
- data/ext/aclocal.m4 +32 -0
- data/ext/configure +4898 -0
- data/ext/configure.in +51 -0
- data/ext/man/check.cdf.type.Rd +22 -0
- data/ext/man/read.cdffile.list.Rd +20 -0
- data/ext/man/read.celfile.Rd +23 -0
- data/ext/man/read.celfile.header.Rd +22 -0
- data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
- data/ext/src/CMakeLists.txt +39 -0
- data/ext/src/Makevars.in +3 -0
- data/ext/src/Makevars.win +2 -0
- data/ext/src/Rakefile +43 -0
- data/ext/src/biolib_affyio.c +416 -0
- data/ext/src/biolib_affyio.h +132 -0
- data/ext/src/biolib_affyio.o +0 -0
- data/ext/src/fread_functions.c +871 -0
- data/ext/src/fread_functions.h +60 -0
- data/ext/src/fread_functions.o +0 -0
- data/ext/src/libaffyext.so +0 -0
- data/ext/src/mkrf.log +11 -0
- data/ext/src/mkrf_conf.rb +6 -0
- data/ext/src/read_abatch.c +5484 -0
- data/ext/src/read_abatch.h +63 -0
- data/ext/src/read_abatch.o +0 -0
- data/ext/src/read_bpmap.c +888 -0
- data/ext/src/read_bpmap.o +0 -0
- data/ext/src/read_cdf.h +347 -0
- data/ext/src/read_cdf_xda.c +1342 -0
- data/ext/src/read_cdf_xda.o +0 -0
- data/ext/src/read_cdffile2.c +1576 -0
- data/ext/src/read_cdffile2.o +0 -0
- data/ext/src/read_celfile_generic.c +2061 -0
- data/ext/src/read_celfile_generic.h +33 -0
- data/ext/src/read_celfile_generic.o +0 -0
- data/ext/src/read_clf.c +870 -0
- data/ext/src/read_clf.o +0 -0
- data/ext/src/read_generic.c +1446 -0
- data/ext/src/read_generic.h +144 -0
- data/ext/src/read_generic.o +0 -0
- data/ext/src/read_pgf.c +1337 -0
- data/ext/src/read_pgf.o +0 -0
- data/lib/bio-affy.rb +5 -0
- data/lib/bio/affy.rb +7 -0
- data/lib/bio/affyext.rb +23 -0
- data/lib/bio/libaffyext.so +0 -0
- data/spec/bio-affy_spec.rb +22 -0
- data/spec/spec_helper.rb +13 -0
- data/test/data/affy/GSM103328.CEL.gz +0 -0
- data/test/data/affy/GSM103329.CEL.gz +0 -0
- data/test/data/affy/GSM103330.CEL.gz +0 -0
- data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
- metadata +190 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
#ifndef READ_ABATCH_H
|
2
|
+
#define READ_ABATCH_H
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
/*
 * detailed_header_info
 *
 * A structure for holding full header information.
 */
typedef struct {
  char *cdfName;
  int cols;
  int rows;

  /* XY coordinates of the four grid corners, in pixel coordinates. */
  int GridCornerULx;  /* upper left, x  */
  int GridCornerULy;  /* upper left, y  */
  int GridCornerURx;  /* upper right, x */
  int GridCornerURy;  /* upper right, y */
  int GridCornerLRx;  /* lower right, x */
  int GridCornerLRy;  /* lower right, y */
  int GridCornerLLx;  /* lower left, x  */
  int GridCornerLLy;  /* lower left, y  */

  char *DatHeader;
  char *Algorithm;
  char *AlgorithmParameters;
  char *ScanDate;
} detailed_header_info;
|
27
|
+
|
28
|
+
/******************************************************************
|
29
|
+
**
|
30
|
+
** A "C" level object designed to hold information for a
|
31
|
+
** single CEL file
|
32
|
+
**
|
33
|
+
** These should be created using the function
|
34
|
+
**
|
35
|
+
** read_cel_file()
|
36
|
+
**
|
37
|
+
**
|
38
|
+
**
|
39
|
+
*****************************************************************/
|
40
|
+
|
41
|
+
typedef struct{
|
42
|
+
detailed_header_info header;
|
43
|
+
|
44
|
+
/** these are for storing the intensities, the sds and the number of pixels **/
|
45
|
+
double *intensities;
|
46
|
+
double *stddev;
|
47
|
+
double *npixels;
|
48
|
+
|
49
|
+
/** these are for storing information in the masks and outliers section **/
|
50
|
+
|
51
|
+
int nmasks;
|
52
|
+
int noutliers;
|
53
|
+
|
54
|
+
short *masks_x, *masks_y;
|
55
|
+
short *outliers_x, *outliers_y;
|
56
|
+
|
57
|
+
} CEL;
|
58
|
+
|
59
|
+
/*
 * Declaration only; the definition is not part of this header.
 * Per the comment on the CEL typedef, CEL objects should be created with
 * this function.
 * NOTE(review): presumably a non-zero read_intensities_only restricts the
 * read to the intensity values - confirm against the definition.
 */
extern CEL *read_cel_file(const char *filename, int read_intensities_only);
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
#endif
|
Binary file
|
@@ -0,0 +1,888 @@
|
|
1
|
+
/****************************************************************
|
2
|
+
**
|
3
|
+
** File: read_bpmap.c
|
4
|
+
**
|
5
|
+
** Implementation by: B. M. Bolstad
|
6
|
+
**
|
7
|
+
** Copyright (C) B. M. Bolstad 2006-2007
|
8
|
+
**
|
9
|
+
** A parser designed to read bpmap files into an R List structure
|
10
|
+
**
|
11
|
+
** History
|
12
|
+
** Mar 11, 2006 - Initial version
|
13
|
+
** Mar 12, 2006 - add additional support for versions 2 and 3
|
14
|
+
** May 31, 2006 - Fix some compiler warnings
|
15
|
+
** June 12, 2006 - fix naming vector length issue.
|
16
|
+
** June 12, 2007 - much wailing and grinding of teeth, but finally a fix for reading version number right.
|
17
|
+
** Aug 25, 2007 - Move file reading functions to centralized location
|
18
|
+
** Mar 14, 2008 - Fix reading of version number for big endian platforms
|
19
|
+
** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
|
20
|
+
**
|
21
|
+
*******************************************************************/
|
22
|
+
|
23
|
+
#include <R.h>
|
24
|
+
#include <Rdefines.h>
|
25
|
+
|
26
|
+
#include "stdlib.h"
|
27
|
+
#include "stdio.h"
|
28
|
+
|
29
|
+
#include "fread_functions.h"
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
/****************************************************************
|
34
|
+
**
|
35
|
+
**
|
36
|
+
**
|
37
|
+
**
|
38
|
+
** Note BPMAP files are stored in big endian format
|
39
|
+
**
|
40
|
+
*******************************************************************/
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
/*************************************************************************
|
45
|
+
**
|
46
|
+
** Code for reading from the big endian binary files, doing byte swapping if
|
47
|
+
** necessary (on little-endian machines)
|
48
|
+
**
|
49
|
+
**
|
50
|
+
************************************************************************/
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
/*
 * swap_float_4
 *
 * Converts the VALUE stored in *tnf4 to an integer (a numeric conversion,
 * not a reinterpretation of the float's bit pattern), reverses the order of
 * the four bytes of that integer, and stores the result back in *tnf4
 * converted to float.
 *
 * The byte reversal is carried out on an unsigned value so that moving a
 * byte >= 0x80 into the most significant position never left-shifts into
 * the sign bit of a signed int (undefined behaviour in C).
 *
 * tnf4: in/out, the 4 byte floating point number to transform.
 */
static void swap_float_4(float *tnf4) /* 4 byte floating point numbers */
{
  unsigned int bits = (unsigned int)(int)(*tnf4);

  bits = ((bits >> 24) & 0xffu) | ((bits & 0xffu) << 24) |
         ((bits >> 8) & 0xff00u) | ((bits & 0xff00u) << 8);

  /* back to a signed integer first, so a set top byte yields a negative
     value exactly as the signed arithmetic did in practice */
  *tnf4 = (float)(int)bits;
}
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
static SEXP ReadBPMAPHeader(FILE *infile){
|
70
|
+
|
71
|
+
|
72
|
+
SEXP Header;
|
73
|
+
SEXP tmpSXP;
|
74
|
+
|
75
|
+
|
76
|
+
char *Magicnumber = R_alloc(8,sizeof(char));
|
77
|
+
float version_number = 0.0;
|
78
|
+
int version_number_int;
|
79
|
+
unsigned int unsigned_version_number_int;
|
80
|
+
|
81
|
+
|
82
|
+
unsigned int n_seq;
|
83
|
+
static double new_version_number;
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
fread_be_char(Magicnumber,8,infile);
|
88
|
+
|
89
|
+
if (strncmp(Magicnumber,"PHT7",4) !=0){
|
90
|
+
error("Based on the magic number which was %s, this does not appear to be a BPMAP file",Magicnumber);
|
91
|
+
}
|
92
|
+
|
93
|
+
|
94
|
+
/* version number is a little bit funky
|
95
|
+
need to do some funny things to coax it
|
96
|
+
into the right format
|
97
|
+
*/
|
98
|
+
|
99
|
+
|
100
|
+
/* cast to integer, swap bytes, cast to float */
|
101
|
+
/* fread_be_float32(&version_number,1,infile); */
|
102
|
+
fread_float32(&version_number,1,infile);
|
103
|
+
swap_float_4(&version_number);
|
104
|
+
|
105
|
+
new_version_number = (double)version_number;
|
106
|
+
/* // Rprintf("A %f\n",version_number);*/
|
107
|
+
|
108
|
+
if ((version_number <=0.5) || (version_number > 3.5)){
|
109
|
+
/* // Rprintf("Rereading\n"); */
|
110
|
+
fseek(infile,-sizeof(float),SEEK_CUR);
|
111
|
+
fread_be_uint32(&unsigned_version_number_int,1,infile);
|
112
|
+
memcpy(&version_number,&unsigned_version_number_int, sizeof(float));
|
113
|
+
new_version_number = (double)version_number;
|
114
|
+
}
|
115
|
+
|
116
|
+
fread_be_uint32(&n_seq,1,infile);
|
117
|
+
|
118
|
+
PROTECT(Header=allocVector(VECSXP,3));
|
119
|
+
|
120
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
121
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(Magicnumber));
|
122
|
+
SET_VECTOR_ELT(Header,0,tmpSXP);
|
123
|
+
UNPROTECT(1);
|
124
|
+
|
125
|
+
|
126
|
+
PROTECT(tmpSXP=allocVector(REALSXP,1));
|
127
|
+
REAL(tmpSXP)[0] = (double)new_version_number;
|
128
|
+
SET_VECTOR_ELT(Header,1,tmpSXP);
|
129
|
+
UNPROTECT(1);
|
130
|
+
|
131
|
+
|
132
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
133
|
+
INTEGER(tmpSXP)[0] = (int)n_seq;
|
134
|
+
SET_VECTOR_ELT(Header,2,tmpSXP);
|
135
|
+
UNPROTECT(1);
|
136
|
+
|
137
|
+
PROTECT(tmpSXP=allocVector(STRSXP,3));
|
138
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("magic.number"));
|
139
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("version"));
|
140
|
+
SET_STRING_ELT(tmpSXP,2,mkChar("n.seq"));
|
141
|
+
setAttrib(Header,R_NamesSymbol,tmpSXP);
|
142
|
+
UNPROTECT(2);
|
143
|
+
|
144
|
+
/* Rprintf("D %f %f\n",version_number,new_version_number); */
|
145
|
+
return Header;
|
146
|
+
|
147
|
+
}
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
static SEXP ReadBPMAPSeqDescription(FILE *infile, float version, int nseq){
|
152
|
+
|
153
|
+
|
154
|
+
SEXP SequenceDescriptionList;
|
155
|
+
|
156
|
+
SEXP CurSequenceDescription = R_NilValue;
|
157
|
+
SEXP tmpSXP,tmpSXP2;
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
int i,j;
|
162
|
+
|
163
|
+
unsigned int seq_name_length;
|
164
|
+
|
165
|
+
char *seq_name;
|
166
|
+
|
167
|
+
unsigned int probe_mapping_type;
|
168
|
+
unsigned int seq_file_offset;
|
169
|
+
|
170
|
+
unsigned int n_probes;
|
171
|
+
|
172
|
+
unsigned int group_name_length;
|
173
|
+
char *group_name;
|
174
|
+
|
175
|
+
unsigned int version_number_length;
|
176
|
+
char *version_number;
|
177
|
+
|
178
|
+
unsigned int number_parameters;
|
179
|
+
|
180
|
+
unsigned int param_length;
|
181
|
+
char *param_name;
|
182
|
+
|
183
|
+
/* Rprintf("%f %d\n",version,nseq); */
|
184
|
+
|
185
|
+
PROTECT(SequenceDescriptionList=allocVector(VECSXP,(int)nseq));
|
186
|
+
|
187
|
+
for (i=0; i < nseq; i++){
|
188
|
+
fread_be_uint32(&seq_name_length,1,infile);
|
189
|
+
seq_name = (char *)Calloc(seq_name_length+1,char);
|
190
|
+
fread_be_char(seq_name,seq_name_length,infile);
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
if (version == 3.00){
|
195
|
+
PROTECT(CurSequenceDescription=allocVector(VECSXP,8));
|
196
|
+
PROTECT(tmpSXP=allocVector(STRSXP,7));
|
197
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("Name"));
|
198
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("ProbeMappingType"));
|
199
|
+
SET_STRING_ELT(tmpSXP,2,mkChar("SequenceFileOffset"));
|
200
|
+
SET_STRING_ELT(tmpSXP,3,mkChar("n.probepairs"));
|
201
|
+
SET_STRING_ELT(tmpSXP,4,mkChar("GroupName"));
|
202
|
+
SET_STRING_ELT(tmpSXP,5,mkChar("VersionNumber"));
|
203
|
+
SET_STRING_ELT(tmpSXP,6,mkChar("NumberOfParameters"));
|
204
|
+
SET_STRING_ELT(tmpSXP,7,mkChar("Parameters"));
|
205
|
+
setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP);
|
206
|
+
UNPROTECT(1);
|
207
|
+
} else if (version == 2.00){
|
208
|
+
PROTECT(CurSequenceDescription=allocVector(VECSXP,6));
|
209
|
+
PROTECT(tmpSXP=allocVector(STRSXP,6));
|
210
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("Name"));
|
211
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("n.probepairs"));
|
212
|
+
SET_STRING_ELT(tmpSXP,2,mkChar("GroupName"));
|
213
|
+
SET_STRING_ELT(tmpSXP,3,mkChar("VersionNumber"));
|
214
|
+
SET_STRING_ELT(tmpSXP,4,mkChar("NumberOfParameters"));
|
215
|
+
SET_STRING_ELT(tmpSXP,5,mkChar("Parameters"));
|
216
|
+
setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP);
|
217
|
+
UNPROTECT(1);
|
218
|
+
} else if (version == 1.00){
|
219
|
+
PROTECT(CurSequenceDescription=allocVector(VECSXP,2));
|
220
|
+
PROTECT(tmpSXP=allocVector(STRSXP,2));
|
221
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("Name"));
|
222
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("n.probepairs"));
|
223
|
+
setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP);
|
224
|
+
UNPROTECT(1);
|
225
|
+
|
226
|
+
}
|
227
|
+
|
228
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
229
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(seq_name));
|
230
|
+
SET_VECTOR_ELT(CurSequenceDescription,0,tmpSXP);
|
231
|
+
UNPROTECT(1);
|
232
|
+
Free(seq_name);
|
233
|
+
|
234
|
+
|
235
|
+
if (version == 1.0){
|
236
|
+
fread_be_uint32(&n_probes,1,infile);
|
237
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
238
|
+
INTEGER(tmpSXP)[0] = n_probes;
|
239
|
+
SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP);
|
240
|
+
UNPROTECT(1);
|
241
|
+
} else if (version ==2.0){
|
242
|
+
fread_be_uint32(&n_probes,1,infile);
|
243
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
244
|
+
INTEGER(tmpSXP)[0] = n_probes;
|
245
|
+
SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP);
|
246
|
+
UNPROTECT(1);
|
247
|
+
|
248
|
+
|
249
|
+
|
250
|
+
|
251
|
+
fread_be_uint32(&group_name_length,1,infile);
|
252
|
+
group_name = (char *)Calloc(group_name_length+1,char);
|
253
|
+
fread_be_char(group_name,group_name_length,infile);
|
254
|
+
|
255
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
256
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(group_name));
|
257
|
+
SET_VECTOR_ELT(CurSequenceDescription,2,tmpSXP);
|
258
|
+
UNPROTECT(1);
|
259
|
+
Free(group_name);
|
260
|
+
|
261
|
+
|
262
|
+
fread_be_uint32(&version_number_length,1,infile);
|
263
|
+
version_number = (char *)Calloc(version_number_length+1,char);
|
264
|
+
fread_be_char(version_number,version_number_length,infile);
|
265
|
+
|
266
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
267
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(version_number));
|
268
|
+
SET_VECTOR_ELT(CurSequenceDescription,3,tmpSXP);
|
269
|
+
UNPROTECT(1);
|
270
|
+
Free(version_number);
|
271
|
+
|
272
|
+
|
273
|
+
fread_be_uint32(&number_parameters,1,infile);
|
274
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
275
|
+
INTEGER(tmpSXP)[0] = number_parameters;
|
276
|
+
SET_VECTOR_ELT(CurSequenceDescription,4,tmpSXP);
|
277
|
+
UNPROTECT(1);
|
278
|
+
|
279
|
+
PROTECT(tmpSXP=allocVector(VECSXP,number_parameters));
|
280
|
+
|
281
|
+
|
282
|
+
for (j=0; j < number_parameters; j++){
|
283
|
+
PROTECT(tmpSXP2 = allocVector(STRSXP,2));
|
284
|
+
fread_be_uint32(¶m_length,1,infile);
|
285
|
+
param_name = (char *)Calloc(param_length+1,char);
|
286
|
+
fread_be_char(param_name,param_length,infile);
|
287
|
+
SET_STRING_ELT(tmpSXP2,0,mkChar(param_name));
|
288
|
+
Free(param_name);
|
289
|
+
fread_be_uint32(¶m_length,1,infile);
|
290
|
+
param_name = (char *)Calloc(param_length+1,char);
|
291
|
+
fread_be_char(param_name,param_length,infile);
|
292
|
+
SET_STRING_ELT(tmpSXP2,1,mkChar(param_name));
|
293
|
+
Free(param_name);
|
294
|
+
|
295
|
+
SET_VECTOR_ELT(tmpSXP,j,tmpSXP2);
|
296
|
+
UNPROTECT(1);
|
297
|
+
}
|
298
|
+
SET_VECTOR_ELT(CurSequenceDescription,5,tmpSXP);
|
299
|
+
UNPROTECT(1);
|
300
|
+
|
301
|
+
|
302
|
+
|
303
|
+
} else if (version ==3.0){
|
304
|
+
fread_be_uint32(&probe_mapping_type,1,infile);
|
305
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
306
|
+
INTEGER(tmpSXP)[0] = probe_mapping_type;
|
307
|
+
SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP);
|
308
|
+
UNPROTECT(1);
|
309
|
+
|
310
|
+
fread_be_uint32(&seq_file_offset,1,infile);
|
311
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
312
|
+
INTEGER(tmpSXP)[0] = seq_file_offset;
|
313
|
+
SET_VECTOR_ELT(CurSequenceDescription,2,tmpSXP);
|
314
|
+
UNPROTECT(1);
|
315
|
+
|
316
|
+
fread_be_uint32(&n_probes,1,infile);
|
317
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
318
|
+
INTEGER(tmpSXP)[0] = n_probes;
|
319
|
+
SET_VECTOR_ELT(CurSequenceDescription,3,tmpSXP);
|
320
|
+
UNPROTECT(1);
|
321
|
+
|
322
|
+
fread_be_uint32(&group_name_length,1,infile);
|
323
|
+
group_name = (char *)Calloc(group_name_length+1,char);
|
324
|
+
fread_be_char(group_name,group_name_length,infile);
|
325
|
+
|
326
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
327
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(group_name));
|
328
|
+
SET_VECTOR_ELT(CurSequenceDescription,4,tmpSXP);
|
329
|
+
UNPROTECT(1);
|
330
|
+
Free(group_name);
|
331
|
+
|
332
|
+
fread_be_uint32(&version_number_length,1,infile);
|
333
|
+
version_number = (char *)Calloc(version_number_length+1,char);
|
334
|
+
fread_be_char(version_number,version_number_length,infile);
|
335
|
+
|
336
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
337
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(version_number));
|
338
|
+
SET_VECTOR_ELT(CurSequenceDescription,5,tmpSXP);
|
339
|
+
UNPROTECT(1);
|
340
|
+
Free(version_number);
|
341
|
+
|
342
|
+
fread_be_uint32(&number_parameters,1,infile);
|
343
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
344
|
+
INTEGER(tmpSXP)[0] = number_parameters;
|
345
|
+
SET_VECTOR_ELT(CurSequenceDescription,6,tmpSXP);
|
346
|
+
UNPROTECT(1);
|
347
|
+
|
348
|
+
|
349
|
+
|
350
|
+
PROTECT(tmpSXP=allocVector(VECSXP,number_parameters));
|
351
|
+
|
352
|
+
|
353
|
+
for (j=0; j < number_parameters; j++){
|
354
|
+
PROTECT(tmpSXP2 = allocVector(STRSXP,2));
|
355
|
+
fread_be_uint32(¶m_length,1,infile);
|
356
|
+
param_name = (char *)Calloc(param_length+1,char);
|
357
|
+
fread_be_char(param_name,param_length,infile);
|
358
|
+
SET_STRING_ELT(tmpSXP2,0,mkChar(param_name));
|
359
|
+
Free(param_name);
|
360
|
+
fread_be_uint32(¶m_length,1,infile);
|
361
|
+
param_name = (char *)Calloc(param_length+1,char);
|
362
|
+
fread_be_char(param_name,param_length,infile);
|
363
|
+
SET_STRING_ELT(tmpSXP2,1,mkChar(param_name));
|
364
|
+
Free(param_name);
|
365
|
+
|
366
|
+
SET_VECTOR_ELT(tmpSXP,j,tmpSXP2);
|
367
|
+
UNPROTECT(1);
|
368
|
+
}
|
369
|
+
SET_VECTOR_ELT(CurSequenceDescription,7,tmpSXP);
|
370
|
+
UNPROTECT(1);
|
371
|
+
}
|
372
|
+
|
373
|
+
SET_VECTOR_ELT(SequenceDescriptionList,i,CurSequenceDescription);
|
374
|
+
UNPROTECT(1);
|
375
|
+
|
376
|
+
}
|
377
|
+
|
378
|
+
UNPROTECT(1);
|
379
|
+
return SequenceDescriptionList;
|
380
|
+
|
381
|
+
}
|
382
|
+
|
383
|
+
|
384
|
+
|
385
|
+
/*
 * packedSeqTobaseStr
 *
 * Expands a packed probe sequence into 25 base characters.
 *
 * probeseq: 7 bytes holding 2 bits per base, most significant pair first;
 *           the codes 0, 1, 2, 3 map to 'A', 'C', 'G', 'T'. Only the top
 *           two bits of the seventh byte are used.
 * dest:     receives the characters at dest[0] .. dest[24]. No terminating
 *           NUL is written; the caller provides it.
 */
static void packedSeqTobaseStr(unsigned char probeseq[7], char *dest){

  static const char base_for_code[4] = {'A','C','G','T'};

  char *out = dest;
  int byte_idx;
  int shift;

  /* the first six bytes carry four bases each */
  for (byte_idx = 0; byte_idx < 6; byte_idx++){
    unsigned char packed = probeseq[byte_idx];

    for (shift = 6; shift >= 0; shift -= 2){
      *out++ = base_for_code[(packed >> shift) & 3];
    }
  }

  /* the 25th base sits in the top two bits of the last byte */
  *out = base_for_code[(probeseq[6] >> 6) & 3];
}
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
|
498
|
+
|
499
|
+
|
500
|
+
|
501
|
+
|
502
|
+
static SEXP readBPMAPSeqIdPositionInfo(FILE *infile, float version, int nseq, SEXP seqDesc){
|
503
|
+
|
504
|
+
|
505
|
+
SEXP SeqIdPositionInfoList;
|
506
|
+
SEXP curSeqIdPositionInfo;
|
507
|
+
SEXP PositionInfo= R_NilValue;
|
508
|
+
SEXP PositionInfoRowNames;
|
509
|
+
|
510
|
+
|
511
|
+
SEXP tmpSEXP;
|
512
|
+
|
513
|
+
SEXP xPM= R_NilValue,yPM= R_NilValue,xMM= R_NilValue,yMM= R_NilValue;
|
514
|
+
SEXP PMprobeLength= R_NilValue;
|
515
|
+
SEXP probeSeqString= R_NilValue;
|
516
|
+
SEXP MatchScore= R_NilValue;
|
517
|
+
SEXP PMposition= R_NilValue;
|
518
|
+
SEXP Strand= R_NilValue;
|
519
|
+
|
520
|
+
char buf[10];
|
521
|
+
|
522
|
+
char *dest;
|
523
|
+
|
524
|
+
|
525
|
+
int nprobes=0;
|
526
|
+
int probe_mapping_type=0;
|
527
|
+
int i,j;
|
528
|
+
|
529
|
+
|
530
|
+
unsigned int SeqId;
|
531
|
+
|
532
|
+
unsigned int x;
|
533
|
+
unsigned int y;
|
534
|
+
|
535
|
+
unsigned int x_mm;
|
536
|
+
unsigned int y_mm;
|
537
|
+
|
538
|
+
unsigned char probelength;
|
539
|
+
|
540
|
+
unsigned char probeseq[7];
|
541
|
+
|
542
|
+
float matchScore;
|
543
|
+
int matchScore_int;
|
544
|
+
|
545
|
+
unsigned int positionPM;
|
546
|
+
unsigned char strand;
|
547
|
+
|
548
|
+
|
549
|
+
PROTECT(SeqIdPositionInfoList = allocVector(VECSXP,nseq));
|
550
|
+
|
551
|
+
for (i =0; i < nseq; i++){
|
552
|
+
fread_be_uint32(&SeqId,1,infile);
|
553
|
+
/*Rprintf("Seq id:%u\n",SeqId);*/
|
554
|
+
|
555
|
+
PROTECT(curSeqIdPositionInfo = allocVector(VECSXP,2));
|
556
|
+
|
557
|
+
|
558
|
+
PROTECT(tmpSEXP=allocVector(INTSXP,1));
|
559
|
+
INTEGER(tmpSEXP)[0] = (int)SeqId;
|
560
|
+
SET_VECTOR_ELT(curSeqIdPositionInfo,0,tmpSEXP);
|
561
|
+
UNPROTECT(1);
|
562
|
+
|
563
|
+
|
564
|
+
PROTECT(tmpSEXP=allocVector(STRSXP,2));
|
565
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("Header"));
|
566
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("PositionInformation"));
|
567
|
+
setAttrib(curSeqIdPositionInfo,R_NamesSymbol,tmpSEXP);
|
568
|
+
UNPROTECT(1);
|
569
|
+
|
570
|
+
|
571
|
+
|
572
|
+
if ((version == 1.0) || (version == 2.0)){
|
573
|
+
nprobes = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),1))[0];
|
574
|
+
/* Rprintf("nprobes: %d\n",nprobes); */
|
575
|
+
probe_mapping_type = 0; /* PM/MM tiling */
|
576
|
+
|
577
|
+
PROTECT(PositionInfo = allocVector(VECSXP,9));
|
578
|
+
PROTECT(xPM = allocVector(INTSXP,nprobes));
|
579
|
+
PROTECT(yPM = allocVector(INTSXP,nprobes));
|
580
|
+
PROTECT(xMM = allocVector(INTSXP,nprobes));
|
581
|
+
PROTECT(yMM = allocVector(INTSXP,nprobes));
|
582
|
+
PROTECT(PMprobeLength = allocVector(INTSXP,nprobes));
|
583
|
+
PROTECT(probeSeqString = allocVector(STRSXP,nprobes));
|
584
|
+
PROTECT(MatchScore = allocVector(REALSXP,nprobes));
|
585
|
+
PROTECT(PMposition = allocVector(INTSXP,nprobes));
|
586
|
+
PROTECT(Strand = allocVector(STRSXP,nprobes));
|
587
|
+
|
588
|
+
SET_VECTOR_ELT(PositionInfo,0,xPM);
|
589
|
+
SET_VECTOR_ELT(PositionInfo,1,yPM);
|
590
|
+
SET_VECTOR_ELT(PositionInfo,2,xMM);
|
591
|
+
SET_VECTOR_ELT(PositionInfo,3,yMM);
|
592
|
+
SET_VECTOR_ELT(PositionInfo,4,PMprobeLength);
|
593
|
+
SET_VECTOR_ELT(PositionInfo,5,probeSeqString);
|
594
|
+
SET_VECTOR_ELT(PositionInfo,6,MatchScore);
|
595
|
+
SET_VECTOR_ELT(PositionInfo,7,PMposition);
|
596
|
+
SET_VECTOR_ELT(PositionInfo,8,Strand);
|
597
|
+
UNPROTECT(9);
|
598
|
+
|
599
|
+
setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame"));
|
600
|
+
|
601
|
+
PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes));
|
602
|
+
for (j=0; j < nprobes; j++){
|
603
|
+
sprintf(buf, "%d", j+1);
|
604
|
+
SET_STRING_ELT(PositionInfoRowNames,j,mkChar(buf));
|
605
|
+
}
|
606
|
+
setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames);
|
607
|
+
UNPROTECT(1);
|
608
|
+
|
609
|
+
PROTECT(tmpSEXP = allocVector(STRSXP,9));
|
610
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("x"));
|
611
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("y"));
|
612
|
+
SET_STRING_ELT(tmpSEXP,2,mkChar("x.mm"));
|
613
|
+
SET_STRING_ELT(tmpSEXP,3,mkChar("y.mm"));
|
614
|
+
SET_STRING_ELT(tmpSEXP,4,mkChar("PMLength"));
|
615
|
+
SET_STRING_ELT(tmpSEXP,5,mkChar("ProbeSeq"));
|
616
|
+
SET_STRING_ELT(tmpSEXP,6,mkChar("MatchScore"));
|
617
|
+
SET_STRING_ELT(tmpSEXP,7,mkChar("PMPosition"));
|
618
|
+
SET_STRING_ELT(tmpSEXP,8,mkChar("TargetStrand"));
|
619
|
+
|
620
|
+
setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP);
|
621
|
+
UNPROTECT(1);
|
622
|
+
|
623
|
+
} else if (version == 3.0){
|
624
|
+
nprobes = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),3))[0];
|
625
|
+
probe_mapping_type = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),1))[0];
|
626
|
+
|
627
|
+
|
628
|
+
if (probe_mapping_type == 0){
|
629
|
+
PROTECT(PositionInfo = allocVector(VECSXP,9));
|
630
|
+
PROTECT(xPM = allocVector(INTSXP,nprobes));
|
631
|
+
PROTECT(yPM = allocVector(INTSXP,nprobes));
|
632
|
+
PROTECT(xMM = allocVector(INTSXP,nprobes));
|
633
|
+
PROTECT(yMM = allocVector(INTSXP,nprobes));
|
634
|
+
PROTECT(PMprobeLength = allocVector(INTSXP,nprobes));
|
635
|
+
PROTECT(probeSeqString = allocVector(STRSXP,nprobes));
|
636
|
+
PROTECT(MatchScore = allocVector(REALSXP,nprobes));
|
637
|
+
PROTECT(PMposition = allocVector(INTSXP,nprobes));
|
638
|
+
PROTECT(Strand = allocVector(STRSXP,nprobes));
|
639
|
+
|
640
|
+
SET_VECTOR_ELT(PositionInfo,0,xPM);
|
641
|
+
SET_VECTOR_ELT(PositionInfo,1,yPM);
|
642
|
+
SET_VECTOR_ELT(PositionInfo,2,xMM);
|
643
|
+
SET_VECTOR_ELT(PositionInfo,3,yMM);
|
644
|
+
SET_VECTOR_ELT(PositionInfo,4,PMprobeLength);
|
645
|
+
SET_VECTOR_ELT(PositionInfo,5,probeSeqString);
|
646
|
+
SET_VECTOR_ELT(PositionInfo,6,MatchScore);
|
647
|
+
SET_VECTOR_ELT(PositionInfo,7,PMposition);
|
648
|
+
SET_VECTOR_ELT(PositionInfo,8,Strand);
|
649
|
+
UNPROTECT(9);
|
650
|
+
|
651
|
+
setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame"));
|
652
|
+
|
653
|
+
PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes));
|
654
|
+
for (j=0; j < nprobes; j++){
|
655
|
+
sprintf(buf, "%d", j+1);
|
656
|
+
SET_VECTOR_ELT(PositionInfoRowNames,j,mkChar(buf));
|
657
|
+
}
|
658
|
+
setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames);
|
659
|
+
UNPROTECT(1);
|
660
|
+
|
661
|
+
PROTECT(tmpSEXP = allocVector(STRSXP,9));
|
662
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("x"));
|
663
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("y"));
|
664
|
+
SET_STRING_ELT(tmpSEXP,2,mkChar("x.mm"));
|
665
|
+
SET_STRING_ELT(tmpSEXP,3,mkChar("y.mm"));
|
666
|
+
SET_STRING_ELT(tmpSEXP,4,mkChar("PMLength"));
|
667
|
+
SET_STRING_ELT(tmpSEXP,5,mkChar("ProbeSeq"));
|
668
|
+
SET_STRING_ELT(tmpSEXP,6,mkChar("MatchScore"));
|
669
|
+
SET_STRING_ELT(tmpSEXP,7,mkChar("PMPosition"));
|
670
|
+
SET_STRING_ELT(tmpSEXP,8,mkChar("TargetStrand"));
|
671
|
+
|
672
|
+
setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP);
|
673
|
+
UNPROTECT(1);
|
674
|
+
} else {
|
675
|
+
|
676
|
+
PROTECT(PositionInfo = allocVector(VECSXP,7));
|
677
|
+
PROTECT(xPM = allocVector(INTSXP,nprobes));
|
678
|
+
PROTECT(yPM = allocVector(INTSXP,nprobes));
|
679
|
+
PROTECT(PMprobeLength = allocVector(INTSXP,nprobes));
|
680
|
+
PROTECT(probeSeqString = allocVector(STRSXP,nprobes));
|
681
|
+
PROTECT(MatchScore = allocVector(REALSXP,nprobes));
|
682
|
+
PROTECT(PMposition = allocVector(INTSXP,nprobes));
|
683
|
+
PROTECT(Strand = allocVector(STRSXP,nprobes));
|
684
|
+
|
685
|
+
SET_VECTOR_ELT(PositionInfo,0,xPM);
|
686
|
+
SET_VECTOR_ELT(PositionInfo,1,yPM);
|
687
|
+
SET_VECTOR_ELT(PositionInfo,2,PMprobeLength);
|
688
|
+
SET_VECTOR_ELT(PositionInfo,3,probeSeqString);
|
689
|
+
SET_VECTOR_ELT(PositionInfo,4,MatchScore);
|
690
|
+
SET_VECTOR_ELT(PositionInfo,5,PMposition);
|
691
|
+
SET_VECTOR_ELT(PositionInfo,6,Strand);
|
692
|
+
UNPROTECT(7);
|
693
|
+
|
694
|
+
setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame"));
|
695
|
+
|
696
|
+
PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes));
|
697
|
+
for (j=0; j < nprobes; j++){
|
698
|
+
sprintf(buf, "%d", j+1);
|
699
|
+
SET_STRING_ELT(PositionInfoRowNames,j,mkChar(buf));
|
700
|
+
}
|
701
|
+
setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames);
|
702
|
+
UNPROTECT(1);
|
703
|
+
|
704
|
+
PROTECT(tmpSEXP = allocVector(STRSXP,7));
|
705
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("x"));
|
706
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("y"));
|
707
|
+
SET_STRING_ELT(tmpSEXP,2,mkChar("PMLength"));
|
708
|
+
SET_STRING_ELT(tmpSEXP,3,mkChar("ProbeSeq"));
|
709
|
+
SET_STRING_ELT(tmpSEXP,4,mkChar("MatchScore"));
|
710
|
+
SET_STRING_ELT(tmpSEXP,5,mkChar("PMPosition"));
|
711
|
+
SET_STRING_ELT(tmpSEXP,6,mkChar("TargetStrand"));
|
712
|
+
|
713
|
+
setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP);
|
714
|
+
UNPROTECT(1);
|
715
|
+
}
|
716
|
+
|
717
|
+
|
718
|
+
}
|
719
|
+
|
720
|
+
|
721
|
+
|
722
|
+
|
723
|
+
|
724
|
+
for (j=0; j < nprobes; j++){
|
725
|
+
fread_be_uint32(&x,1,infile);
|
726
|
+
fread_be_uint32(&y,1,infile);
|
727
|
+
/* Rprintf("x y :%u %u\n",x,y); */
|
728
|
+
|
729
|
+
if (probe_mapping_type == 0){
|
730
|
+
fread_be_uint32(&x_mm,1,infile);
|
731
|
+
fread_be_uint32(&y_mm,1,infile);
|
732
|
+
}
|
733
|
+
|
734
|
+
/* Rprintf("mm x y :%u %u\n",x_mm,y_mm); */
|
735
|
+
|
736
|
+
INTEGER(xPM)[j] = x;
|
737
|
+
INTEGER(yPM)[j] = y;
|
738
|
+
|
739
|
+
if (probe_mapping_type == 0){
|
740
|
+
INTEGER(xMM)[j] = x_mm;
|
741
|
+
INTEGER(yMM)[j] = y_mm;
|
742
|
+
}
|
743
|
+
fread_be_uchar(&probelength,1,infile);
|
744
|
+
/* Rprintf("probelength : %d\n",(int)probelength);*/
|
745
|
+
|
746
|
+
INTEGER(PMprobeLength)[j] = probelength;
|
747
|
+
|
748
|
+
|
749
|
+
fread_be_uchar(probeseq,7,infile);
|
750
|
+
/* Rprintf("probeseq : %s\n",probeseq); */
|
751
|
+
|
752
|
+
|
753
|
+
|
754
|
+
dest = (char *)Calloc(25+1,char);
|
755
|
+
packedSeqTobaseStr(probeseq,dest);
|
756
|
+
|
757
|
+
SET_STRING_ELT(probeSeqString,j,mkChar(dest));
|
758
|
+
Free(dest);
|
759
|
+
|
760
|
+
|
761
|
+
|
762
|
+
|
763
|
+
/* matchScore is treated same as version number in header */
|
764
|
+
#ifdef WORDS_BIGENDIAN
|
765
|
+
/* swap, cast to integer, swap bytes and cast back to float */
|
766
|
+
fread_be_float32(&matchScore,1,infile);
|
767
|
+
swap_float_4(&matchScore);
|
768
|
+
matchScore_int = (int)matchScore;
|
769
|
+
|
770
|
+
|
771
|
+
matchScore_int=(((matchScore_int>>24)&0xff) | ((matchScore_int&0xff)<<24) |
|
772
|
+
((matchScore_int>>8)&0xff00) | ((matchScore_int&0xff00)<<8));
|
773
|
+
matchScore = (float)matchScore_int;
|
774
|
+
|
775
|
+
#else
|
776
|
+
/* cast to integer, swap bytes, cast to float */
|
777
|
+
fread_float32(&matchScore,1,infile);
|
778
|
+
matchScore_int = (int)matchScore;
|
779
|
+
matchScore_int=(((matchScore_int>>24)&0xff) | ((matchScore_int&0xff)<<24) |
|
780
|
+
((matchScore_int>>8)&0xff00) | ((matchScore_int&0xff00)<<8));
|
781
|
+
matchScore = (float)matchScore_int;
|
782
|
+
#endif
|
783
|
+
/* Rprintf("matchScore : %f\n",matchScore); */
|
784
|
+
|
785
|
+
REAL(MatchScore)[j] = matchScore;
|
786
|
+
|
787
|
+
|
788
|
+
|
789
|
+
fread_be_uint32(&positionPM,1,infile);
|
790
|
+
/* Rprintf("positionPM : %u\n",positionPM);*/
|
791
|
+
INTEGER(PMposition)[j] = positionPM;
|
792
|
+
|
793
|
+
|
794
|
+
fread_be_uchar(&strand,1,infile);
|
795
|
+
/* Rprintf("strand: %d\n",(int)strand);*/
|
796
|
+
|
797
|
+
if ((int)strand ==1){
|
798
|
+
SET_STRING_ELT(Strand,j,mkChar("F"));
|
799
|
+
} else {
|
800
|
+
SET_STRING_ELT(Strand,j,mkChar("R"));
|
801
|
+
}
|
802
|
+
|
803
|
+
|
804
|
+
}
|
805
|
+
|
806
|
+
SET_VECTOR_ELT(curSeqIdPositionInfo,1,PositionInfo);
|
807
|
+
UNPROTECT(1);
|
808
|
+
|
809
|
+
SET_VECTOR_ELT(SeqIdPositionInfoList,i,curSeqIdPositionInfo);
|
810
|
+
UNPROTECT(1);
|
811
|
+
}
|
812
|
+
|
813
|
+
|
814
|
+
UNPROTECT(1);
|
815
|
+
return SeqIdPositionInfoList;
|
816
|
+
|
817
|
+
}
|
818
|
+
|
819
|
+
|
820
|
+
|
821
|
+
|
822
|
+
|
823
|
+
|
824
|
+
/*
 * ReadBPMAPFileIntoRList
 *
 * Reads a BPMAP file into an R list.
 *
 * filename: character vector; its first element is the path to open.
 *
 * Raises an R error if the file cannot be opened.
 *
 * Returns a list of length 3:
 *   "Header"              - result of ReadBPMAPHeader
 *   "SequenceDescription" - result of ReadBPMAPSeqDescription
 *   "SeqHead.PosInfo"     - result of readBPMAPSeqIdPositionInfo
 */
SEXP ReadBPMAPFileIntoRList(SEXP filename){

  SEXP bpmapRlist;
  SEXP bpmapHeader;
  SEXP bpmapSeqDesc;
  SEXP tmpSXP;

  FILE *infile;

  int n_seq;
  float version;

  const char *cur_file_name;
  cur_file_name = CHAR(STRING_ELT(filename,0));

  if ((infile = fopen(cur_file_name, "rb")) == NULL)
    {
      /* report the C string, not the SEXP that holds it */
      error("Unable to open the file %s",cur_file_name);
    }

  /*
    first element is header, second item is sequence descriptions
    third item is sequence header/position information
  */
  PROTECT(bpmapRlist = allocVector(VECSXP,3));

  PROTECT(bpmapHeader = ReadBPMAPHeader(infile));
  SET_VECTOR_ELT(bpmapRlist,0,bpmapHeader);
  version = REAL(VECTOR_ELT(bpmapHeader,1))[0];
  n_seq = INTEGER(VECTOR_ELT(bpmapHeader,2))[0];
  UNPROTECT(1);

  PROTECT(bpmapSeqDesc = ReadBPMAPSeqDescription(infile,version,n_seq));
  SET_VECTOR_ELT(bpmapRlist,1,bpmapSeqDesc);
  SET_VECTOR_ELT(bpmapRlist,2,readBPMAPSeqIdPositionInfo(infile,version,n_seq,bpmapSeqDesc));
  UNPROTECT(1);

  /* everything has been read; release the file handle.
     NOTE(review): an R error raised by one of the readers above still
     skips this call and leaks the handle. */
  fclose(infile);

  PROTECT(tmpSXP=allocVector(STRSXP,3));
  SET_STRING_ELT(tmpSXP,0,mkChar("Header"));
  SET_STRING_ELT(tmpSXP,1,mkChar("SequenceDescription"));
  SET_STRING_ELT(tmpSXP,2,mkChar("SeqHead.PosInfo"));
  setAttrib(bpmapRlist,R_NamesSymbol,tmpSXP);
  UNPROTECT(2);   /* tmpSXP and bpmapRlist */

  return bpmapRlist;

}
|
888
|
+
|