bio-affy 0.1.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +32 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/bin/bio-affy +80 -0
- data/bio-affy.gemspec +128 -0
- data/ext/DESCRIPTION +11 -0
- data/ext/HISTORY +3 -0
- data/ext/LICENSE +456 -0
- data/ext/NAMESPACE +2 -0
- data/ext/R/check.cdf.type.R +18 -0
- data/ext/R/read.cdffile.list.R +23 -0
- data/ext/R/read.celfile.R +11 -0
- data/ext/R/read.celfile.header.R +37 -0
- data/ext/R/read.probematrices.R +29 -0
- data/ext/README_BIOLIB +36 -0
- data/ext/aclocal.m4 +32 -0
- data/ext/configure +4898 -0
- data/ext/configure.in +51 -0
- data/ext/man/check.cdf.type.Rd +22 -0
- data/ext/man/read.cdffile.list.Rd +20 -0
- data/ext/man/read.celfile.Rd +23 -0
- data/ext/man/read.celfile.header.Rd +22 -0
- data/ext/man/read.celfile.probeintensity.matrices.Rd +31 -0
- data/ext/src/CMakeLists.txt +39 -0
- data/ext/src/Makevars.in +3 -0
- data/ext/src/Makevars.win +2 -0
- data/ext/src/Rakefile +43 -0
- data/ext/src/biolib_affyio.c +416 -0
- data/ext/src/biolib_affyio.h +132 -0
- data/ext/src/biolib_affyio.o +0 -0
- data/ext/src/fread_functions.c +871 -0
- data/ext/src/fread_functions.h +60 -0
- data/ext/src/fread_functions.o +0 -0
- data/ext/src/libaffyext.so +0 -0
- data/ext/src/mkrf.log +11 -0
- data/ext/src/mkrf_conf.rb +6 -0
- data/ext/src/read_abatch.c +5484 -0
- data/ext/src/read_abatch.h +63 -0
- data/ext/src/read_abatch.o +0 -0
- data/ext/src/read_bpmap.c +888 -0
- data/ext/src/read_bpmap.o +0 -0
- data/ext/src/read_cdf.h +347 -0
- data/ext/src/read_cdf_xda.c +1342 -0
- data/ext/src/read_cdf_xda.o +0 -0
- data/ext/src/read_cdffile2.c +1576 -0
- data/ext/src/read_cdffile2.o +0 -0
- data/ext/src/read_celfile_generic.c +2061 -0
- data/ext/src/read_celfile_generic.h +33 -0
- data/ext/src/read_celfile_generic.o +0 -0
- data/ext/src/read_clf.c +870 -0
- data/ext/src/read_clf.o +0 -0
- data/ext/src/read_generic.c +1446 -0
- data/ext/src/read_generic.h +144 -0
- data/ext/src/read_generic.o +0 -0
- data/ext/src/read_pgf.c +1337 -0
- data/ext/src/read_pgf.o +0 -0
- data/lib/bio-affy.rb +5 -0
- data/lib/bio/affy.rb +7 -0
- data/lib/bio/affyext.rb +23 -0
- data/lib/bio/libaffyext.so +0 -0
- data/spec/bio-affy_spec.rb +22 -0
- data/spec/spec_helper.rb +13 -0
- data/test/data/affy/GSM103328.CEL.gz +0 -0
- data/test/data/affy/GSM103329.CEL.gz +0 -0
- data/test/data/affy/GSM103330.CEL.gz +0 -0
- data/test/data/affy/MG_U74Av2.CDF.gz +0 -0
- metadata +190 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
#ifndef READ_ABATCH_H
|
2
|
+
#define READ_ABATCH_H
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
/****************************************************************
|
7
|
+
**
|
8
|
+
** A structure for holding full header information
|
9
|
+
**
|
10
|
+
**
|
11
|
+
**
|
12
|
+
***************************************************************/
|
13
|
+
|
14
|
+
typedef struct{
  char *cdfName;

  /* dimensions */
  int cols;
  int rows;

  /* XY coordinates of the grid corners, in pixel coordinates */
  int GridCornerULx;   /* upper left  */
  int GridCornerULy;
  int GridCornerURx;   /* upper right */
  int GridCornerURy;
  int GridCornerLRx;   /* lower right */
  int GridCornerLRy;
  int GridCornerLLx;   /* lower left  */
  int GridCornerLLy;

  /* textual header fields */
  char *DatHeader;
  char *Algorithm;
  char *AlgorithmParameters;
  char *ScanDate;
} detailed_header_info;
|
27
|
+
|
28
|
+
/******************************************************************
|
29
|
+
**
|
30
|
+
** A "C" level object designed to hold information for a
|
31
|
+
** single CEL file
|
32
|
+
**
|
33
|
+
** These should be created using the function
|
34
|
+
**
|
35
|
+
** read_cel_file()
|
36
|
+
**
|
37
|
+
**
|
38
|
+
**
|
39
|
+
*****************************************************************/
|
40
|
+
|
41
|
+
typedef struct{
|
42
|
+
detailed_header_info header;
|
43
|
+
|
44
|
+
/** these are for storing the intensities, the sds and the number of pixels **/
|
45
|
+
double *intensities;
|
46
|
+
double *stddev;
|
47
|
+
double *npixels;
|
48
|
+
|
49
|
+
/** these are for storing information in the masks and outliers section **/
|
50
|
+
|
51
|
+
int nmasks;
|
52
|
+
int noutliers;
|
53
|
+
|
54
|
+
short *masks_x, *masks_y;
|
55
|
+
short *outliers_x, *outliers_y;
|
56
|
+
|
57
|
+
} CEL;
|
58
|
+
|
59
|
+
/*
 * Declared here, defined elsewhere.  According to the comment on the CEL
 * structure in this header, CEL objects should be created with this function.
 *
 * filename: path of the file to read.
 * read_intensities_only: NOTE(review): presumably non-zero means only the
 *   intensities are loaded (not stddev/npixels/masks/outliers) - confirm
 *   against the definition.
 *
 * NOTE(review): ownership of the returned CEL and its buffers is not visible
 * from this header - confirm who frees it.
 */
extern CEL *read_cel_file(const char *filename, int read_intensities_only);
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
#endif
|
Binary file
|
@@ -0,0 +1,888 @@
|
|
1
|
+
/****************************************************************
|
2
|
+
**
|
3
|
+
** File: read_bpmap.c
|
4
|
+
**
|
5
|
+
** Implementation by: B. M. Bolstad
|
6
|
+
**
|
7
|
+
** Copyright (C) B. M. Bolstad 2006-2007
|
8
|
+
**
|
9
|
+
** A parser designed to read bpmap files into an R List structure
|
10
|
+
**
|
11
|
+
** History
|
12
|
+
** Mar 11, 2006 - Initial version
|
13
|
+
** Mar 12, 2006 - add additional support for versions 2 and 3
|
14
|
+
** May 31, 2006 - Fix some compiler warnings
|
15
|
+
** June 12, 2006 - fix naming vector length issue.
|
16
|
+
** June 12, 2007 - much wailing and grinding of teeth, but finally a fix for reading version number right.
|
17
|
+
** Aug 25, 2007 - Move file reading functions to centralized location
|
18
|
+
** Mar 14, 2008 - Fix reading of version number for big endian platforms
|
19
|
+
** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
|
20
|
+
**
|
21
|
+
*******************************************************************/
|
22
|
+
|
23
|
+
#include <R.h>
#include <Rdefines.h>

#include "stdlib.h"
#include "stdio.h"
#include <string.h>

#include "fread_functions.h"
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
/****************************************************************
|
34
|
+
**
|
35
|
+
**
|
36
|
+
**
|
37
|
+
**
|
38
|
+
** Note BPMAP files are stored in big endian format
|
39
|
+
**
|
40
|
+
*******************************************************************/
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
/*************************************************************************
|
45
|
+
**
|
46
|
+
** Code for reading from the big endian binary files, doing byte swapping if
|
47
|
+
** necessary (on little-endian machines)
|
48
|
+
**
|
49
|
+
**
|
50
|
+
************************************************************************/
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
/*
 * Converts *tnf4 to an integer BY VALUE (the bits of the float are not
 * reinterpreted), reverses the order of the four bytes of that integer, and
 * stores the resulting integer back into *tnf4 as a float.
 *
 * The byte shuffle is carried out on an unsigned copy so that no signed left
 * shift can overflow (previously a low byte >= 0x80 was shifted into the sign
 * bit of an int).  Converting the shuffled bits back through int keeps the
 * sign that the signed arithmetic produced on two's complement machines.
 *
 * NOTE(review): the initial float -> int conversion is still undefined for
 * NaN and for values outside the range of int; the callers in this file pass
 * raw bytes read from disk, so a range check there may be worth adding.
 * Assumes int and float are both 4 bytes wide.
 */
static void swap_float_4(float *tnf4)                /* 4 byte floating point numbers */
{
  unsigned int bits = (unsigned int)(int)(*tnf4);

  bits = (((bits >> 24) & 0xffu)   | ((bits & 0xffu) << 24) |
          ((bits >> 8) & 0xff00u)  | ((bits & 0xff00u) << 8));

  *tnf4 = (float)(int)bits;
}
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
static SEXP ReadBPMAPHeader(FILE *infile){
|
70
|
+
|
71
|
+
|
72
|
+
SEXP Header;
|
73
|
+
SEXP tmpSXP;
|
74
|
+
|
75
|
+
|
76
|
+
char *Magicnumber = R_alloc(8,sizeof(char));
|
77
|
+
float version_number = 0.0;
|
78
|
+
int version_number_int;
|
79
|
+
unsigned int unsigned_version_number_int;
|
80
|
+
|
81
|
+
|
82
|
+
unsigned int n_seq;
|
83
|
+
static double new_version_number;
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
fread_be_char(Magicnumber,8,infile);
|
88
|
+
|
89
|
+
if (strncmp(Magicnumber,"PHT7",4) !=0){
|
90
|
+
error("Based on the magic number which was %s, this does not appear to be a BPMAP file",Magicnumber);
|
91
|
+
}
|
92
|
+
|
93
|
+
|
94
|
+
/* version number is a little bit funky
|
95
|
+
need to do some funny things to coax it
|
96
|
+
into the right format
|
97
|
+
*/
|
98
|
+
|
99
|
+
|
100
|
+
/* cast to integer, swap bytes, cast to float */
|
101
|
+
/* fread_be_float32(&version_number,1,infile); */
|
102
|
+
fread_float32(&version_number,1,infile);
|
103
|
+
swap_float_4(&version_number);
|
104
|
+
|
105
|
+
new_version_number = (double)version_number;
|
106
|
+
/* // Rprintf("A %f\n",version_number);*/
|
107
|
+
|
108
|
+
if ((version_number <=0.5) || (version_number > 3.5)){
|
109
|
+
/* // Rprintf("Rereading\n"); */
|
110
|
+
fseek(infile,-sizeof(float),SEEK_CUR);
|
111
|
+
fread_be_uint32(&unsigned_version_number_int,1,infile);
|
112
|
+
memcpy(&version_number,&unsigned_version_number_int, sizeof(float));
|
113
|
+
new_version_number = (double)version_number;
|
114
|
+
}
|
115
|
+
|
116
|
+
fread_be_uint32(&n_seq,1,infile);
|
117
|
+
|
118
|
+
PROTECT(Header=allocVector(VECSXP,3));
|
119
|
+
|
120
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
121
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(Magicnumber));
|
122
|
+
SET_VECTOR_ELT(Header,0,tmpSXP);
|
123
|
+
UNPROTECT(1);
|
124
|
+
|
125
|
+
|
126
|
+
PROTECT(tmpSXP=allocVector(REALSXP,1));
|
127
|
+
REAL(tmpSXP)[0] = (double)new_version_number;
|
128
|
+
SET_VECTOR_ELT(Header,1,tmpSXP);
|
129
|
+
UNPROTECT(1);
|
130
|
+
|
131
|
+
|
132
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
133
|
+
INTEGER(tmpSXP)[0] = (int)n_seq;
|
134
|
+
SET_VECTOR_ELT(Header,2,tmpSXP);
|
135
|
+
UNPROTECT(1);
|
136
|
+
|
137
|
+
PROTECT(tmpSXP=allocVector(STRSXP,3));
|
138
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("magic.number"));
|
139
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("version"));
|
140
|
+
SET_STRING_ELT(tmpSXP,2,mkChar("n.seq"));
|
141
|
+
setAttrib(Header,R_NamesSymbol,tmpSXP);
|
142
|
+
UNPROTECT(2);
|
143
|
+
|
144
|
+
/* Rprintf("D %f %f\n",version_number,new_version_number); */
|
145
|
+
return Header;
|
146
|
+
|
147
|
+
}
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
static SEXP ReadBPMAPSeqDescription(FILE *infile, float version, int nseq){
|
152
|
+
|
153
|
+
|
154
|
+
SEXP SequenceDescriptionList;
|
155
|
+
|
156
|
+
SEXP CurSequenceDescription = R_NilValue;
|
157
|
+
SEXP tmpSXP,tmpSXP2;
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
int i,j;
|
162
|
+
|
163
|
+
unsigned int seq_name_length;
|
164
|
+
|
165
|
+
char *seq_name;
|
166
|
+
|
167
|
+
unsigned int probe_mapping_type;
|
168
|
+
unsigned int seq_file_offset;
|
169
|
+
|
170
|
+
unsigned int n_probes;
|
171
|
+
|
172
|
+
unsigned int group_name_length;
|
173
|
+
char *group_name;
|
174
|
+
|
175
|
+
unsigned int version_number_length;
|
176
|
+
char *version_number;
|
177
|
+
|
178
|
+
unsigned int number_parameters;
|
179
|
+
|
180
|
+
unsigned int param_length;
|
181
|
+
char *param_name;
|
182
|
+
|
183
|
+
/* Rprintf("%f %d\n",version,nseq); */
|
184
|
+
|
185
|
+
PROTECT(SequenceDescriptionList=allocVector(VECSXP,(int)nseq));
|
186
|
+
|
187
|
+
for (i=0; i < nseq; i++){
|
188
|
+
fread_be_uint32(&seq_name_length,1,infile);
|
189
|
+
seq_name = (char *)Calloc(seq_name_length+1,char);
|
190
|
+
fread_be_char(seq_name,seq_name_length,infile);
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
if (version == 3.00){
|
195
|
+
PROTECT(CurSequenceDescription=allocVector(VECSXP,8));
|
196
|
+
PROTECT(tmpSXP=allocVector(STRSXP,7));
|
197
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("Name"));
|
198
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("ProbeMappingType"));
|
199
|
+
SET_STRING_ELT(tmpSXP,2,mkChar("SequenceFileOffset"));
|
200
|
+
SET_STRING_ELT(tmpSXP,3,mkChar("n.probepairs"));
|
201
|
+
SET_STRING_ELT(tmpSXP,4,mkChar("GroupName"));
|
202
|
+
SET_STRING_ELT(tmpSXP,5,mkChar("VersionNumber"));
|
203
|
+
SET_STRING_ELT(tmpSXP,6,mkChar("NumberOfParameters"));
|
204
|
+
SET_STRING_ELT(tmpSXP,7,mkChar("Parameters"));
|
205
|
+
setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP);
|
206
|
+
UNPROTECT(1);
|
207
|
+
} else if (version == 2.00){
|
208
|
+
PROTECT(CurSequenceDescription=allocVector(VECSXP,6));
|
209
|
+
PROTECT(tmpSXP=allocVector(STRSXP,6));
|
210
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("Name"));
|
211
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("n.probepairs"));
|
212
|
+
SET_STRING_ELT(tmpSXP,2,mkChar("GroupName"));
|
213
|
+
SET_STRING_ELT(tmpSXP,3,mkChar("VersionNumber"));
|
214
|
+
SET_STRING_ELT(tmpSXP,4,mkChar("NumberOfParameters"));
|
215
|
+
SET_STRING_ELT(tmpSXP,5,mkChar("Parameters"));
|
216
|
+
setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP);
|
217
|
+
UNPROTECT(1);
|
218
|
+
} else if (version == 1.00){
|
219
|
+
PROTECT(CurSequenceDescription=allocVector(VECSXP,2));
|
220
|
+
PROTECT(tmpSXP=allocVector(STRSXP,2));
|
221
|
+
SET_STRING_ELT(tmpSXP,0,mkChar("Name"));
|
222
|
+
SET_STRING_ELT(tmpSXP,1,mkChar("n.probepairs"));
|
223
|
+
setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP);
|
224
|
+
UNPROTECT(1);
|
225
|
+
|
226
|
+
}
|
227
|
+
|
228
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
229
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(seq_name));
|
230
|
+
SET_VECTOR_ELT(CurSequenceDescription,0,tmpSXP);
|
231
|
+
UNPROTECT(1);
|
232
|
+
Free(seq_name);
|
233
|
+
|
234
|
+
|
235
|
+
if (version == 1.0){
|
236
|
+
fread_be_uint32(&n_probes,1,infile);
|
237
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
238
|
+
INTEGER(tmpSXP)[0] = n_probes;
|
239
|
+
SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP);
|
240
|
+
UNPROTECT(1);
|
241
|
+
} else if (version ==2.0){
|
242
|
+
fread_be_uint32(&n_probes,1,infile);
|
243
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
244
|
+
INTEGER(tmpSXP)[0] = n_probes;
|
245
|
+
SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP);
|
246
|
+
UNPROTECT(1);
|
247
|
+
|
248
|
+
|
249
|
+
|
250
|
+
|
251
|
+
fread_be_uint32(&group_name_length,1,infile);
|
252
|
+
group_name = (char *)Calloc(group_name_length+1,char);
|
253
|
+
fread_be_char(group_name,group_name_length,infile);
|
254
|
+
|
255
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
256
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(group_name));
|
257
|
+
SET_VECTOR_ELT(CurSequenceDescription,2,tmpSXP);
|
258
|
+
UNPROTECT(1);
|
259
|
+
Free(group_name);
|
260
|
+
|
261
|
+
|
262
|
+
fread_be_uint32(&version_number_length,1,infile);
|
263
|
+
version_number = (char *)Calloc(version_number_length+1,char);
|
264
|
+
fread_be_char(version_number,version_number_length,infile);
|
265
|
+
|
266
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
267
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(version_number));
|
268
|
+
SET_VECTOR_ELT(CurSequenceDescription,3,tmpSXP);
|
269
|
+
UNPROTECT(1);
|
270
|
+
Free(version_number);
|
271
|
+
|
272
|
+
|
273
|
+
fread_be_uint32(&number_parameters,1,infile);
|
274
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
275
|
+
INTEGER(tmpSXP)[0] = number_parameters;
|
276
|
+
SET_VECTOR_ELT(CurSequenceDescription,4,tmpSXP);
|
277
|
+
UNPROTECT(1);
|
278
|
+
|
279
|
+
PROTECT(tmpSXP=allocVector(VECSXP,number_parameters));
|
280
|
+
|
281
|
+
|
282
|
+
for (j=0; j < number_parameters; j++){
|
283
|
+
PROTECT(tmpSXP2 = allocVector(STRSXP,2));
|
284
|
+
fread_be_uint32(¶m_length,1,infile);
|
285
|
+
param_name = (char *)Calloc(param_length+1,char);
|
286
|
+
fread_be_char(param_name,param_length,infile);
|
287
|
+
SET_STRING_ELT(tmpSXP2,0,mkChar(param_name));
|
288
|
+
Free(param_name);
|
289
|
+
fread_be_uint32(¶m_length,1,infile);
|
290
|
+
param_name = (char *)Calloc(param_length+1,char);
|
291
|
+
fread_be_char(param_name,param_length,infile);
|
292
|
+
SET_STRING_ELT(tmpSXP2,1,mkChar(param_name));
|
293
|
+
Free(param_name);
|
294
|
+
|
295
|
+
SET_VECTOR_ELT(tmpSXP,j,tmpSXP2);
|
296
|
+
UNPROTECT(1);
|
297
|
+
}
|
298
|
+
SET_VECTOR_ELT(CurSequenceDescription,5,tmpSXP);
|
299
|
+
UNPROTECT(1);
|
300
|
+
|
301
|
+
|
302
|
+
|
303
|
+
} else if (version ==3.0){
|
304
|
+
fread_be_uint32(&probe_mapping_type,1,infile);
|
305
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
306
|
+
INTEGER(tmpSXP)[0] = probe_mapping_type;
|
307
|
+
SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP);
|
308
|
+
UNPROTECT(1);
|
309
|
+
|
310
|
+
fread_be_uint32(&seq_file_offset,1,infile);
|
311
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
312
|
+
INTEGER(tmpSXP)[0] = seq_file_offset;
|
313
|
+
SET_VECTOR_ELT(CurSequenceDescription,2,tmpSXP);
|
314
|
+
UNPROTECT(1);
|
315
|
+
|
316
|
+
fread_be_uint32(&n_probes,1,infile);
|
317
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
318
|
+
INTEGER(tmpSXP)[0] = n_probes;
|
319
|
+
SET_VECTOR_ELT(CurSequenceDescription,3,tmpSXP);
|
320
|
+
UNPROTECT(1);
|
321
|
+
|
322
|
+
fread_be_uint32(&group_name_length,1,infile);
|
323
|
+
group_name = (char *)Calloc(group_name_length+1,char);
|
324
|
+
fread_be_char(group_name,group_name_length,infile);
|
325
|
+
|
326
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
327
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(group_name));
|
328
|
+
SET_VECTOR_ELT(CurSequenceDescription,4,tmpSXP);
|
329
|
+
UNPROTECT(1);
|
330
|
+
Free(group_name);
|
331
|
+
|
332
|
+
fread_be_uint32(&version_number_length,1,infile);
|
333
|
+
version_number = (char *)Calloc(version_number_length+1,char);
|
334
|
+
fread_be_char(version_number,version_number_length,infile);
|
335
|
+
|
336
|
+
PROTECT(tmpSXP=allocVector(STRSXP,1));
|
337
|
+
SET_STRING_ELT(tmpSXP,0,mkChar(version_number));
|
338
|
+
SET_VECTOR_ELT(CurSequenceDescription,5,tmpSXP);
|
339
|
+
UNPROTECT(1);
|
340
|
+
Free(version_number);
|
341
|
+
|
342
|
+
fread_be_uint32(&number_parameters,1,infile);
|
343
|
+
PROTECT(tmpSXP=allocVector(INTSXP,1));
|
344
|
+
INTEGER(tmpSXP)[0] = number_parameters;
|
345
|
+
SET_VECTOR_ELT(CurSequenceDescription,6,tmpSXP);
|
346
|
+
UNPROTECT(1);
|
347
|
+
|
348
|
+
|
349
|
+
|
350
|
+
PROTECT(tmpSXP=allocVector(VECSXP,number_parameters));
|
351
|
+
|
352
|
+
|
353
|
+
for (j=0; j < number_parameters; j++){
|
354
|
+
PROTECT(tmpSXP2 = allocVector(STRSXP,2));
|
355
|
+
fread_be_uint32(¶m_length,1,infile);
|
356
|
+
param_name = (char *)Calloc(param_length+1,char);
|
357
|
+
fread_be_char(param_name,param_length,infile);
|
358
|
+
SET_STRING_ELT(tmpSXP2,0,mkChar(param_name));
|
359
|
+
Free(param_name);
|
360
|
+
fread_be_uint32(¶m_length,1,infile);
|
361
|
+
param_name = (char *)Calloc(param_length+1,char);
|
362
|
+
fread_be_char(param_name,param_length,infile);
|
363
|
+
SET_STRING_ELT(tmpSXP2,1,mkChar(param_name));
|
364
|
+
Free(param_name);
|
365
|
+
|
366
|
+
SET_VECTOR_ELT(tmpSXP,j,tmpSXP2);
|
367
|
+
UNPROTECT(1);
|
368
|
+
}
|
369
|
+
SET_VECTOR_ELT(CurSequenceDescription,7,tmpSXP);
|
370
|
+
UNPROTECT(1);
|
371
|
+
}
|
372
|
+
|
373
|
+
SET_VECTOR_ELT(SequenceDescriptionList,i,CurSequenceDescription);
|
374
|
+
UNPROTECT(1);
|
375
|
+
|
376
|
+
}
|
377
|
+
|
378
|
+
UNPROTECT(1);
|
379
|
+
return SequenceDescriptionList;
|
380
|
+
|
381
|
+
}
|
382
|
+
|
383
|
+
|
384
|
+
|
385
|
+
/*
 * Expands a packed probe sequence into base letters.
 *
 * probeseq stores two bits per base, most significant pair first, with
 * 0 -> 'A', 1 -> 'C', 2 -> 'G' and 3 -> 'T'.  Each of the first six bytes
 * yields four bases and only the top two bits of the seventh byte are used,
 * so exactly 25 characters are written to dest[0] .. dest[24].
 *
 * No terminating NUL is written; dest must have room for 25 characters and
 * the caller has to supply the terminator if a C string is needed.
 */
static void packedSeqTobaseStr(unsigned char probeseq[7], char *dest){

  static const char base_letters[4] = {'A','C','G','T'};

  int byte_index;
  int pair;

  for (byte_index = 0; byte_index < 6; byte_index++){
    unsigned char packed = probeseq[byte_index];

    /* pair 0 sits in bits 7-6, pair 3 in bits 1-0 */
    for (pair = 0; pair < 4; pair++){
      dest[4*byte_index + pair] = base_letters[(packed >> (6 - 2*pair)) & 3];
    }
  }

  /* 25th base: top two bits of the last byte */
  dest[24] = base_letters[(probeseq[6] >> 6) & 3];
}
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
|
498
|
+
|
499
|
+
|
500
|
+
|
501
|
+
|
502
|
+
static SEXP readBPMAPSeqIdPositionInfo(FILE *infile, float version, int nseq, SEXP seqDesc){
|
503
|
+
|
504
|
+
|
505
|
+
SEXP SeqIdPositionInfoList;
|
506
|
+
SEXP curSeqIdPositionInfo;
|
507
|
+
SEXP PositionInfo= R_NilValue;
|
508
|
+
SEXP PositionInfoRowNames;
|
509
|
+
|
510
|
+
|
511
|
+
SEXP tmpSEXP;
|
512
|
+
|
513
|
+
SEXP xPM= R_NilValue,yPM= R_NilValue,xMM= R_NilValue,yMM= R_NilValue;
|
514
|
+
SEXP PMprobeLength= R_NilValue;
|
515
|
+
SEXP probeSeqString= R_NilValue;
|
516
|
+
SEXP MatchScore= R_NilValue;
|
517
|
+
SEXP PMposition= R_NilValue;
|
518
|
+
SEXP Strand= R_NilValue;
|
519
|
+
|
520
|
+
char buf[10];
|
521
|
+
|
522
|
+
char *dest;
|
523
|
+
|
524
|
+
|
525
|
+
int nprobes=0;
|
526
|
+
int probe_mapping_type=0;
|
527
|
+
int i,j;
|
528
|
+
|
529
|
+
|
530
|
+
unsigned int SeqId;
|
531
|
+
|
532
|
+
unsigned int x;
|
533
|
+
unsigned int y;
|
534
|
+
|
535
|
+
unsigned int x_mm;
|
536
|
+
unsigned int y_mm;
|
537
|
+
|
538
|
+
unsigned char probelength;
|
539
|
+
|
540
|
+
unsigned char probeseq[7];
|
541
|
+
|
542
|
+
float matchScore;
|
543
|
+
int matchScore_int;
|
544
|
+
|
545
|
+
unsigned int positionPM;
|
546
|
+
unsigned char strand;
|
547
|
+
|
548
|
+
|
549
|
+
PROTECT(SeqIdPositionInfoList = allocVector(VECSXP,nseq));
|
550
|
+
|
551
|
+
for (i =0; i < nseq; i++){
|
552
|
+
fread_be_uint32(&SeqId,1,infile);
|
553
|
+
/*Rprintf("Seq id:%u\n",SeqId);*/
|
554
|
+
|
555
|
+
PROTECT(curSeqIdPositionInfo = allocVector(VECSXP,2));
|
556
|
+
|
557
|
+
|
558
|
+
PROTECT(tmpSEXP=allocVector(INTSXP,1));
|
559
|
+
INTEGER(tmpSEXP)[0] = (int)SeqId;
|
560
|
+
SET_VECTOR_ELT(curSeqIdPositionInfo,0,tmpSEXP);
|
561
|
+
UNPROTECT(1);
|
562
|
+
|
563
|
+
|
564
|
+
PROTECT(tmpSEXP=allocVector(STRSXP,2));
|
565
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("Header"));
|
566
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("PositionInformation"));
|
567
|
+
setAttrib(curSeqIdPositionInfo,R_NamesSymbol,tmpSEXP);
|
568
|
+
UNPROTECT(1);
|
569
|
+
|
570
|
+
|
571
|
+
|
572
|
+
if ((version == 1.0) || (version == 2.0)){
|
573
|
+
nprobes = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),1))[0];
|
574
|
+
/* Rprintf("nprobes: %d\n",nprobes); */
|
575
|
+
probe_mapping_type = 0; /* PM/MM tiling */
|
576
|
+
|
577
|
+
PROTECT(PositionInfo = allocVector(VECSXP,9));
|
578
|
+
PROTECT(xPM = allocVector(INTSXP,nprobes));
|
579
|
+
PROTECT(yPM = allocVector(INTSXP,nprobes));
|
580
|
+
PROTECT(xMM = allocVector(INTSXP,nprobes));
|
581
|
+
PROTECT(yMM = allocVector(INTSXP,nprobes));
|
582
|
+
PROTECT(PMprobeLength = allocVector(INTSXP,nprobes));
|
583
|
+
PROTECT(probeSeqString = allocVector(STRSXP,nprobes));
|
584
|
+
PROTECT(MatchScore = allocVector(REALSXP,nprobes));
|
585
|
+
PROTECT(PMposition = allocVector(INTSXP,nprobes));
|
586
|
+
PROTECT(Strand = allocVector(STRSXP,nprobes));
|
587
|
+
|
588
|
+
SET_VECTOR_ELT(PositionInfo,0,xPM);
|
589
|
+
SET_VECTOR_ELT(PositionInfo,1,yPM);
|
590
|
+
SET_VECTOR_ELT(PositionInfo,2,xMM);
|
591
|
+
SET_VECTOR_ELT(PositionInfo,3,yMM);
|
592
|
+
SET_VECTOR_ELT(PositionInfo,4,PMprobeLength);
|
593
|
+
SET_VECTOR_ELT(PositionInfo,5,probeSeqString);
|
594
|
+
SET_VECTOR_ELT(PositionInfo,6,MatchScore);
|
595
|
+
SET_VECTOR_ELT(PositionInfo,7,PMposition);
|
596
|
+
SET_VECTOR_ELT(PositionInfo,8,Strand);
|
597
|
+
UNPROTECT(9);
|
598
|
+
|
599
|
+
setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame"));
|
600
|
+
|
601
|
+
PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes));
|
602
|
+
for (j=0; j < nprobes; j++){
|
603
|
+
sprintf(buf, "%d", j+1);
|
604
|
+
SET_STRING_ELT(PositionInfoRowNames,j,mkChar(buf));
|
605
|
+
}
|
606
|
+
setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames);
|
607
|
+
UNPROTECT(1);
|
608
|
+
|
609
|
+
PROTECT(tmpSEXP = allocVector(STRSXP,9));
|
610
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("x"));
|
611
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("y"));
|
612
|
+
SET_STRING_ELT(tmpSEXP,2,mkChar("x.mm"));
|
613
|
+
SET_STRING_ELT(tmpSEXP,3,mkChar("y.mm"));
|
614
|
+
SET_STRING_ELT(tmpSEXP,4,mkChar("PMLength"));
|
615
|
+
SET_STRING_ELT(tmpSEXP,5,mkChar("ProbeSeq"));
|
616
|
+
SET_STRING_ELT(tmpSEXP,6,mkChar("MatchScore"));
|
617
|
+
SET_STRING_ELT(tmpSEXP,7,mkChar("PMPosition"));
|
618
|
+
SET_STRING_ELT(tmpSEXP,8,mkChar("TargetStrand"));
|
619
|
+
|
620
|
+
setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP);
|
621
|
+
UNPROTECT(1);
|
622
|
+
|
623
|
+
} else if (version == 3.0){
|
624
|
+
nprobes = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),3))[0];
|
625
|
+
probe_mapping_type = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),1))[0];
|
626
|
+
|
627
|
+
|
628
|
+
if (probe_mapping_type == 0){
|
629
|
+
PROTECT(PositionInfo = allocVector(VECSXP,9));
|
630
|
+
PROTECT(xPM = allocVector(INTSXP,nprobes));
|
631
|
+
PROTECT(yPM = allocVector(INTSXP,nprobes));
|
632
|
+
PROTECT(xMM = allocVector(INTSXP,nprobes));
|
633
|
+
PROTECT(yMM = allocVector(INTSXP,nprobes));
|
634
|
+
PROTECT(PMprobeLength = allocVector(INTSXP,nprobes));
|
635
|
+
PROTECT(probeSeqString = allocVector(STRSXP,nprobes));
|
636
|
+
PROTECT(MatchScore = allocVector(REALSXP,nprobes));
|
637
|
+
PROTECT(PMposition = allocVector(INTSXP,nprobes));
|
638
|
+
PROTECT(Strand = allocVector(STRSXP,nprobes));
|
639
|
+
|
640
|
+
SET_VECTOR_ELT(PositionInfo,0,xPM);
|
641
|
+
SET_VECTOR_ELT(PositionInfo,1,yPM);
|
642
|
+
SET_VECTOR_ELT(PositionInfo,2,xMM);
|
643
|
+
SET_VECTOR_ELT(PositionInfo,3,yMM);
|
644
|
+
SET_VECTOR_ELT(PositionInfo,4,PMprobeLength);
|
645
|
+
SET_VECTOR_ELT(PositionInfo,5,probeSeqString);
|
646
|
+
SET_VECTOR_ELT(PositionInfo,6,MatchScore);
|
647
|
+
SET_VECTOR_ELT(PositionInfo,7,PMposition);
|
648
|
+
SET_VECTOR_ELT(PositionInfo,8,Strand);
|
649
|
+
UNPROTECT(9);
|
650
|
+
|
651
|
+
setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame"));
|
652
|
+
|
653
|
+
PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes));
|
654
|
+
for (j=0; j < nprobes; j++){
|
655
|
+
sprintf(buf, "%d", j+1);
|
656
|
+
SET_VECTOR_ELT(PositionInfoRowNames,j,mkChar(buf));
|
657
|
+
}
|
658
|
+
setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames);
|
659
|
+
UNPROTECT(1);
|
660
|
+
|
661
|
+
PROTECT(tmpSEXP = allocVector(STRSXP,9));
|
662
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("x"));
|
663
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("y"));
|
664
|
+
SET_STRING_ELT(tmpSEXP,2,mkChar("x.mm"));
|
665
|
+
SET_STRING_ELT(tmpSEXP,3,mkChar("y.mm"));
|
666
|
+
SET_STRING_ELT(tmpSEXP,4,mkChar("PMLength"));
|
667
|
+
SET_STRING_ELT(tmpSEXP,5,mkChar("ProbeSeq"));
|
668
|
+
SET_STRING_ELT(tmpSEXP,6,mkChar("MatchScore"));
|
669
|
+
SET_STRING_ELT(tmpSEXP,7,mkChar("PMPosition"));
|
670
|
+
SET_STRING_ELT(tmpSEXP,8,mkChar("TargetStrand"));
|
671
|
+
|
672
|
+
setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP);
|
673
|
+
UNPROTECT(1);
|
674
|
+
} else {
|
675
|
+
|
676
|
+
PROTECT(PositionInfo = allocVector(VECSXP,7));
|
677
|
+
PROTECT(xPM = allocVector(INTSXP,nprobes));
|
678
|
+
PROTECT(yPM = allocVector(INTSXP,nprobes));
|
679
|
+
PROTECT(PMprobeLength = allocVector(INTSXP,nprobes));
|
680
|
+
PROTECT(probeSeqString = allocVector(STRSXP,nprobes));
|
681
|
+
PROTECT(MatchScore = allocVector(REALSXP,nprobes));
|
682
|
+
PROTECT(PMposition = allocVector(INTSXP,nprobes));
|
683
|
+
PROTECT(Strand = allocVector(STRSXP,nprobes));
|
684
|
+
|
685
|
+
SET_VECTOR_ELT(PositionInfo,0,xPM);
|
686
|
+
SET_VECTOR_ELT(PositionInfo,1,yPM);
|
687
|
+
SET_VECTOR_ELT(PositionInfo,2,PMprobeLength);
|
688
|
+
SET_VECTOR_ELT(PositionInfo,3,probeSeqString);
|
689
|
+
SET_VECTOR_ELT(PositionInfo,4,MatchScore);
|
690
|
+
SET_VECTOR_ELT(PositionInfo,5,PMposition);
|
691
|
+
SET_VECTOR_ELT(PositionInfo,6,Strand);
|
692
|
+
UNPROTECT(7);
|
693
|
+
|
694
|
+
setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame"));
|
695
|
+
|
696
|
+
PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes));
|
697
|
+
for (j=0; j < nprobes; j++){
|
698
|
+
sprintf(buf, "%d", j+1);
|
699
|
+
SET_STRING_ELT(PositionInfoRowNames,j,mkChar(buf));
|
700
|
+
}
|
701
|
+
setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames);
|
702
|
+
UNPROTECT(1);
|
703
|
+
|
704
|
+
PROTECT(tmpSEXP = allocVector(STRSXP,7));
|
705
|
+
SET_STRING_ELT(tmpSEXP,0,mkChar("x"));
|
706
|
+
SET_STRING_ELT(tmpSEXP,1,mkChar("y"));
|
707
|
+
SET_STRING_ELT(tmpSEXP,2,mkChar("PMLength"));
|
708
|
+
SET_STRING_ELT(tmpSEXP,3,mkChar("ProbeSeq"));
|
709
|
+
SET_STRING_ELT(tmpSEXP,4,mkChar("MatchScore"));
|
710
|
+
SET_STRING_ELT(tmpSEXP,5,mkChar("PMPosition"));
|
711
|
+
SET_STRING_ELT(tmpSEXP,6,mkChar("TargetStrand"));
|
712
|
+
|
713
|
+
setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP);
|
714
|
+
UNPROTECT(1);
|
715
|
+
}
|
716
|
+
|
717
|
+
|
718
|
+
}
|
719
|
+
|
720
|
+
|
721
|
+
|
722
|
+
|
723
|
+
|
724
|
+
for (j=0; j < nprobes; j++){
|
725
|
+
fread_be_uint32(&x,1,infile);
|
726
|
+
fread_be_uint32(&y,1,infile);
|
727
|
+
/* Rprintf("x y :%u %u\n",x,y); */
|
728
|
+
|
729
|
+
if (probe_mapping_type == 0){
|
730
|
+
fread_be_uint32(&x_mm,1,infile);
|
731
|
+
fread_be_uint32(&y_mm,1,infile);
|
732
|
+
}
|
733
|
+
|
734
|
+
/* Rprintf("mm x y :%u %u\n",x_mm,y_mm); */
|
735
|
+
|
736
|
+
INTEGER(xPM)[j] = x;
|
737
|
+
INTEGER(yPM)[j] = y;
|
738
|
+
|
739
|
+
if (probe_mapping_type == 0){
|
740
|
+
INTEGER(xMM)[j] = x_mm;
|
741
|
+
INTEGER(yMM)[j] = y_mm;
|
742
|
+
}
|
743
|
+
fread_be_uchar(&probelength,1,infile);
|
744
|
+
/* Rprintf("probelength : %d\n",(int)probelength);*/
|
745
|
+
|
746
|
+
INTEGER(PMprobeLength)[j] = probelength;
|
747
|
+
|
748
|
+
|
749
|
+
fread_be_uchar(probeseq,7,infile);
|
750
|
+
/* Rprintf("probeseq : %s\n",probeseq); */
|
751
|
+
|
752
|
+
|
753
|
+
|
754
|
+
dest = (char *)Calloc(25+1,char);
|
755
|
+
packedSeqTobaseStr(probeseq,dest);
|
756
|
+
|
757
|
+
SET_STRING_ELT(probeSeqString,j,mkChar(dest));
|
758
|
+
Free(dest);
|
759
|
+
|
760
|
+
|
761
|
+
|
762
|
+
|
763
|
+
/* matchScore is treated same as version number in header */
|
764
|
+
#ifdef WORDS_BIGENDIAN
|
765
|
+
/* swap, cast to integer, swap bytes and cast back to float */
|
766
|
+
fread_be_float32(&matchScore,1,infile);
|
767
|
+
swap_float_4(&matchScore);
|
768
|
+
matchScore_int = (int)matchScore;
|
769
|
+
|
770
|
+
|
771
|
+
matchScore_int=(((matchScore_int>>24)&0xff) | ((matchScore_int&0xff)<<24) |
|
772
|
+
((matchScore_int>>8)&0xff00) | ((matchScore_int&0xff00)<<8));
|
773
|
+
matchScore = (float)matchScore_int;
|
774
|
+
|
775
|
+
#else
|
776
|
+
/* cast to integer, swap bytes, cast to float */
|
777
|
+
fread_float32(&matchScore,1,infile);
|
778
|
+
matchScore_int = (int)matchScore;
|
779
|
+
matchScore_int=(((matchScore_int>>24)&0xff) | ((matchScore_int&0xff)<<24) |
|
780
|
+
((matchScore_int>>8)&0xff00) | ((matchScore_int&0xff00)<<8));
|
781
|
+
matchScore = (float)matchScore_int;
|
782
|
+
#endif
|
783
|
+
/* Rprintf("matchScore : %f\n",matchScore); */
|
784
|
+
|
785
|
+
REAL(MatchScore)[j] = matchScore;
|
786
|
+
|
787
|
+
|
788
|
+
|
789
|
+
fread_be_uint32(&positionPM,1,infile);
|
790
|
+
/* Rprintf("positionPM : %u\n",positionPM);*/
|
791
|
+
INTEGER(PMposition)[j] = positionPM;
|
792
|
+
|
793
|
+
|
794
|
+
fread_be_uchar(&strand,1,infile);
|
795
|
+
/* Rprintf("strand: %d\n",(int)strand);*/
|
796
|
+
|
797
|
+
if ((int)strand ==1){
|
798
|
+
SET_STRING_ELT(Strand,j,mkChar("F"));
|
799
|
+
} else {
|
800
|
+
SET_STRING_ELT(Strand,j,mkChar("R"));
|
801
|
+
}
|
802
|
+
|
803
|
+
|
804
|
+
}
|
805
|
+
|
806
|
+
SET_VECTOR_ELT(curSeqIdPositionInfo,1,PositionInfo);
|
807
|
+
UNPROTECT(1);
|
808
|
+
|
809
|
+
SET_VECTOR_ELT(SeqIdPositionInfoList,i,curSeqIdPositionInfo);
|
810
|
+
UNPROTECT(1);
|
811
|
+
}
|
812
|
+
|
813
|
+
|
814
|
+
UNPROTECT(1);
|
815
|
+
return SeqIdPositionInfoList;
|
816
|
+
|
817
|
+
}
|
818
|
+
|
819
|
+
|
820
|
+
|
821
|
+
|
822
|
+
|
823
|
+
|
824
|
+
/*
 * Reads a BPMAP file into an R list.
 *
 * filename: character vector; its first element is the path of the file.
 *
 * Returns a list with three named elements:
 *   Header              - result of ReadBPMAPHeader()
 *   SequenceDescription - result of ReadBPMAPSeqDescription()
 *   SeqHead.PosInfo     - result of readBPMAPSeqIdPositionInfo()
 *
 * Raises an R error if the file cannot be opened.
 *
 * NOTE(review): the file is closed on the normal path only.  If one of the
 * readers raises an R error (for example on a bad magic number) control does
 * not return here and the FILE is left open.
 */
SEXP ReadBPMAPFileIntoRList(SEXP filename){

  SEXP bpmapRlist;
  SEXP bpmapHeader;
  SEXP bpmapSeqDesc;
  SEXP tmpSXP;

  FILE *infile;

  int n_seq;
  float version;

  const char *cur_file_name;
  cur_file_name = CHAR(STRING_ELT(filename,0));

  if ((infile = fopen(cur_file_name, "rb")) == NULL)
    {
      /* report the C string, not the SEXP it was extracted from */
      error("Unable to open the file %s",cur_file_name);
    }

  /*
     first element is header, second item is sequence descriptions
     third item is sequence header/position information
  */
  PROTECT(bpmapRlist = allocVector(VECSXP,3));

  PROTECT(bpmapHeader = ReadBPMAPHeader(infile));
  SET_VECTOR_ELT(bpmapRlist,0,bpmapHeader);
  version = REAL(VECTOR_ELT(bpmapHeader,1))[0];
  n_seq = INTEGER(VECTOR_ELT(bpmapHeader,2))[0];
  UNPROTECT(1);

  PROTECT(bpmapSeqDesc = ReadBPMAPSeqDescription(infile,version,n_seq));
  SET_VECTOR_ELT(bpmapRlist,1,bpmapSeqDesc);
  SET_VECTOR_ELT(bpmapRlist,2,readBPMAPSeqIdPositionInfo(infile,version,n_seq,bpmapSeqDesc));
  UNPROTECT(1);

  /* everything has been read */
  fclose(infile);

  PROTECT(tmpSXP=allocVector(STRSXP,3));
  SET_STRING_ELT(tmpSXP,0,mkChar("Header"));
  SET_STRING_ELT(tmpSXP,1,mkChar("SequenceDescription"));
  SET_STRING_ELT(tmpSXP,2,mkChar("SeqHead.PosInfo"));
  setAttrib(bpmapRlist,R_NamesSymbol,tmpSXP);
  UNPROTECT(1);

  UNPROTECT(1);
  return bpmapRlist;
}
|
888
|
+
|