bio-bigwig 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +131 -0
- data/ext/bio/bigwig/bigwigext.c +747 -0
- data/ext/bio/bigwig/bigwigext.h +9 -0
- data/ext/bio/bigwig/extconf.rb +30 -0
- data/ext/bio/bigwig/libBigWig/LICENSE +22 -0
- data/ext/bio/bigwig/libBigWig/bigWig.h +606 -0
- data/ext/bio/bigwig/libBigWig/bigWigIO.h +110 -0
- data/ext/bio/bigwig/libBigWig/bwCommon.h +70 -0
- data/ext/bio/bigwig/libBigWig/bwRead.c +427 -0
- data/ext/bio/bigwig/libBigWig/bwStats.c +537 -0
- data/ext/bio/bigwig/libBigWig/bwValues.c +803 -0
- data/ext/bio/bigwig/libBigWig/bwValues.h +77 -0
- data/ext/bio/bigwig/libBigWig/bwWrite.c +1333 -0
- data/ext/bio/bigwig/libBigWig/io.c +296 -0
- data/ext/bio/bigwig/libBigWig/test/exampleWrite.c +76 -0
- data/ext/bio/bigwig/libBigWig/test/testBigBed.c +132 -0
- data/ext/bio/bigwig/libBigWig/test/testIterator.c +67 -0
- data/ext/bio/bigwig/libBigWig/test/testLocal.c +223 -0
- data/ext/bio/bigwig/libBigWig/test/testRemote.c +203 -0
- data/ext/bio/bigwig/libBigWig/test/testRemoteManyContigs.c +46 -0
- data/ext/bio/bigwig/libBigWig/test/testWrite.c +68 -0
- data/lib/bio/bigwig/version.rb +7 -0
- data/lib/bio/bigwig.rb +47 -0
- metadata +69 -0
@@ -0,0 +1,747 @@
|
|
1
|
+
#include "bigwigext.h"
|
2
|
+
|
3
|
+
#define SIZEOF_INT32 4
|
4
|
+
#define SIZEOF_INT64 8
|
5
|
+
|
6
|
+
#if SIZEOF_SHORT == SIZEOF_INT32
|
7
|
+
#define NUM2UINT32 NUM2USHORT
|
8
|
+
#define NUM2INT32 NUM2SHORT
|
9
|
+
#define UINT32_2NUM USHORT2NUM
|
10
|
+
#define INT32_2NUM SHORT2NUM
|
11
|
+
#elif SIZEOF_INT == SIZEOF_INT32
|
12
|
+
#define NUM2UINT32 NUM2UINT
|
13
|
+
#define NUM2INT32 NUM2INT
|
14
|
+
#define UINT32_2NUM UINT2NUM
|
15
|
+
#define INT32_2NUM INT2NUM
|
16
|
+
#elif SIZEOF_LONG == SIZEOF_INT32
|
17
|
+
#define NUM2UINT32 NUM2ULONG
|
18
|
+
#define NUM2INT32 NUM2LONG
|
19
|
+
#define UINT32_2NUM ULONG2NUM
|
20
|
+
#define INT32_2NUM LONG2NUM
|
21
|
+
#else
|
22
|
+
#error "Neither int, long, nor short is the same size as int32_t"
|
23
|
+
#endif
|
24
|
+
|
25
|
+
#if SIZEOF_INT == SIZEOF_INT64
|
26
|
+
#define NUM2UINT64 NUM2UINT
|
27
|
+
#define NUM2INT64 NUM2INT
|
28
|
+
#define UINT64_2NUM UINT2NUM
|
29
|
+
#define INT64_2NUM INT2NUM
|
30
|
+
#elif SIZEOF_LONG == SIZEOF_INT64
|
31
|
+
#define NUM2UINT64 NUM2ULONG
|
32
|
+
#define NUM2INT64 NUM2LONG
|
33
|
+
#define UINT64_2NUM ULONG2NUM
|
34
|
+
#define INT64_2NUM LONG2NUM
|
35
|
+
#elif SIZEOF_LONGLONG == SIZEOF_INT64
|
36
|
+
#define NUM2UINT64 NUM2ULL
|
37
|
+
#define NUM2INT64 NUM2LL
|
38
|
+
#define UINT64_2NUM ULL2NUM
|
39
|
+
#define INT64_2NUM LL2NUM
|
40
|
+
#else
|
41
|
+
#error "Neither int, long, nor short is the same size as int64_t"
|
42
|
+
#endif
|
43
|
+
|
44
|
+
VALUE rb_Bio;
|
45
|
+
VALUE rb_BigWig;
|
46
|
+
|
47
|
+
static void BigWig_free(void *ptr);
|
48
|
+
static size_t BigWig_memsize(const void *ptr);
|
49
|
+
|
50
|
+
static const rb_data_type_t BigWig_type = {
|
51
|
+
"BigWig",
|
52
|
+
{
|
53
|
+
0,
|
54
|
+
BigWig_free,
|
55
|
+
BigWig_memsize,
|
56
|
+
},
|
57
|
+
0,
|
58
|
+
0,
|
59
|
+
RUBY_TYPED_FREE_IMMEDIATELY};
|
60
|
+
|
61
|
+
static void
|
62
|
+
BigWig_free(void *ptr)
|
63
|
+
{
|
64
|
+
if (ptr)
|
65
|
+
{
|
66
|
+
bwClose((bigWigFile_t *)ptr);
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
static size_t BigWig_memsize(const void *ptr)
|
71
|
+
{
|
72
|
+
const bigWigFile_t *data = ptr;
|
73
|
+
|
74
|
+
return data ? sizeof(*data) : 0;
|
75
|
+
}
|
76
|
+
|
77
|
+
// Fetch the bigWigFile_t pointer wrapped by a Bio::BigWig object, checking
// the object against BigWig_type. The result is NULL when no file is
// attached (bigwig_allocate() wraps NULL and bigwig_close() resets it).
static bigWigFile_t *get_bigWigFile(VALUE obj)
{
  // TypedData_Get_Struct is a thin wrapper around this checked lookup.
  return (bigWigFile_t *)rb_check_typeddata(obj, &BigWig_type);
}
|
83
|
+
|
84
|
+
static VALUE
|
85
|
+
bigwig_allocate(VALUE klass)
|
86
|
+
{
|
87
|
+
bigWigFile_t *bw = NULL;
|
88
|
+
|
89
|
+
return TypedData_Wrap_Struct(klass, &BigWig_type, bw);
|
90
|
+
}
|
91
|
+
|
92
|
+
//Return 1 if there are any entries at all
|
93
|
+
int hasEntries(bigWigFile_t *bw)
|
94
|
+
{
|
95
|
+
if (bw->hdr->indexOffset != 0)
|
96
|
+
return 1; // No index, no entries pyBigWig issue #111
|
97
|
+
//if(bw->hdr->nBasesCovered > 0) return 1; // Sometimes headers are broken
|
98
|
+
return 0;
|
99
|
+
}
|
100
|
+
|
101
|
+
static VALUE
|
102
|
+
bigwig_init(VALUE self, VALUE rb_fname, VALUE rb_mode)
|
103
|
+
{
|
104
|
+
char *fname = NULL;
|
105
|
+
char *mode = "r";
|
106
|
+
bigWigFile_t *bw = NULL;
|
107
|
+
|
108
|
+
fname = StringValueCStr(rb_fname);
|
109
|
+
mode = StringValueCStr(rb_mode);
|
110
|
+
|
111
|
+
//Open the local/remote file
|
112
|
+
if (strchr(mode, 'w') != NULL || bwIsBigWig(fname, NULL))
|
113
|
+
{
|
114
|
+
bw = bwOpen(fname, NULL, mode);
|
115
|
+
}
|
116
|
+
else
|
117
|
+
{
|
118
|
+
bw = bbOpen(fname, NULL);
|
119
|
+
}
|
120
|
+
|
121
|
+
if (!bw)
|
122
|
+
{
|
123
|
+
fprintf(stderr, "bw is NULL!\n");
|
124
|
+
goto error;
|
125
|
+
}
|
126
|
+
|
127
|
+
if (!mode || !strchr(mode, 'w'))
|
128
|
+
{
|
129
|
+
if (!bw->cl)
|
130
|
+
goto error;
|
131
|
+
}
|
132
|
+
|
133
|
+
//Set the data pointer
|
134
|
+
DATA_PTR(self) = bw;
|
135
|
+
|
136
|
+
rb_ivar_set(self, rb_intern("@last_tid"), INT2NUM(-1));
|
137
|
+
rb_ivar_set(self, rb_intern("@last_type"), INT2NUM(-1));
|
138
|
+
rb_ivar_set(self, rb_intern("@last_span"), INT2NUM(-1));
|
139
|
+
rb_ivar_set(self, rb_intern("@last_step"), INT2NUM(-1));
|
140
|
+
rb_ivar_set(self, rb_intern("@last_start"), INT2NUM(-1));
|
141
|
+
|
142
|
+
return self;
|
143
|
+
|
144
|
+
error:
|
145
|
+
if (bw)
|
146
|
+
bwClose(bw);
|
147
|
+
rb_raise(rb_eRuntimeError, "Received an error during file opening!");
|
148
|
+
return Qnil;
|
149
|
+
}
|
150
|
+
|
151
|
+
static VALUE
|
152
|
+
bigwig_close(VALUE self)
|
153
|
+
{
|
154
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
155
|
+
|
156
|
+
if (bw)
|
157
|
+
{
|
158
|
+
bwClose(bw);
|
159
|
+
DATA_PTR(self) = NULL;
|
160
|
+
}
|
161
|
+
|
162
|
+
return Qnil;
|
163
|
+
}
|
164
|
+
|
165
|
+
static VALUE
|
166
|
+
bw_get_header(VALUE self)
|
167
|
+
{
|
168
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
169
|
+
VALUE rb_header;
|
170
|
+
|
171
|
+
if (!bw)
|
172
|
+
{
|
173
|
+
rb_raise(rb_eRuntimeError, "The bigWig file handle is not opened!");
|
174
|
+
return Qnil;
|
175
|
+
}
|
176
|
+
|
177
|
+
if (bw->isWrite == 1)
|
178
|
+
{
|
179
|
+
rb_raise(rb_eRuntimeError, "The header cannot be accessed in files opened for writing!");
|
180
|
+
return Qnil;
|
181
|
+
}
|
182
|
+
|
183
|
+
rb_header = rb_hash_new();
|
184
|
+
|
185
|
+
// FIXME return int or double?
|
186
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("version")), ULONG2NUM(bw->hdr->version));
|
187
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("levels")), ULONG2NUM(bw->hdr->nLevels));
|
188
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("bases_covered")), ULL2NUM(bw->hdr->nBasesCovered));
|
189
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("min_val")), INT2NUM((int)bw->hdr->minVal));
|
190
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("max_val")), INT2NUM((int)bw->hdr->maxVal));
|
191
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("sum_data")), INT2NUM((int)bw->hdr->sumData));
|
192
|
+
rb_hash_aset(rb_header, ID2SYM(rb_intern("sum_squared")), INT2NUM((int)bw->hdr->sumSquared));
|
193
|
+
|
194
|
+
return rb_header;
|
195
|
+
|
196
|
+
error:
|
197
|
+
rb_raise(rb_eRuntimeError, "Received an error while getting the bigWig header!");
|
198
|
+
return Qnil;
|
199
|
+
}
|
200
|
+
|
201
|
+
static VALUE
|
202
|
+
bw_get_chroms(int argc, VALUE *argv, VALUE self)
|
203
|
+
{
|
204
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
205
|
+
VALUE rb_chrom, val, ret;
|
206
|
+
char *chrom = NULL;
|
207
|
+
uint32_t i;
|
208
|
+
|
209
|
+
ret = Qnil; // return nil if no chrom is found
|
210
|
+
|
211
|
+
if (!bw)
|
212
|
+
{
|
213
|
+
rb_raise(rb_eRuntimeError, "The bigWig file handle is not opened!");
|
214
|
+
return Qnil;
|
215
|
+
}
|
216
|
+
|
217
|
+
if (bw->isWrite == 1)
|
218
|
+
{
|
219
|
+
rb_raise(rb_eRuntimeError, "Chromosomes cannot be accessed in files opened for writing!");
|
220
|
+
return Qnil;
|
221
|
+
}
|
222
|
+
|
223
|
+
rb_scan_args(argc, argv, "01", &rb_chrom);
|
224
|
+
|
225
|
+
if (argc == 0)
|
226
|
+
{
|
227
|
+
ret = rb_hash_new();
|
228
|
+
for (i = 0; i < bw->cl->nKeys; i++)
|
229
|
+
{
|
230
|
+
val = ULONG2NUM(bw->cl->len[i]);
|
231
|
+
rb_hash_aset(ret, rb_str_new2(bw->cl->chrom[i]), val);
|
232
|
+
}
|
233
|
+
}
|
234
|
+
else
|
235
|
+
{
|
236
|
+
chrom = StringValueCStr(rb_chrom);
|
237
|
+
for (i = 0; i < bw->cl->nKeys; i++)
|
238
|
+
{
|
239
|
+
if (strcmp(bw->cl->chrom[i], chrom) == 0)
|
240
|
+
{
|
241
|
+
ret = ULONG2NUM(bw->cl->len[i]);
|
242
|
+
break;
|
243
|
+
}
|
244
|
+
}
|
245
|
+
}
|
246
|
+
|
247
|
+
return ret;
|
248
|
+
}
|
249
|
+
|
250
|
+
enum bwStatsType char2enum(char *s)
|
251
|
+
{
|
252
|
+
if (strcmp(s, "mean") == 0)
|
253
|
+
return mean;
|
254
|
+
if (strcmp(s, "std") == 0)
|
255
|
+
return stdev;
|
256
|
+
if (strcmp(s, "dev") == 0)
|
257
|
+
return dev;
|
258
|
+
if (strcmp(s, "max") == 0)
|
259
|
+
return max;
|
260
|
+
if (strcmp(s, "min") == 0)
|
261
|
+
return min;
|
262
|
+
if (strcmp(s, "cov") == 0)
|
263
|
+
return cov;
|
264
|
+
if (strcmp(s, "coverage") == 0)
|
265
|
+
return cov;
|
266
|
+
if (strcmp(s, "sum") == 0)
|
267
|
+
return sum;
|
268
|
+
return -1;
|
269
|
+
};
|
270
|
+
|
271
|
+
// double *bwStats (bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
|
272
|
+
// double *bwStatsFromFull(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
|
273
|
+
|
274
|
+
static VALUE
|
275
|
+
bw_get_stats(VALUE self, VALUE rb_chrom, VALUE rb_start, VALUE rb_end, VALUE rb_nBins, VALUE rb_type, VALUE rb_exact)
|
276
|
+
{
|
277
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
278
|
+
double *val;
|
279
|
+
unsigned long startl = 0, endl = -1;
|
280
|
+
uint32_t start, end = -1, tid;
|
281
|
+
int nBins = 1, i;
|
282
|
+
char *chrom = NULL, *type = "mean";
|
283
|
+
VALUE ret;
|
284
|
+
|
285
|
+
if (!bw)
|
286
|
+
{
|
287
|
+
rb_raise(rb_eRuntimeError, "The bigWig file handle is not opened!");
|
288
|
+
return Qnil;
|
289
|
+
}
|
290
|
+
|
291
|
+
if (bw->isWrite == 1)
|
292
|
+
{
|
293
|
+
rb_raise(rb_eRuntimeError, "Statistics cannot be accessed in files opened for writing!");
|
294
|
+
return Qnil;
|
295
|
+
}
|
296
|
+
|
297
|
+
if (bw->type == 1)
|
298
|
+
{
|
299
|
+
rb_raise(rb_eRuntimeError, "bigBed files have no statistics!");
|
300
|
+
return Qnil;
|
301
|
+
}
|
302
|
+
|
303
|
+
if (rb_chrom != Qnil)
|
304
|
+
{
|
305
|
+
chrom = StringValueCStr(rb_chrom);
|
306
|
+
}
|
307
|
+
|
308
|
+
if (rb_start != Qnil)
|
309
|
+
startl = NUM2LONG(rb_start);
|
310
|
+
|
311
|
+
if (rb_end != Qnil)
|
312
|
+
endl = NUM2LONG(rb_end);
|
313
|
+
|
314
|
+
if (rb_nBins != Qnil)
|
315
|
+
nBins = NUM2INT(rb_nBins);
|
316
|
+
|
317
|
+
if (rb_type != Qnil)
|
318
|
+
type = StringValueCStr(rb_type);
|
319
|
+
|
320
|
+
if (rb_exact != Qnil)
|
321
|
+
{
|
322
|
+
if (RTEST(rb_exact) == 1)
|
323
|
+
endl = startl + nBins - 1;
|
324
|
+
}
|
325
|
+
|
326
|
+
tid = bwGetTid(bw, chrom);
|
327
|
+
|
328
|
+
if (endl == (unsigned long)-1 && tid != (uint32_t)-1)
|
329
|
+
endl = bw->cl->len[tid];
|
330
|
+
|
331
|
+
if (tid == (uint32_t)-1 || startl > end || endl > end)
|
332
|
+
{
|
333
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
334
|
+
return Qnil;
|
335
|
+
}
|
336
|
+
|
337
|
+
start = (uint32_t)startl;
|
338
|
+
end = (uint32_t)endl;
|
339
|
+
|
340
|
+
if (end <= start || end > bw->cl->len[tid] || start >= end)
|
341
|
+
{
|
342
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
343
|
+
return Qnil;
|
344
|
+
}
|
345
|
+
|
346
|
+
if (char2enum(type) == doesNotExist)
|
347
|
+
{
|
348
|
+
rb_raise(rb_eRuntimeError, "Invalid type! : %s", type);
|
349
|
+
return Qnil;
|
350
|
+
}
|
351
|
+
|
352
|
+
if (!hasEntries(bw))
|
353
|
+
{
|
354
|
+
ret = rb_ary_new2(nBins);
|
355
|
+
for (i = 0; i < nBins; i++)
|
356
|
+
{
|
357
|
+
rb_ary_store(ret, i, Qnil);
|
358
|
+
}
|
359
|
+
return ret;
|
360
|
+
}
|
361
|
+
|
362
|
+
if (RTEST(rb_exact))
|
363
|
+
{
|
364
|
+
val = bwStatsFromFull(bw, chrom, start, end, nBins, char2enum(type));
|
365
|
+
}
|
366
|
+
else
|
367
|
+
{
|
368
|
+
val = bwStats(bw, chrom, start, end, nBins, char2enum(type));
|
369
|
+
}
|
370
|
+
|
371
|
+
if (!val)
|
372
|
+
{
|
373
|
+
rb_raise(rb_eRuntimeError, "Error getting statistics!An error was encountered while fetching statistics.");
|
374
|
+
return Qnil;
|
375
|
+
}
|
376
|
+
|
377
|
+
ret = rb_ary_new2(nBins);
|
378
|
+
for (i = 0; i < nBins; i++)
|
379
|
+
{
|
380
|
+
rb_ary_store(ret, i, rb_float_new(val[i]));
|
381
|
+
}
|
382
|
+
free(val);
|
383
|
+
|
384
|
+
return ret;
|
385
|
+
}
|
386
|
+
|
387
|
+
static VALUE
|
388
|
+
bw_get_values(VALUE self, VALUE rb_chrom, VALUE rb_start, VALUE rb_end)
|
389
|
+
{
|
390
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
391
|
+
int i;
|
392
|
+
uint32_t start, end = -1, tid;
|
393
|
+
unsigned long startl = 0, endl = -1;
|
394
|
+
char *chrom = NULL;
|
395
|
+
VALUE ret;
|
396
|
+
bwOverlappingIntervals_t *o;
|
397
|
+
|
398
|
+
if (!bw)
|
399
|
+
{
|
400
|
+
rb_raise(rb_eRuntimeError, "The bigWig file handle is not opened!");
|
401
|
+
return Qnil;
|
402
|
+
}
|
403
|
+
|
404
|
+
if (bw->type == 1)
|
405
|
+
{
|
406
|
+
rb_raise(rb_eRuntimeError, "bigBed files have no values! Use 'entries' instead.");
|
407
|
+
return Qnil;
|
408
|
+
}
|
409
|
+
|
410
|
+
if (rb_chrom != Qnil)
|
411
|
+
{
|
412
|
+
chrom = StringValueCStr(rb_chrom);
|
413
|
+
}
|
414
|
+
|
415
|
+
if (rb_start != Qnil)
|
416
|
+
startl = NUM2LONG(rb_start);
|
417
|
+
|
418
|
+
if (rb_end != Qnil)
|
419
|
+
endl = NUM2LONG(rb_end);
|
420
|
+
|
421
|
+
tid = bwGetTid(bw, chrom);
|
422
|
+
|
423
|
+
if (endl == (unsigned long)-1 && tid != (uint32_t)-1)
|
424
|
+
endl = bw->cl->len[tid];
|
425
|
+
if (tid == (uint32_t)-1 || startl > end || endl > end)
|
426
|
+
{
|
427
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
428
|
+
return Qnil;
|
429
|
+
}
|
430
|
+
|
431
|
+
start = (uint32_t)startl;
|
432
|
+
end = (uint32_t)endl;
|
433
|
+
|
434
|
+
if (end <= start || end > bw->cl->len[tid] || start >= end)
|
435
|
+
{
|
436
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
437
|
+
return Qnil;
|
438
|
+
}
|
439
|
+
|
440
|
+
if (!hasEntries(bw))
|
441
|
+
{
|
442
|
+
return rb_ary_new2(0);
|
443
|
+
}
|
444
|
+
|
445
|
+
o = bwGetValues(bw, chrom, start, end, 1);
|
446
|
+
if (!o)
|
447
|
+
{
|
448
|
+
rb_raise(rb_eRuntimeError, "An error occurred while fetching values!");
|
449
|
+
return Qnil;
|
450
|
+
}
|
451
|
+
|
452
|
+
ret = rb_ary_new2(end - start);
|
453
|
+
for (i = 0; i < (int)o->l; i++)
|
454
|
+
rb_ary_store(ret, i, DBL2NUM(o->value[i]));
|
455
|
+
bwDestroyOverlappingIntervals(o);
|
456
|
+
|
457
|
+
return ret;
|
458
|
+
}
|
459
|
+
|
460
|
+
static VALUE
|
461
|
+
bw_get_intervals(VALUE self, VALUE rb_chrom, VALUE rb_start, VALUE rb_end)
|
462
|
+
{
|
463
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
464
|
+
uint32_t start, end = -1, tid, i;
|
465
|
+
unsigned long startl = 0, endl = -1;
|
466
|
+
bwOverlappingIntervals_t *intervals = NULL;
|
467
|
+
char *chrom;
|
468
|
+
VALUE ret;
|
469
|
+
|
470
|
+
if (!bw)
|
471
|
+
{
|
472
|
+
rb_raise(rb_eRuntimeError, "The bigWig file handle is not opened!");
|
473
|
+
return Qnil;
|
474
|
+
}
|
475
|
+
|
476
|
+
if (bw->isWrite == 1)
|
477
|
+
{
|
478
|
+
rb_raise(rb_eRuntimeError, "Intervals cannot be accessed in files opened for writing!");
|
479
|
+
return Qnil;
|
480
|
+
}
|
481
|
+
|
482
|
+
if (bw->type == 1)
|
483
|
+
{
|
484
|
+
rb_raise(rb_eRuntimeError, "bigBed files have no intervals! Use 'entries' instead.");
|
485
|
+
return Qnil;
|
486
|
+
}
|
487
|
+
|
488
|
+
if (rb_chrom != Qnil)
|
489
|
+
{
|
490
|
+
chrom = StringValueCStr(rb_chrom);
|
491
|
+
}
|
492
|
+
|
493
|
+
if (rb_start != Qnil)
|
494
|
+
startl = NUM2LONG(rb_start);
|
495
|
+
|
496
|
+
if (rb_end != Qnil)
|
497
|
+
endl = NUM2LONG(rb_end);
|
498
|
+
|
499
|
+
tid = bwGetTid(bw, chrom);
|
500
|
+
|
501
|
+
//Sanity check
|
502
|
+
if (endl == (unsigned long)-1 && tid != (uint32_t)-1)
|
503
|
+
endl = bw->cl->len[tid];
|
504
|
+
|
505
|
+
if (tid == (uint32_t)-1 || startl > end || endl > end)
|
506
|
+
{
|
507
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
508
|
+
return Qnil;
|
509
|
+
}
|
510
|
+
|
511
|
+
start = (uint32_t)startl;
|
512
|
+
end = (uint32_t)endl;
|
513
|
+
|
514
|
+
if (end <= start || end > bw->cl->len[tid] || start >= end)
|
515
|
+
{
|
516
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
517
|
+
return Qnil;
|
518
|
+
}
|
519
|
+
|
520
|
+
if (!hasEntries(bw))
|
521
|
+
{
|
522
|
+
return rb_ary_new2(0);
|
523
|
+
}
|
524
|
+
|
525
|
+
//Get the intervals
|
526
|
+
intervals = bwGetOverlappingIntervals(bw, chrom, start, end);
|
527
|
+
if (!intervals)
|
528
|
+
{
|
529
|
+
rb_raise(rb_eRuntimeError, "An error occurred while fetching the overlapping intervals!");
|
530
|
+
return Qnil;
|
531
|
+
}
|
532
|
+
if (!intervals->l)
|
533
|
+
{
|
534
|
+
return rb_ary_new2(0);
|
535
|
+
}
|
536
|
+
|
537
|
+
ret = rb_ary_new2(intervals->l);
|
538
|
+
for (i = 0; i < intervals->l; i++)
|
539
|
+
{
|
540
|
+
VALUE tupl = rb_ary_new3(3, UINT32_2NUM(intervals->start[i]), UINT32_2NUM(intervals->end[i]), DBL2NUM(intervals->value[i]));
|
541
|
+
rb_ary_store(ret, i, tupl);
|
542
|
+
}
|
543
|
+
bwDestroyOverlappingIntervals(intervals);
|
544
|
+
|
545
|
+
return ret;
|
546
|
+
}
|
547
|
+
|
548
|
+
static VALUE
|
549
|
+
bb_get_entries(VALUE self, VALUE rb_chrom, VALUE rb_start, VALUE rb_end, VALUE rb_with_string)
|
550
|
+
{
|
551
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
552
|
+
uint32_t start, end = -1, tid, i;
|
553
|
+
unsigned long startl, endl;
|
554
|
+
char *chrom;
|
555
|
+
VALUE ret, t;
|
556
|
+
int withString = 1;
|
557
|
+
bbOverlappingEntries_t *o;
|
558
|
+
|
559
|
+
if (!bw)
|
560
|
+
{
|
561
|
+
rb_raise(rb_eRuntimeError, "The bigBed file handle is not opened!");
|
562
|
+
return Qnil;
|
563
|
+
}
|
564
|
+
|
565
|
+
if (bw->type == 0)
|
566
|
+
{
|
567
|
+
rb_raise(rb_eRuntimeError, "bigWig files have no entries! Use 'values' or 'intervals' instead.");
|
568
|
+
return Qnil;
|
569
|
+
}
|
570
|
+
|
571
|
+
if (rb_chrom != Qnil)
|
572
|
+
{
|
573
|
+
chrom = StringValueCStr(rb_chrom);
|
574
|
+
}
|
575
|
+
|
576
|
+
if (rb_start != Qnil)
|
577
|
+
startl = NUM2LONG(rb_start);
|
578
|
+
|
579
|
+
if (rb_end != Qnil)
|
580
|
+
endl = NUM2LONG(rb_end);
|
581
|
+
|
582
|
+
if (rb_with_string != Qnil)
|
583
|
+
{
|
584
|
+
if (RTEST(rb_with_string))
|
585
|
+
withString = 1;
|
586
|
+
else
|
587
|
+
withString = 0;
|
588
|
+
}
|
589
|
+
|
590
|
+
tid = bwGetTid(bw, chrom);
|
591
|
+
|
592
|
+
//Sanity check
|
593
|
+
if (endl == (unsigned long)-1 && tid != (uint32_t)-1)
|
594
|
+
endl = bw->cl->len[tid];
|
595
|
+
if (tid == (uint32_t)-1 || startl > end || endl > end)
|
596
|
+
{
|
597
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
598
|
+
return Qnil;
|
599
|
+
}
|
600
|
+
|
601
|
+
start = (uint32_t)startl;
|
602
|
+
end = (uint32_t)endl;
|
603
|
+
|
604
|
+
if (end <= start || end > bw->cl->len[tid] || start >= end)
|
605
|
+
{
|
606
|
+
rb_raise(rb_eRuntimeError, "Invalid interval bounds!");
|
607
|
+
return Qnil;
|
608
|
+
}
|
609
|
+
|
610
|
+
o = bbGetOverlappingEntries(bw, chrom, start, end, withString);
|
611
|
+
if (!o)
|
612
|
+
{
|
613
|
+
rb_raise(rb_eRuntimeError, "An error occurred while fetching the overlapping entries!\n");
|
614
|
+
return Qnil;
|
615
|
+
}
|
616
|
+
if (!o->l)
|
617
|
+
{
|
618
|
+
return rb_ary_new2(0);
|
619
|
+
}
|
620
|
+
|
621
|
+
ret = rb_ary_new2(o->l);
|
622
|
+
if (!ret)
|
623
|
+
goto error;
|
624
|
+
|
625
|
+
for (i = 0; i < o->l; i++)
|
626
|
+
{
|
627
|
+
if (withString)
|
628
|
+
{
|
629
|
+
t = rb_ary_new3(3, UINT32_2NUM(o->start[i]), UINT32_2NUM(o->end[i]), rb_str_new2(o->str[i]));
|
630
|
+
}
|
631
|
+
else
|
632
|
+
{
|
633
|
+
t = rb_ary_new3(2, UINT32_2NUM(o->start[i]), UINT32_2NUM(o->end[i]));
|
634
|
+
}
|
635
|
+
if (!t)
|
636
|
+
goto error;
|
637
|
+
rb_ary_store(ret, i, t);
|
638
|
+
}
|
639
|
+
|
640
|
+
bbDestroyOverlappingEntries(o);
|
641
|
+
return ret;
|
642
|
+
|
643
|
+
error:
|
644
|
+
bbDestroyOverlappingEntries(o);
|
645
|
+
rb_raise(rb_eRuntimeError, "An error occurred while constructing the output!\n");
|
646
|
+
return Qnil;
|
647
|
+
}
|
648
|
+
|
649
|
+
static VALUE
|
650
|
+
bb_get_sql(VALUE self)
|
651
|
+
{
|
652
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
653
|
+
VALUE ret;
|
654
|
+
char *str;
|
655
|
+
|
656
|
+
if (!bw)
|
657
|
+
{
|
658
|
+
rb_raise(rb_eRuntimeError, "The bigBed file handle is not opened!");
|
659
|
+
return Qnil;
|
660
|
+
}
|
661
|
+
|
662
|
+
if (bw->type == 0)
|
663
|
+
{
|
664
|
+
rb_raise(rb_eRuntimeError, "bigWig files have no entries!");
|
665
|
+
return Qnil;
|
666
|
+
}
|
667
|
+
|
668
|
+
str = bbGetSQL(bw);
|
669
|
+
if (!str)
|
670
|
+
{
|
671
|
+
return Qnil;
|
672
|
+
}
|
673
|
+
|
674
|
+
ret = rb_str_new2(str);
|
675
|
+
if (str)
|
676
|
+
free(str);
|
677
|
+
|
678
|
+
return ret;
|
679
|
+
}
|
680
|
+
|
681
|
+
static VALUE
|
682
|
+
bw_get_file_type(VALUE self)
|
683
|
+
{
|
684
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
685
|
+
|
686
|
+
if (bw->type == 0)
|
687
|
+
{
|
688
|
+
return rb_str_new2("BigWig");
|
689
|
+
}
|
690
|
+
else if (bw->type == 1)
|
691
|
+
{
|
692
|
+
return rb_str_new2("BigBed");
|
693
|
+
}
|
694
|
+
|
695
|
+
return rb_str_new2("Unknown");
|
696
|
+
}
|
697
|
+
|
698
|
+
static VALUE
|
699
|
+
bw_is_bigwig_q(VALUE self)
|
700
|
+
{
|
701
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
702
|
+
|
703
|
+
if (bw->type == 0)
|
704
|
+
{
|
705
|
+
return Qtrue;
|
706
|
+
}
|
707
|
+
else
|
708
|
+
{
|
709
|
+
return Qfalse;
|
710
|
+
}
|
711
|
+
}
|
712
|
+
|
713
|
+
static VALUE
|
714
|
+
bw_is_bigbed_q(VALUE self)
|
715
|
+
{
|
716
|
+
bigWigFile_t *bw = get_bigWigFile(self);
|
717
|
+
|
718
|
+
if (bw->type == 1)
|
719
|
+
{
|
720
|
+
return Qtrue;
|
721
|
+
}
|
722
|
+
else
|
723
|
+
{
|
724
|
+
return Qfalse;
|
725
|
+
}
|
726
|
+
}
|
727
|
+
|
728
|
+
void Init_bigwigext()
|
729
|
+
{
|
730
|
+
rb_Bio = rb_define_module("Bio");
|
731
|
+
rb_BigWig = rb_define_class_under(rb_Bio, "BigWig", rb_cObject);
|
732
|
+
|
733
|
+
rb_define_alloc_func(rb_BigWig, bigwig_allocate);
|
734
|
+
|
735
|
+
rb_define_private_method(rb_BigWig, "initialize_raw", bigwig_init, 2);
|
736
|
+
rb_define_method(rb_BigWig, "close", bigwig_close, 0);
|
737
|
+
rb_define_method(rb_BigWig, "header", bw_get_header, 0);
|
738
|
+
rb_define_method(rb_BigWig, "chroms", bw_get_chroms, -1);
|
739
|
+
rb_define_private_method(rb_BigWig, "stats_raw", bw_get_stats, 6);
|
740
|
+
rb_define_private_method(rb_BigWig, "values_raw", bw_get_values, 3);
|
741
|
+
rb_define_private_method(rb_BigWig, "intervals_raw", bw_get_intervals, 3);
|
742
|
+
rb_define_private_method(rb_BigWig, "entries_raw", bb_get_entries, 4);
|
743
|
+
rb_define_method(rb_BigWig, "sql", bb_get_sql, 0);
|
744
|
+
rb_define_method(rb_BigWig, "file_type", bw_get_file_type, 0);
|
745
|
+
rb_define_method(rb_BigWig, "is_bigwig?", bw_is_bigwig_q, 0);
|
746
|
+
rb_define_method(rb_BigWig, "is_bigbed?", bw_is_bigbed_q, 0);
|
747
|
+
}
|