bio-bigwig 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,803 @@
1
+ #include "bigWig.h"
2
+ #include "bwCommon.h"
3
+ #include <stdlib.h>
4
+ #include <math.h>
5
+ #include <string.h>
6
+ #include <zlib.h>
7
+ #include <errno.h>
8
+
9
+ static uint32_t roundup(uint32_t v) {
10
+ v--;
11
+ v |= v >> 1;
12
+ v |= v >> 2;
13
+ v |= v >> 4;
14
+ v |= v >> 8;
15
+ v |= v >> 16;
16
+ v++;
17
+ return v;
18
+ }
19
+
20
+ //Returns the root node on success and NULL on error
21
+ static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
22
+ uint32_t magic;
23
+ bwRTree_t *node;
24
+
25
+ if(!offset) {
26
+ if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
27
+ } else {
28
+ if(bwSetPos(fp, offset)) return NULL;
29
+ }
30
+
31
+ if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
32
+ if(magic != IDX_MAGIC) {
33
+ fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
34
+ return NULL;
35
+ }
36
+
37
+ node = calloc(1, sizeof(bwRTree_t));
38
+ if(!node) return NULL;
39
+
40
+ if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
41
+ if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
42
+ if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43
+ if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
44
+ if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45
+ if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
46
+ if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
47
+ if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
48
+ //Padding
49
+ if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
50
+ node->rootOffset = bwTell(fp);
51
+
52
+ //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
53
+ errno = 0;
54
+
55
+ return node;
56
+
57
+ error:
58
+ free(node);
59
+ return NULL;
60
+ }
61
+
62
+ //Returns a bwRTreeNode_t on success and NULL on an error
63
+ //For the root node, set offset to 0
64
+ static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
65
+ bwRTreeNode_t *node = NULL;
66
+ uint8_t padding;
67
+ uint16_t i;
68
+ if(offset) {
69
+ if(bwSetPos(fp, offset)) return NULL;
70
+ } else {
71
+ //seek
72
+ if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
73
+ }
74
+
75
+ node = calloc(1, sizeof(bwRTreeNode_t));
76
+ if(!node) return NULL;
77
+
78
+ if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
79
+ if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
80
+ if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
81
+
82
+ node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
83
+ if(!node->chrIdxStart) goto error;
84
+ node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
85
+ if(!node->baseStart) goto error;
86
+ node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
87
+ if(!node->chrIdxEnd) goto error;
88
+ node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
89
+ if(!node->baseEnd) goto error;
90
+ node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
91
+ if(!node->dataOffset) goto error;
92
+ if(node->isLeaf) {
93
+ node->x.size = malloc(node->nChildren * sizeof(uint64_t));
94
+ if(!node->x.size) goto error;
95
+ } else {
96
+ node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
97
+ if(!node->x.child) goto error;
98
+ }
99
+ for(i=0; i<node->nChildren; i++) {
100
+ if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101
+ if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102
+ if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103
+ if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
104
+ if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
105
+ if(node->isLeaf) {
106
+ if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
107
+ }
108
+ }
109
+
110
+ return node;
111
+
112
+ error:
113
+ if(node->chrIdxStart) free(node->chrIdxStart);
114
+ if(node->baseStart) free(node->baseStart);
115
+ if(node->chrIdxEnd) free(node->chrIdxEnd);
116
+ if(node->baseEnd) free(node->baseEnd);
117
+ if(node->dataOffset) free(node->dataOffset);
118
+ if(node->isLeaf && node->x.size) free(node->x.size);
119
+ else if((!node->isLeaf) && node->x.child) free(node->x.child);
120
+ free(node);
121
+ return NULL;
122
+ }
123
+
124
+ void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
125
+ if(!b) return;
126
+ if(b->size) free(b->size);
127
+ if(b->offset) free(b->offset);
128
+ free(b);
129
+ }
130
+
131
+ //Returns a bwOverlapBlock_t * object or NULL on error.
132
+ static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
133
+ uint16_t i, idx = 0;
134
+ bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
135
+ if(!o) return NULL;
136
+
137
+ for(i=0; i<node->nChildren; i++) {
138
+ if(tid < node->chrIdxStart[i]) break;
139
+ if(tid > node->chrIdxEnd[i]) continue;
140
+
141
+ /*
142
+ The individual blocks can theoretically span multiple contigs.
143
+ So if we treat the first/last contig in the range as special
144
+ but anything in the middle is a guaranteed match
145
+ */
146
+ if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
147
+ if(tid == node->chrIdxStart[i]) {
148
+ if(node->baseStart[i] >= end) break;
149
+ } else if(tid == node->chrIdxEnd[i]) {
150
+ if(node->baseEnd[i] <= start) continue;
151
+ }
152
+ } else {
153
+ if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
154
+ }
155
+ o->n++;
156
+ }
157
+
158
+ if(o->n) {
159
+ o->offset = malloc(sizeof(uint64_t) * (o->n));
160
+ if(!o->offset) goto error;
161
+ o->size = malloc(sizeof(uint64_t) * (o->n));
162
+ if(!o->size) goto error;
163
+
164
+ for(i=0; i<node->nChildren; i++) {
165
+ if(tid < node->chrIdxStart[i]) break;
166
+ if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
167
+ if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
168
+ if(tid == node->chrIdxStart[i]) {
169
+ if(node->baseStart[i] >= end) continue;
170
+ } else if(tid == node->chrIdxEnd[i]) {
171
+ if(node->baseEnd[i] <= start) continue;
172
+ }
173
+ } else {
174
+ if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
175
+ }
176
+ o->offset[idx] = node->dataOffset[i];
177
+ o->size[idx++] = node->x.size[i];
178
+ if(idx >= o->n) break;
179
+ }
180
+ }
181
+
182
+ if(idx != o->n) { //This should never happen
183
+ fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
184
+ goto error;
185
+ }
186
+
187
+ return o;
188
+
189
+ error:
190
+ if(o) destroyBWOverlapBlock(o);
191
+ return NULL;
192
+ }
193
+
194
+ //This will free l2 unless there's an error!
195
+ //Returns NULL on error, otherwise the merged lists
196
+ static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
197
+ uint64_t i,j;
198
+ if(!b2) return b1;
199
+ if(!b2->n) {
200
+ destroyBWOverlapBlock(b2);
201
+ return b1;
202
+ }
203
+ if(!b1->n) {
204
+ destroyBWOverlapBlock(b1);
205
+ return b2;
206
+ }
207
+ j = b1->n;
208
+ b1->n += b2->n;
209
+ b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
210
+ if(!b1->offset) goto error;
211
+ b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
212
+ if(!b1->size) goto error;
213
+
214
+ for(i=0; i<b2->n; i++) {
215
+ b1->offset[j+i] = b2->offset[i];
216
+ b1->size[j+i] = b2->size[i];
217
+ }
218
+ destroyBWOverlapBlock(b2);
219
+ return b1;
220
+
221
+ error:
222
+ destroyBWOverlapBlock(b1);
223
+ return NULL;
224
+ }
225
+
226
+ //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
227
+ //The output needs to be free()d if not NULL (likewise with *sizes)
228
+ static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
229
+ uint16_t i;
230
+ bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
231
+ if(!output) return NULL;
232
+
233
+ for(i=0; i<node->nChildren; i++) {
234
+ if(tid < node->chrIdxStart[i]) break;
235
+ if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
236
+ if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
237
+ if(tid == node->chrIdxStart[i]) {
238
+ if(node->baseStart[i] >= end) continue;
239
+ } else if(tid == node->chrIdxEnd[i]) {
240
+ if(node->baseEnd[i] <= start) continue;
241
+ }
242
+ } else {
243
+ if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
244
+ }
245
+
246
+ //We have an overlap!
247
+ if(!node->x.child[i])
248
+ node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
249
+ if(!node->x.child[i]) goto error;
250
+
251
+ if(node->x.child[i]->isLeaf) { //leaf
252
+ nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
253
+ } else { //non-leaf
254
+ nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
255
+ }
256
+
257
+ //The output is processed the same regardless of leaf/non-leaf
258
+ if(!nodeBlocks) goto error;
259
+ else {
260
+ output = mergeOverlapBlocks(output, nodeBlocks);
261
+ if(!output) {
262
+ destroyBWOverlapBlock(nodeBlocks);
263
+ goto error;
264
+ }
265
+ }
266
+ }
267
+
268
+ return output;
269
+
270
+ error:
271
+ destroyBWOverlapBlock(output);
272
+ return NULL;
273
+ }
274
+
275
+ //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
276
+ //The output must be free()d
277
+ bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
278
+ if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
279
+ return overlapsNonLeaf(bw, root, tid, start, end);
280
+ }
281
+
282
+ //In reality, a hash or some sort of tree structure is probably faster...
283
+ //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
284
+ uint32_t bwGetTid(bigWigFile_t *fp, char *chrom) {
285
+ uint32_t i;
286
+ if(!chrom) return -1;
287
+ for(i=0; i<fp->cl->nKeys; i++) {
288
+ if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
289
+ }
290
+ return -1;
291
+ }
292
+
293
+ static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
294
+ uint32_t tid = bwGetTid(fp, chrom);
295
+
296
+ if(tid == (uint32_t) -1) {
297
+ fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
298
+ return NULL;
299
+ }
300
+
301
+ //Get the info if needed
302
+ if(!fp->idx) {
303
+ fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
304
+ if(!fp->idx) {
305
+ return NULL;
306
+ }
307
+ }
308
+
309
+ if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
310
+ if(!fp->idx->root) return NULL;
311
+
312
+ return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
313
+ }
314
+
315
+ void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
316
+ hdr->tid = ((uint32_t*)b)[0];
317
+ hdr->start = ((uint32_t*)b)[1];
318
+ hdr->end = ((uint32_t*)b)[2];
319
+ hdr->step = ((uint32_t*)b)[3];
320
+ hdr->span = ((uint32_t*)b)[4];
321
+ hdr->type = ((uint8_t*)b)[20];
322
+ hdr->nItems = ((uint16_t*)b)[11];
323
+ }
324
+
325
+ void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
326
+ if(!o) return;
327
+ if(o->start) free(o->start);
328
+ if(o->end) free(o->end);
329
+ if(o->value) free(o->value);
330
+ free(o);
331
+ }
332
+
333
+ void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
334
+ uint32_t i;
335
+ if(!o) return;
336
+ if(o->start) free(o->start);
337
+ if(o->end) free(o->end);
338
+ if(o->str) {
339
+ for(i=0; i<o->l; i++) {
340
+ if(o->str[i]) free(o->str[i]);
341
+ }
342
+ free(o->str);
343
+ }
344
+ free(o);
345
+ }
346
+
347
+ //Returns NULL on error, in which case o has been free()d
348
+ static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
349
+ if(o->l+1 >= o->m) {
350
+ o->m = roundup(o->l+1);
351
+ o->start = realloc(o->start, o->m * sizeof(uint32_t));
352
+ if(!o->start) goto error;
353
+ o->end = realloc(o->end, o->m * sizeof(uint32_t));
354
+ if(!o->end) goto error;
355
+ o->value = realloc(o->value, o->m * sizeof(float));
356
+ if(!o->value) goto error;
357
+ }
358
+ o->start[o->l] = start;
359
+ o->end[o->l] = end;
360
+ o->value[o->l++] = value;
361
+ return o;
362
+
363
+ error:
364
+ bwDestroyOverlappingIntervals(o);
365
+ return NULL;
366
+ }
367
+
368
+ static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
369
+ if(o->l+1 >= o->m) {
370
+ o->m = roundup(o->l+1);
371
+ o->start = realloc(o->start, o->m * sizeof(uint32_t));
372
+ if(!o->start) goto error;
373
+ o->end = realloc(o->end, o->m * sizeof(uint32_t));
374
+ if(!o->end) goto error;
375
+ if(withString) {
376
+ o->str = realloc(o->str, o->m * sizeof(char**));
377
+ if(!o->str) goto error;
378
+ }
379
+ }
380
+ o->start[o->l] = start;
381
+ o->end[o->l] = end;
382
+ if(withString) o->str[o->l] = strdup(str);
383
+ o->l++;
384
+ return o;
385
+
386
+ error:
387
+ bbDestroyOverlappingEntries(o);
388
+ return NULL;
389
+ }
390
+
391
+ //Returns NULL on error
392
+ bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
393
+ uint64_t i;
394
+ uint16_t j;
395
+ int compressed = 0, rv;
396
+ uLongf sz = fp->hdr->bufSize, tmp;
397
+ void *buf = NULL, *compBuf = NULL;
398
+ uint32_t start = 0, end , *p;
399
+ float value;
400
+ bwDataHeader_t hdr;
401
+ bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
402
+
403
+ if(!output) goto error;
404
+
405
+ if(!o) return output;
406
+ if(!o->n) return output;
407
+
408
+ if(sz) {
409
+ compressed = 1;
410
+ buf = malloc(sz);
411
+ }
412
+ sz = 0; //This is now the size of the compressed buffer
413
+
414
+ for(i=0; i<o->n; i++) {
415
+ if(bwSetPos(fp, o->offset[i])) goto error;
416
+
417
+ if(sz < o->size[i]) {
418
+ compBuf = realloc(compBuf, o->size[i]);
419
+ sz = o->size[i];
420
+ }
421
+ if(!compBuf) goto error;
422
+
423
+ if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
424
+ if(compressed) {
425
+ tmp = fp->hdr->bufSize; //This gets over-written by uncompress
426
+ rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
427
+ if(rv != Z_OK) goto error;
428
+ } else {
429
+ buf = compBuf;
430
+ }
431
+
432
+ //TODO: ensure that tmp is large enough!
433
+ bwFillDataHdr(&hdr, buf);
434
+
435
+ p = ((uint32_t*) buf);
436
+ p += 6;
437
+ if(hdr.tid != tid) continue;
438
+
439
+ if(hdr.type == 3) start = hdr.start - hdr.step;
440
+
441
+ //FIXME: We should ensure that sz is large enough to hold nItems of the given type
442
+ for(j=0; j<hdr.nItems; j++) {
443
+ switch(hdr.type) {
444
+ case 1:
445
+ start = *p;
446
+ p++;
447
+ end = *p;
448
+ p++;
449
+ value = *((float *)p);
450
+ p++;
451
+ break;
452
+ case 2:
453
+ start = *p;
454
+ p++;
455
+ end = start + hdr.span;
456
+ value = *((float *)p);
457
+ p++;
458
+ break;
459
+ case 3:
460
+ start += hdr.step;
461
+ end = start+hdr.span;
462
+ value = *((float *)p);
463
+ p++;
464
+ break;
465
+ default :
466
+ goto error;
467
+ break;
468
+ }
469
+
470
+ if(end <= ostart || start >= oend) continue;
471
+ //Push the overlap
472
+ if(!pushIntervals(output, start, end, value)) goto error;
473
+ }
474
+ }
475
+
476
+ if(compressed && buf) free(buf);
477
+ if(compBuf) free(compBuf);
478
+ return output;
479
+
480
+ error:
481
+ fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
482
+ if(output) bwDestroyOverlappingIntervals(output);
483
+ if(compressed && buf) free(buf);
484
+ if(compBuf) free(compBuf);
485
+ return NULL;
486
+ }
487
+
488
+ bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
489
+ uint64_t i;
490
+ int compressed = 0, rv, slen;
491
+ uLongf sz = fp->hdr->bufSize, tmp = 0;
492
+ void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
493
+ uint32_t entryTid = 0, start = 0, end;
494
+ char *str;
495
+ bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
496
+
497
+ if(!output) goto error;
498
+
499
+ if(!o) return output;
500
+ if(!o->n) return output;
501
+
502
+ if(sz) {
503
+ compressed = 1;
504
+ buf = malloc(sz);
505
+ }
506
+ sz = 0; //This is now the size of the compressed buffer
507
+
508
+ for(i=0; i<o->n; i++) {
509
+ if(bwSetPos(fp, o->offset[i])) goto error;
510
+
511
+ if(sz < o->size[i]) {
512
+ compBuf = realloc(compBuf, o->size[i]);
513
+ sz = o->size[i];
514
+ }
515
+ if(!compBuf) goto error;
516
+
517
+ if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
518
+ if(compressed) {
519
+ tmp = fp->hdr->bufSize; //This gets over-written by uncompress
520
+ rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
521
+ if(rv != Z_OK) goto error;
522
+ } else {
523
+ buf = compBuf;
524
+ tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
525
+ }
526
+
527
+ bufEnd = buf + tmp;
528
+ while(buf < bufEnd) {
529
+ entryTid = ((uint32_t*)buf)[0];
530
+ start = ((uint32_t*)buf)[1];
531
+ end = ((uint32_t*)buf)[2];
532
+ buf += 12;
533
+ str = (char*)buf;
534
+ slen = strlen(str) + 1;
535
+ buf += slen;
536
+
537
+ if(entryTid < tid) continue;
538
+ if(entryTid > tid) break;
539
+ if(end <= ostart) continue;
540
+ if(start >= oend) break;
541
+
542
+ //Push the overlap
543
+ if(!pushBBIntervals(output, start, end, str, withString)) goto error;
544
+ }
545
+
546
+ buf = bufEnd - tmp; //reset the buffer pointer
547
+ }
548
+
549
+ if(compressed && buf) free(buf);
550
+ if(compBuf) free(compBuf);
551
+ return output;
552
+
553
+ error:
554
+ fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
555
+ buf = bufEnd - tmp;
556
+ if(output) bbDestroyOverlappingEntries(output);
557
+ if(compressed && buf) free(buf);
558
+ if(compBuf) free(compBuf);
559
+ return NULL;
560
+ }
561
+
562
+ //Returns NULL on error OR no intervals, which is a bad design...
563
+ bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
564
+ bwOverlappingIntervals_t *output;
565
+ uint32_t tid = bwGetTid(fp, chrom);
566
+ if(tid == (uint32_t) -1) return NULL;
567
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
568
+ if(!blocks) return NULL;
569
+ output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
570
+ destroyBWOverlapBlock(blocks);
571
+ return output;
572
+ }
573
+
574
+ //Like above, but for bigBed files
575
+ bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString) {
576
+ bbOverlappingEntries_t *output;
577
+ uint32_t tid = bwGetTid(fp, chrom);
578
+ if(tid == (uint32_t) -1) return NULL;
579
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
580
+ if(!blocks) return NULL;
581
+ output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
582
+ destroyBWOverlapBlock(blocks);
583
+ return output;
584
+ }
585
+
586
+ //Returns NULL on error
587
+ bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
588
+ bwOverlapIterator_t *output = NULL;
589
+ uint64_t n;
590
+ uint32_t tid = bwGetTid(bw, chrom);
591
+ if(tid == (uint32_t) -1) return output;
592
+ output = calloc(1, sizeof(bwOverlapIterator_t));
593
+ if(!output) return output;
594
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
595
+
596
+ output->bw = bw;
597
+ output->tid = tid;
598
+ output->start = start;
599
+ output->end = end;
600
+ output->blocks = blocks;
601
+ output->blocksPerIteration = blocksPerIteration;
602
+
603
+ if(blocks) {
604
+ n = blocks->n;
605
+ if(n>blocksPerIteration) blocks->n = blocksPerIteration;
606
+ output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
607
+ blocks->n = n;
608
+ output->offset = blocksPerIteration;
609
+ }
610
+ output->data = output->intervals;
611
+ return output;
612
+ }
613
+
614
+ //Returns NULL on error
615
+ bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
616
+ bwOverlapIterator_t *output = NULL;
617
+ uint64_t n;
618
+ uint32_t tid = bwGetTid(bw, chrom);
619
+ if(tid == (uint32_t) -1) return output;
620
+ output = calloc(1, sizeof(bwOverlapIterator_t));
621
+ if(!output) return output;
622
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
623
+
624
+ output->bw = bw;
625
+ output->tid = tid;
626
+ output->start = start;
627
+ output->end = end;
628
+ output->blocks = blocks;
629
+ output->blocksPerIteration = blocksPerIteration;
630
+ output->withString = withString;
631
+
632
+ if(blocks) {
633
+ n = blocks->n;
634
+ if(n>blocksPerIteration) blocks->n = blocksPerIteration;
635
+ output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
636
+ blocks->n = n;
637
+ output->offset = blocksPerIteration;
638
+ }
639
+ output->data = output->entries;
640
+ return output;
641
+ }
642
+
643
+ void bwIteratorDestroy(bwOverlapIterator_t *iter) {
644
+ if(!iter) return;
645
+ if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
646
+ if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
647
+ if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
648
+ free(iter);
649
+ }
650
+
651
+ //On error, points to NULL and destroys the input
652
+ bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
653
+ uint64_t n, *offset, *size;
654
+ bwOverlapBlock_t *blocks = iter->blocks;
655
+
656
+ if(iter->intervals) {
657
+ bwDestroyOverlappingIntervals(iter->intervals);
658
+ iter->intervals = NULL;
659
+ }
660
+ if(iter->entries) {
661
+ bbDestroyOverlappingEntries(iter->entries);
662
+ iter->entries = NULL;
663
+ }
664
+ iter->data = NULL;
665
+
666
+ if(iter->offset < blocks->n) {
667
+ //store the previous values
668
+ n = blocks->n;
669
+ offset = blocks->offset;
670
+ size = blocks->size;
671
+
672
+ //Move the start of the blocks
673
+ blocks->offset += iter->offset;
674
+ blocks->size += iter->offset;
675
+ if(iter->offset + iter->blocksPerIteration > n) {
676
+ blocks->n = blocks->n - iter->offset;
677
+ } else {
678
+ blocks->n = iter->blocksPerIteration;
679
+ }
680
+
681
+ //Get the intervals or entries, as appropriate
682
+ if(iter->bw->type == 0) {
683
+ //bigWig
684
+ iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
685
+ iter->data = iter->intervals;
686
+ } else {
687
+ //bigBed
688
+ iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
689
+ iter->data = iter->entries;
690
+ }
691
+ iter->offset += iter->blocksPerIteration;
692
+
693
+ //reset the values in iter->blocks
694
+ blocks->n = n;
695
+ blocks->offset = offset;
696
+ blocks->size = size;
697
+
698
+ //Check for error
699
+ if(!iter->intervals && !iter->entries) goto error;
700
+ }
701
+
702
+ return iter;
703
+
704
+ error:
705
+ bwIteratorDestroy(iter);
706
+ return NULL;
707
+ }
708
+
709
+ //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
710
+ //The ->end member is NULL
711
+ //If includeNA is not 0 then ->start is also NULL, since it's implied
712
+ //Note that bwDestroyOverlappingIntervals() will work in either case
713
+ bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
714
+ uint32_t i, j, n;
715
+ bwOverlappingIntervals_t *output = NULL;
716
+ bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
717
+ if(!intermediate) return NULL;
718
+
719
+ output = calloc(1, sizeof(bwOverlappingIntervals_t));
720
+ if(!output) goto error;
721
+ if(includeNA) {
722
+ output->l = end-start;
723
+ output->value = malloc(output->l*sizeof(float));
724
+ if(!output->value) goto error;
725
+ for(i=0; i<output->l; i++) output->value[i] = NAN;
726
+ for(i=0; i<intermediate->l; i++) {
727
+ for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
728
+ if(j < start || j >= end) continue;
729
+ output->value[j-start] = intermediate->value[i];
730
+ }
731
+ }
732
+ } else {
733
+ n = 0;
734
+ for(i=0; i<intermediate->l; i++) {
735
+ if(intermediate->start[i] < start) intermediate->start[i] = start;
736
+ if(intermediate->end[i] > end) intermediate->end[i] = end;
737
+ n += intermediate->end[i]-intermediate->start[i];
738
+ }
739
+ output->l = n;
740
+ output->start = malloc(sizeof(uint32_t)*n);
741
+ if(!output->start) goto error;
742
+ output->value = malloc(sizeof(float)*n);
743
+ if(!output->value) goto error;
744
+ n = 0; //this is now the index
745
+ for(i=0; i<intermediate->l; i++) {
746
+ for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
747
+ if(j < start || j >= end) continue;
748
+ output->start[n] = j;
749
+ output->value[n++] = intermediate->value[i];
750
+ }
751
+ }
752
+ }
753
+
754
+ bwDestroyOverlappingIntervals(intermediate);
755
+ return output;
756
+
757
+ error:
758
+ if(intermediate) bwDestroyOverlappingIntervals(intermediate);
759
+ if(output) bwDestroyOverlappingIntervals(output);
760
+ return NULL;
761
+ }
762
+
763
+ void bwDestroyIndexNode(bwRTreeNode_t *node) {
764
+ uint16_t i;
765
+
766
+ if(!node) return;
767
+
768
+ free(node->chrIdxStart);
769
+ free(node->baseStart);
770
+ free(node->chrIdxEnd);
771
+ free(node->baseEnd);
772
+ free(node->dataOffset);
773
+ if(!node->isLeaf) {
774
+ for(i=0; i<node->nChildren; i++) {
775
+ bwDestroyIndexNode(node->x.child[i]);
776
+ }
777
+ free(node->x.child);
778
+ } else {
779
+ free(node->x.size);
780
+ }
781
+ free(node);
782
+ }
783
+
784
+ void bwDestroyIndex(bwRTree_t *idx) {
785
+ bwDestroyIndexNode(idx->root);
786
+ free(idx);
787
+ }
788
+
789
+ //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
790
+ //Returns NULL on error
791
+ bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
792
+ bwRTree_t *idx = readRTreeIdx(fp, offset);
793
+ if(!idx) return NULL;
794
+
795
+ //Read in the root node
796
+ idx->root = bwGetRTreeNode(fp, idx->rootOffset);
797
+
798
+ if(!idx->root) {
799
+ bwDestroyIndex(idx);
800
+ return NULL;
801
+ }
802
+ return idx;
803
+ }